Monitoring service: error handling improvements
This commit is contained in:
parent
869136da89
commit
43aacd0a30
@ -16,15 +16,16 @@
|
|||||||
package org.thingsboard.monitoring.data.notification;
|
package org.thingsboard.monitoring.data.notification;
|
||||||
|
|
||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
|
import org.apache.commons.lang3.exception.ExceptionUtils;
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
public class ServiceFailureNotification implements Notification {
|
public class ServiceFailureNotification implements Notification {
|
||||||
|
|
||||||
private final Object serviceKey;
|
private final Object serviceKey;
|
||||||
private final Exception error;
|
private final Throwable error;
|
||||||
private final int failuresCount;
|
private final int failuresCount;
|
||||||
|
|
||||||
public ServiceFailureNotification(Object serviceKey, Exception error, int failuresCount) {
|
public ServiceFailureNotification(Object serviceKey, Throwable error, int failuresCount) {
|
||||||
this.serviceKey = serviceKey;
|
this.serviceKey = serviceKey;
|
||||||
this.error = error;
|
this.error = error;
|
||||||
this.failuresCount = failuresCount;
|
this.failuresCount = failuresCount;
|
||||||
@ -32,7 +33,17 @@ public class ServiceFailureNotification implements Notification {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getText() {
|
public String getText() {
|
||||||
return String.format("[%s] Failure: %s (number of subsequent failures: %s)", serviceKey, error.getMessage(), failuresCount);
|
String errorMsg = error.getMessage();
|
||||||
|
if (errorMsg == null || errorMsg.equals("null")) {
|
||||||
|
Throwable cause = ExceptionUtils.getRootCause(error);
|
||||||
|
if (cause != null) {
|
||||||
|
errorMsg = cause.getMessage();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (errorMsg == null) {
|
||||||
|
errorMsg = error.getClass().getSimpleName();
|
||||||
|
}
|
||||||
|
return String.format("[%s] Failure: %s (number of subsequent failures: %s)", serviceKey, errorMsg, failuresCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -21,8 +21,6 @@ import lombok.RequiredArgsConstructor;
|
|||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.springframework.beans.factory.annotation.Value;
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
import org.springframework.boot.context.event.ApplicationReadyEvent;
|
|
||||||
import org.springframework.context.event.EventListener;
|
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
import org.thingsboard.common.util.JacksonUtil;
|
import org.thingsboard.common.util.JacksonUtil;
|
||||||
import org.thingsboard.monitoring.client.TbClient;
|
import org.thingsboard.monitoring.client.TbClient;
|
||||||
@ -39,8 +37,6 @@ import org.thingsboard.server.common.data.id.EntityIdFactory;
|
|||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
import java.util.concurrent.ScheduledExecutorService;
|
|
||||||
import java.util.concurrent.TimeUnit;
|
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
@ -110,7 +106,10 @@ public class MonitoringReporter {
|
|||||||
latencies.computeIfAbsent(latencyKey, k -> new Latency(latencyKey)).report(latencyInMs);
|
latencies.computeIfAbsent(latencyKey, k -> new Latency(latencyKey)).report(latencyInMs);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void serviceFailure(Object serviceKey, Exception error) {
|
public void serviceFailure(Object serviceKey, Throwable error) {
|
||||||
|
if (log.isDebugEnabled()) {
|
||||||
|
log.error("Error occurred", error);
|
||||||
|
}
|
||||||
int failuresCount = failuresCounters.computeIfAbsent(serviceKey, k -> new AtomicInteger()).incrementAndGet();
|
int failuresCount = failuresCounters.computeIfAbsent(serviceKey, k -> new AtomicInteger()).incrementAndGet();
|
||||||
ServiceFailureNotification notification = new ServiceFailureNotification(serviceKey, error, failuresCount);
|
ServiceFailureNotification notification = new ServiceFailureNotification(serviceKey, error, failuresCount);
|
||||||
log.error(notification.getText());
|
log.error(notification.getText());
|
||||||
|
|||||||
@ -84,13 +84,9 @@ public final class TransportMonitoringService {
|
|||||||
|
|
||||||
@EventListener(ApplicationReadyEvent.class)
|
@EventListener(ApplicationReadyEvent.class)
|
||||||
public void startMonitoring() {
|
public void startMonitoring() {
|
||||||
scheduleCheck(0);
|
scheduler.scheduleWithFixedDelay(() -> {
|
||||||
}
|
|
||||||
|
|
||||||
private void scheduleCheck(int delay) {
|
|
||||||
log.debug("Scheduling next check for {} ms", delay);
|
|
||||||
scheduler.schedule(() -> {
|
|
||||||
try {
|
try {
|
||||||
|
log.debug("Starting transports check");
|
||||||
stopWatch.start();
|
stopWatch.start();
|
||||||
String accessToken = tbClient.logIn();
|
String accessToken = tbClient.logIn();
|
||||||
reporter.reportLatency(Latencies.LOG_IN, stopWatch.getTime());
|
reporter.reportLatency(Latencies.LOG_IN, stopWatch.getTime());
|
||||||
@ -103,11 +99,15 @@ public final class TransportMonitoringService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
reporter.reportLatencies(tbClient);
|
reporter.reportLatencies(tbClient);
|
||||||
} catch (Exception e) {
|
log.debug("Finished transports check");
|
||||||
reporter.serviceFailure(MonitoredServiceKey.GENERAL, e);
|
} catch (Throwable error) {
|
||||||
|
try {
|
||||||
|
reporter.serviceFailure(MonitoredServiceKey.GENERAL, error);
|
||||||
|
} catch (Throwable reportError) {
|
||||||
|
log.error("Error occurred during service failure reporting", reportError);
|
||||||
}
|
}
|
||||||
scheduleCheck(monitoringRateMs);
|
}
|
||||||
}, delay, TimeUnit.MILLISECONDS);
|
}, 0, monitoringRateMs, TimeUnit.MILLISECONDS);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void checkMonitoringTarget(TransportMonitoringConfig config, MonitoringTargetConfig target, TbClient tbClient) {
|
private void checkMonitoringTarget(TransportMonitoringConfig config, MonitoringTargetConfig target, TbClient tbClient) {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user