Monitoring service: error handling improvements
This commit is contained in:
parent 869136da89
commit 43aacd0a30
@@ -16,15 +16,16 @@
|
||||
package org.thingsboard.monitoring.data.notification;
|
||||
|
||||
import lombok.Getter;
|
||||
import org.apache.commons.lang3.exception.ExceptionUtils;
|
||||
|
||||
@Getter
|
||||
public class ServiceFailureNotification implements Notification {
|
||||
|
||||
private final Object serviceKey;
|
||||
private final Exception error;
|
||||
private final Throwable error;
|
||||
private final int failuresCount;
|
||||
|
||||
public ServiceFailureNotification(Object serviceKey, Exception error, int failuresCount) {
|
||||
public ServiceFailureNotification(Object serviceKey, Throwable error, int failuresCount) {
|
||||
this.serviceKey = serviceKey;
|
||||
this.error = error;
|
||||
this.failuresCount = failuresCount;
|
||||
@@ -32,7 +33,17 @@ public class ServiceFailureNotification implements Notification {
|
||||
|
||||
@Override
|
||||
public String getText() {
|
||||
return String.format("[%s] Failure: %s (number of subsequent failures: %s)", serviceKey, error.getMessage(), failuresCount);
|
||||
String errorMsg = error.getMessage();
|
||||
if (errorMsg == null || errorMsg.equals("null")) {
|
||||
Throwable cause = ExceptionUtils.getRootCause(error);
|
||||
if (cause != null) {
|
||||
errorMsg = cause.getMessage();
|
||||
}
|
||||
}
|
||||
if (errorMsg == null) {
|
||||
errorMsg = error.getClass().getSimpleName();
|
||||
}
|
||||
return String.format("[%s] Failure: %s (number of subsequent failures: %s)", serviceKey, errorMsg, failuresCount);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -21,8 +21,6 @@ import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.boot.context.event.ApplicationReadyEvent;
|
||||
import org.springframework.context.event.EventListener;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.thingsboard.common.util.JacksonUtil;
|
||||
import org.thingsboard.monitoring.client.TbClient;
|
||||
@@ -39,8 +37,6 @@ import org.thingsboard.server.common.data.id.EntityIdFactory;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ScheduledExecutorService;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@@ -110,7 +106,10 @@ public class MonitoringReporter {
|
||||
latencies.computeIfAbsent(latencyKey, k -> new Latency(latencyKey)).report(latencyInMs);
|
||||
}
|
||||
|
||||
public void serviceFailure(Object serviceKey, Exception error) {
|
||||
public void serviceFailure(Object serviceKey, Throwable error) {
|
||||
if (log.isDebugEnabled()) {
|
||||
log.error("Error occurred", error);
|
||||
}
|
||||
int failuresCount = failuresCounters.computeIfAbsent(serviceKey, k -> new AtomicInteger()).incrementAndGet();
|
||||
ServiceFailureNotification notification = new ServiceFailureNotification(serviceKey, error, failuresCount);
|
||||
log.error(notification.getText());
|
||||
|
||||
@@ -84,13 +84,9 @@ public final class TransportMonitoringService {
|
||||
|
||||
@EventListener(ApplicationReadyEvent.class)
|
||||
public void startMonitoring() {
|
||||
scheduleCheck(0);
|
||||
}
|
||||
|
||||
private void scheduleCheck(int delay) {
|
||||
log.debug("Scheduling next check for {} ms", delay);
|
||||
scheduler.schedule(() -> {
|
||||
scheduler.scheduleWithFixedDelay(() -> {
|
||||
try {
|
||||
log.debug("Starting transports check");
|
||||
stopWatch.start();
|
||||
String accessToken = tbClient.logIn();
|
||||
reporter.reportLatency(Latencies.LOG_IN, stopWatch.getTime());
|
||||
@@ -103,11 +99,15 @@ public final class TransportMonitoringService {
|
||||
}
|
||||
}
|
||||
reporter.reportLatencies(tbClient);
|
||||
} catch (Exception e) {
|
||||
reporter.serviceFailure(MonitoredServiceKey.GENERAL, e);
|
||||
log.debug("Finished transports check");
|
||||
} catch (Throwable error) {
|
||||
try {
|
||||
reporter.serviceFailure(MonitoredServiceKey.GENERAL, error);
|
||||
} catch (Throwable reportError) {
|
||||
log.error("Error occurred during service failure reporting", reportError);
|
||||
}
|
||||
}
|
||||
scheduleCheck(monitoringRateMs);
|
||||
}, delay, TimeUnit.MILLISECONDS);
|
||||
}, 0, monitoringRateMs, TimeUnit.MILLISECONDS);
|
||||
}
|
||||
|
||||
private void checkMonitoringTarget(TransportMonitoringConfig config, MonitoringTargetConfig target, TbClient tbClient) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user