Monitoring service: error handling improvements

This commit is contained in:
ViacheslavKlimov 2023-03-09 14:11:37 +02:00
parent 869136da89
commit 43aacd0a30
3 changed files with 28 additions and 18 deletions

View File

@@ -16,15 +16,16 @@
package org.thingsboard.monitoring.data.notification; package org.thingsboard.monitoring.data.notification;
import lombok.Getter; import lombok.Getter;
import org.apache.commons.lang3.exception.ExceptionUtils;
@Getter @Getter
public class ServiceFailureNotification implements Notification { public class ServiceFailureNotification implements Notification {
private final Object serviceKey; private final Object serviceKey;
private final Exception error; private final Throwable error;
private final int failuresCount; private final int failuresCount;
public ServiceFailureNotification(Object serviceKey, Exception error, int failuresCount) { public ServiceFailureNotification(Object serviceKey, Throwable error, int failuresCount) {
this.serviceKey = serviceKey; this.serviceKey = serviceKey;
this.error = error; this.error = error;
this.failuresCount = failuresCount; this.failuresCount = failuresCount;
@@ -32,7 +33,17 @@ public class ServiceFailureNotification implements Notification {
@Override @Override
public String getText() { public String getText() {
return String.format("[%s] Failure: %s (number of subsequent failures: %s)", serviceKey, error.getMessage(), failuresCount); String errorMsg = error.getMessage();
if (errorMsg == null || errorMsg.equals("null")) {
Throwable cause = ExceptionUtils.getRootCause(error);
if (cause != null) {
errorMsg = cause.getMessage();
}
}
if (errorMsg == null) {
errorMsg = error.getClass().getSimpleName();
}
return String.format("[%s] Failure: %s (number of subsequent failures: %s)", serviceKey, errorMsg, failuresCount);
} }
} }

View File

@@ -21,8 +21,6 @@ import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Value; import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.context.event.ApplicationReadyEvent;
import org.springframework.context.event.EventListener;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import org.thingsboard.common.util.JacksonUtil; import org.thingsboard.common.util.JacksonUtil;
import org.thingsboard.monitoring.client.TbClient; import org.thingsboard.monitoring.client.TbClient;
@@ -39,8 +37,6 @@ import org.thingsboard.server.common.data.id.EntityIdFactory;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors; import java.util.stream.Collectors;
@@ -110,7 +106,10 @@ public class MonitoringReporter {
latencies.computeIfAbsent(latencyKey, k -> new Latency(latencyKey)).report(latencyInMs); latencies.computeIfAbsent(latencyKey, k -> new Latency(latencyKey)).report(latencyInMs);
} }
public void serviceFailure(Object serviceKey, Exception error) { public void serviceFailure(Object serviceKey, Throwable error) {
if (log.isDebugEnabled()) {
log.error("Error occurred", error);
}
int failuresCount = failuresCounters.computeIfAbsent(serviceKey, k -> new AtomicInteger()).incrementAndGet(); int failuresCount = failuresCounters.computeIfAbsent(serviceKey, k -> new AtomicInteger()).incrementAndGet();
ServiceFailureNotification notification = new ServiceFailureNotification(serviceKey, error, failuresCount); ServiceFailureNotification notification = new ServiceFailureNotification(serviceKey, error, failuresCount);
log.error(notification.getText()); log.error(notification.getText());

View File

@@ -84,13 +84,9 @@ public final class TransportMonitoringService {
@EventListener(ApplicationReadyEvent.class) @EventListener(ApplicationReadyEvent.class)
public void startMonitoring() { public void startMonitoring() {
scheduleCheck(0); scheduler.scheduleWithFixedDelay(() -> {
}
private void scheduleCheck(int delay) {
log.debug("Scheduling next check for {} ms", delay);
scheduler.schedule(() -> {
try { try {
log.debug("Starting transports check");
stopWatch.start(); stopWatch.start();
String accessToken = tbClient.logIn(); String accessToken = tbClient.logIn();
reporter.reportLatency(Latencies.LOG_IN, stopWatch.getTime()); reporter.reportLatency(Latencies.LOG_IN, stopWatch.getTime());
@@ -103,11 +99,15 @@ public final class TransportMonitoringService {
} }
} }
reporter.reportLatencies(tbClient); reporter.reportLatencies(tbClient);
} catch (Exception e) { log.debug("Finished transports check");
reporter.serviceFailure(MonitoredServiceKey.GENERAL, e); } catch (Throwable error) {
try {
reporter.serviceFailure(MonitoredServiceKey.GENERAL, error);
} catch (Throwable reportError) {
log.error("Error occurred during service failure reporting", reportError);
}
} }
scheduleCheck(monitoringRateMs); }, 0, monitoringRateMs, TimeUnit.MILLISECONDS);
}, delay, TimeUnit.MILLISECONDS);
} }
private void checkMonitoringTarget(TransportMonitoringConfig config, MonitoringTargetConfig target, TbClient tbClient) { private void checkMonitoringTarget(TransportMonitoringConfig config, MonitoringTargetConfig target, TbClient tbClient) {