From fa6a3cf1a6e3676a9bb7c45c729bd786690d2cb4 Mon Sep 17 00:00:00 2001 From: Chris Laprun Date: Thu, 9 Mar 2023 20:24:11 +0100 Subject: [PATCH 01/12] feat: remove CR meters when they are deleted (after a delay) Fixes #1803 --- .../micrometer/MicrometerMetrics.java | 42 +++++++++++++------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java b/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java index 651b012a02..3a07901241 100644 --- a/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java +++ b/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java @@ -1,11 +1,10 @@ package io.javaoperatorsdk.operator.monitoring.micrometer; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Optional; +import java.util.*; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import io.fabric8.kubernetes.api.model.HasMetadata; @@ -17,6 +16,7 @@ import io.javaoperatorsdk.operator.processing.GroupVersionKind; import io.javaoperatorsdk.operator.processing.event.Event; import io.javaoperatorsdk.operator.processing.event.ResourceID; +import io.micrometer.core.instrument.Meter; import io.micrometer.core.instrument.MeterRegistry; import io.micrometer.core.instrument.Tag; import io.micrometer.core.instrument.Timer; @@ -31,9 +31,17 @@ public class MicrometerMetrics implements Metrics { private static final String RECONCILIATIONS_QUEUE_SIZE = PREFIX + RECONCILIATIONS + "queue.size."; private final MeterRegistry registry; private final Map gauges = new ConcurrentHashMap<>(); + private final Map> metersPerResource = new ConcurrentHashMap<>(); + private final ScheduledExecutorService metersCleaner = Executors.newScheduledThreadPool(10); + private final int cleanUpDelayInSeconds; public MicrometerMetrics(MeterRegistry registry) { + this(registry, 300); + } + + public MicrometerMetrics(MeterRegistry registry, int cleanUpDelayInSeconds) { this.registry = registry; + this.cleanUpDelayInSeconds = cleanUpDelayInSeconds; } @Override @@ -116,6 +124,14 @@ public void receivedEvent(Event event, Map metadata) { @Override public void cleanupDoneFor(ResourceID resourceID, Map metadata) { incrementCounter(resourceID, "events.delete", metadata); + + // schedule deletion of meters associated with ResourceID + metersCleaner.schedule(() -> { + final var toClean = metersPerResource.get(resourceID); + if (toClean != null) { + toClean.forEach(registry::remove); + } + }, cleanUpDelayInSeconds, TimeUnit.SECONDS); } @Override @@ -125,11 +141,11 @@ public void reconcileCustomResource(HasMetadata resource, RetryInfo retryInfoNul incrementCounter(ResourceID.fromResource(resource), RECONCILIATIONS + "started", metadata, RECONCILIATIONS + "retries.number", - "" + retryInfo.map(RetryInfo::getAttemptCount).orElse(0), + String.valueOf(retryInfo.map(RetryInfo::getAttemptCount).orElse(0)), RECONCILIATIONS + "retries.last", - "" + retryInfo.map(RetryInfo::isLastAttempt).orElse(true)); + String.valueOf(retryInfo.map(RetryInfo::isLastAttempt).orElse(true))); - AtomicInteger controllerQueueSize = + var controllerQueueSize = gauges.get(RECONCILIATIONS_QUEUE_SIZE + metadata.get(CONTROLLER_NAME)); controllerQueueSize.incrementAndGet(); } @@ -141,18 +157,18 @@ public void finishedReconciliation(HasMetadata resource, Map met @Override public void reconciliationExecutionStarted(HasMetadata resource, Map metadata) { - AtomicInteger reconcilerExecutions = + var reconcilerExecutions = gauges.get(RECONCILIATIONS_EXECUTIONS + metadata.get(CONTROLLER_NAME)); reconcilerExecutions.incrementAndGet(); } @Override public void reconciliationExecutionFinished(HasMetadata resource, Map metadata) { - AtomicInteger reconcilerExecutions = + var reconcilerExecutions = gauges.get(RECONCILIATIONS_EXECUTIONS + metadata.get(CONTROLLER_NAME)); reconcilerExecutions.decrementAndGet(); - AtomicInteger controllerQueueSize = + var controllerQueueSize = gauges.get(RECONCILIATIONS_QUEUE_SIZE + metadata.get(CONTROLLER_NAME)); controllerQueueSize.decrementAndGet(); } @@ -202,6 +218,8 @@ private void incrementCounter(ResourceID id, String counterName, Map new HashSet<>()).add(counter.getId()); + counter.increment(); } } From d8becd145baef6f019bb7704319ec5ace236c01c Mon Sep 17 00:00:00 2001 From: Chris Laprun Date: Mon, 13 Mar 2023 15:36:20 +0100 Subject: [PATCH 02/12] tests: add integration test for meter cleaning --- micrometer-support/pom.xml | 31 +++++++ .../micrometer/MicrometerMetrics.java | 4 + .../micrometer/MetricsCleaningOnDeleteIT.java | 88 +++++++++++++++++++ 3 files changed, 123 insertions(+) create mode 100644 micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/MetricsCleaningOnDeleteIT.java diff --git a/micrometer-support/pom.xml b/micrometer-support/pom.xml index ae1be1816f..e28cec6473 100644 --- a/micrometer-support/pom.xml +++ b/micrometer-support/pom.xml @@ -26,6 +26,37 @@ io.javaoperatorsdk operator-framework-core + + org.junit.jupiter + junit-jupiter-api + test + + + org.junit.jupiter + junit-jupiter-engine + test + + + org.assertj + assertj-core + test + + + org.awaitility + awaitility + test + + + io.javaoperatorsdk + operator-framework-junit-5 + ${project.version} + test + + + org.mockito + mockito-core + test + \ No newline at end of file diff --git a/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java b/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java index 3a07901241..cf220e3e78 100644 --- a/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java +++ b/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java @@ -222,4 +222,8 @@ private void incrementCounter(ResourceID id, String counterName, Map new HashSet<>()).add(counter.getId()); counter.increment(); } + + protected Set recordedMeterIdsFor(ResourceID resourceID) { + return metersPerResource.get(resourceID); + } } diff --git a/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/MetricsCleaningOnDeleteIT.java b/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/MetricsCleaningOnDeleteIT.java new file mode 100644 index 0000000000..550a34e45f --- /dev/null +++ b/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/MetricsCleaningOnDeleteIT.java @@ -0,0 +1,88 @@ +package io.javaoperatorsdk.operator.monitoring.micrometer; + +import java.util.concurrent.TimeUnit; + +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.RegisterExtension; + +import io.fabric8.kubernetes.api.model.ConfigMap; +import io.fabric8.kubernetes.api.model.ConfigMapBuilder; +import io.javaoperatorsdk.operator.api.config.ConfigurationServiceProvider; +import io.javaoperatorsdk.operator.api.reconciler.*; +import io.javaoperatorsdk.operator.junit.LocallyRunOperatorExtension; +import io.javaoperatorsdk.operator.processing.event.ResourceID; +import io.micrometer.core.instrument.MeterRegistry; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.awaitility.Awaitility.await; +import static org.mockito.Mockito.mock; + +public class MetricsCleaningOnDeleteIT { + @RegisterExtension + static LocallyRunOperatorExtension operator = + LocallyRunOperatorExtension.builder().withReconciler(new MetricsCleaningTestReconciler()) + .build(); + + private static final MeterRegistry mockRegistry = mock(MeterRegistry.class); + private static final int testDelay = 15; + private static final TestMetrics metrics = new TestMetrics(mockRegistry, testDelay); + private static final String testResourceName = "cleaning-metrics-cr"; + + @BeforeAll + static void setup() { + ConfigurationServiceProvider.overrideCurrent( + overrider -> overrider.withMetrics(new TestMetrics(mockRegistry, testDelay))); + } + + @AfterAll + static void reset() { + ConfigurationServiceProvider.reset(); + } + + @Test + void addsFinalizerAndCallsCleanupIfCleanerImplemented() { + var testResource = new ConfigMapBuilder() + .withNewMetadata() + .withName(testResourceName) + .endMetadata() + .build(); + final var created = operator.create(testResource); + + var meters = metrics.recordedMeterIdsFor(ResourceID.fromResource(created)); + assertThat(meters).isNotNull(); + assertThat(meters).isNotEmpty(); + + await().until(() -> !operator.get(ConfigMap.class, testResourceName) + .getMetadata().getFinalizers().isEmpty()); + + operator.delete(testResource); + + await().until(() -> operator.get(ConfigMap.class, testResourceName) == null); + + await().atLeast(testDelay + 3, TimeUnit.SECONDS); + meters = metrics.recordedMeterIdsFor(ResourceID.fromResource(created)); + assertThat(meters).isNull(); + } + + private static class MetricsCleaningTestReconciler + implements Reconciler, Cleaner { + @Override + public UpdateControl reconcile(ConfigMap resource, Context context) { + return UpdateControl.noUpdate(); + } + + @Override + public DeleteControl cleanup(ConfigMap resource, Context context) { + return DeleteControl.defaultDelete(); + } + } + + private static class TestMetrics extends MicrometerMetrics { + + public TestMetrics(MeterRegistry registry, int cleanUpDelayInSeconds) { + super(registry, cleanUpDelayInSeconds); + } + } +} From 712d258a02d124cc2ad2c8bde9ecdea8a658c10f Mon Sep 17 00:00:00 2001 From: Chris Laprun Date: Mon, 13 Mar 2023 15:55:09 +0100 Subject: [PATCH 03/12] fix: add http client implementation --- micrometer-support/pom.xml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/micrometer-support/pom.xml b/micrometer-support/pom.xml index e28cec6473..f82c2e4a4c 100644 --- a/micrometer-support/pom.xml +++ b/micrometer-support/pom.xml @@ -57,6 +57,11 @@ mockito-core test + + io.fabric8 + kubernetes-httpclient-vertx + test + \ No newline at end of file From b66712acb52aeb616f132fa044ee1df2f64dcd50 Mon Sep 17 00:00:00 2001 From: Chris Laprun Date: Mon, 13 Mar 2023 16:20:32 +0100 Subject: [PATCH 04/12] feat: make meter cleaning thread number configurable --- .../operator/monitoring/micrometer/MicrometerMetrics.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java b/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java index cf220e3e78..f9fd82551a 100644 --- a/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java +++ b/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java @@ -32,16 +32,17 @@ public class MicrometerMetrics implements Metrics { private final MeterRegistry registry; private final Map gauges = new ConcurrentHashMap<>(); private final Map> metersPerResource = new ConcurrentHashMap<>(); - private final ScheduledExecutorService metersCleaner = Executors.newScheduledThreadPool(10); + private final ScheduledExecutorService metersCleaner; private final int cleanUpDelayInSeconds; public MicrometerMetrics(MeterRegistry registry) { - this(registry, 300); + this(registry, 300, Runtime.getRuntime().availableProcessors()); } - public MicrometerMetrics(MeterRegistry registry, int cleanUpDelayInSeconds) { + public MicrometerMetrics(MeterRegistry registry, int cleanUpDelayInSeconds, int cleaningThreadsNumber) { this.registry = registry; this.cleanUpDelayInSeconds = cleanUpDelayInSeconds; + this.metersCleaner = Executors.newScheduledThreadPool(cleaningThreadsNumber); } @Override From a60cd3147462cc27446c6b96d498d944fbbeff68 Mon Sep 17 00:00:00 2001 From: Chris Laprun Date: Mon, 13 Mar 2023 16:22:32 +0100 Subject: [PATCH 05/12] tests: verify that remove is indeed called on the registry --- .../monitoring/micrometer/MetricsCleaningOnDeleteIT.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/MetricsCleaningOnDeleteIT.java b/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/MetricsCleaningOnDeleteIT.java index 550a34e45f..b3582ec623 100644 --- a/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/MetricsCleaningOnDeleteIT.java +++ b/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/MetricsCleaningOnDeleteIT.java @@ -18,6 +18,7 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.awaitility.Awaitility.await; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; public class MetricsCleaningOnDeleteIT { @RegisterExtension @@ -62,6 +63,7 @@ void addsFinalizerAndCallsCleanupIfCleanerImplemented() { await().until(() -> operator.get(ConfigMap.class, testResourceName) == null); await().atLeast(testDelay + 3, TimeUnit.SECONDS); + meters.forEach(id -> verify(mockRegistry.remove(id))); meters = metrics.recordedMeterIdsFor(ResourceID.fromResource(created)); assertThat(meters).isNull(); } @@ -82,7 +84,7 @@ public DeleteControl cleanup(ConfigMap resource, Context context) { private static class TestMetrics extends MicrometerMetrics { public TestMetrics(MeterRegistry registry, int cleanUpDelayInSeconds) { - super(registry, cleanUpDelayInSeconds); + super(registry, cleanUpDelayInSeconds, 2); } } } From c414db7552c2e37cb9420669aeb1b16730b590b7 Mon Sep 17 00:00:00 2001 From: Chris Laprun Date: Mon, 13 Mar 2023 16:23:57 +0100 Subject: [PATCH 06/12] fix: add missing annotation --- .../operator/monitoring/micrometer/MicrometerMetrics.java | 3 ++- .../monitoring/micrometer/MetricsCleaningOnDeleteIT.java | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java b/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java index f9fd82551a..97a5c3e498 100644 --- a/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java +++ b/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java @@ -39,7 +39,8 @@ public MicrometerMetrics(MeterRegistry registry) { this(registry, 300, Runtime.getRuntime().availableProcessors()); } - public MicrometerMetrics(MeterRegistry registry, int cleanUpDelayInSeconds, int cleaningThreadsNumber) { + public MicrometerMetrics(MeterRegistry registry, int cleanUpDelayInSeconds, + int cleaningThreadsNumber) { this.registry = registry; this.cleanUpDelayInSeconds = cleanUpDelayInSeconds; this.metersCleaner = Executors.newScheduledThreadPool(cleaningThreadsNumber); diff --git a/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/MetricsCleaningOnDeleteIT.java b/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/MetricsCleaningOnDeleteIT.java index b3582ec623..ac90675980 100644 --- a/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/MetricsCleaningOnDeleteIT.java +++ b/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/MetricsCleaningOnDeleteIT.java @@ -68,6 +68,7 @@ void addsFinalizerAndCallsCleanupIfCleanerImplemented() { assertThat(meters).isNull(); } + @ControllerConfiguration private static class MetricsCleaningTestReconciler implements Reconciler, Cleaner { @Override From c91fbc2b4bc379905951362e7294ee009a16249b Mon Sep 17 00:00:00 2001 From: Chris Laprun Date: Mon, 13 Mar 2023 21:04:41 +0100 Subject: [PATCH 07/12] fix: remove local associations once the meters have been removed --- .../operator/monitoring/micrometer/MicrometerMetrics.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java b/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java index 97a5c3e498..6871524efb 100644 --- a/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java +++ b/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java @@ -129,10 +129,13 @@ public void cleanupDoneFor(ResourceID resourceID, Map metadata) // schedule deletion of meters associated with ResourceID metersCleaner.schedule(() -> { + // remove each meter final var toClean = metersPerResource.get(resourceID); if (toClean != null) { toClean.forEach(registry::remove); } + // then clean-up local recording of associations + metersPerResource.remove(resourceID); }, cleanUpDelayInSeconds, TimeUnit.SECONDS); } From 7465f3c59afb54b935401c3d9d014fc20245817b Mon Sep 17 00:00:00 2001 From: Chris Laprun Date: Mon, 13 Mar 2023 21:04:57 +0100 Subject: [PATCH 08/12] fix: tests --- micrometer-support/pom.xml | 5 -- .../micrometer/MetricsCleaningOnDeleteIT.java | 50 +++++++++++-------- 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/micrometer-support/pom.xml b/micrometer-support/pom.xml index f82c2e4a4c..92c2a6fc18 100644 --- a/micrometer-support/pom.xml +++ b/micrometer-support/pom.xml @@ -52,11 +52,6 @@ ${project.version} test - - org.mockito - mockito-core - test - io.fabric8 kubernetes-httpclient-vertx diff --git a/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/MetricsCleaningOnDeleteIT.java b/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/MetricsCleaningOnDeleteIT.java index ac90675980..717f3a11f9 100644 --- a/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/MetricsCleaningOnDeleteIT.java +++ b/micrometer-support/src/test/java/io/javaoperatorsdk/operator/monitoring/micrometer/MetricsCleaningOnDeleteIT.java @@ -1,6 +1,8 @@ package io.javaoperatorsdk.operator.monitoring.micrometer; -import java.util.concurrent.TimeUnit; +import java.time.Duration; +import java.util.HashSet; +import java.util.Set; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; @@ -13,12 +15,11 @@ import io.javaoperatorsdk.operator.api.reconciler.*; import io.javaoperatorsdk.operator.junit.LocallyRunOperatorExtension; import io.javaoperatorsdk.operator.processing.event.ResourceID; -import io.micrometer.core.instrument.MeterRegistry; +import io.micrometer.core.instrument.Meter; +import io.micrometer.core.instrument.simple.SimpleMeterRegistry; import static org.assertj.core.api.Assertions.assertThat; import static org.awaitility.Awaitility.await; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.verify; public class MetricsCleaningOnDeleteIT { @RegisterExtension @@ -26,15 +27,14 @@ public class MetricsCleaningOnDeleteIT { LocallyRunOperatorExtension.builder().withReconciler(new MetricsCleaningTestReconciler()) .build(); - private static final MeterRegistry mockRegistry = mock(MeterRegistry.class); - private static final int testDelay = 15; - private static final TestMetrics metrics = new TestMetrics(mockRegistry, testDelay); + private static final TestSimpleMeterRegistry registry = new TestSimpleMeterRegistry(); + private static final int testDelay = 1; + private static final MicrometerMetrics metrics = new MicrometerMetrics(registry, testDelay, 2); private static final String testResourceName = "cleaning-metrics-cr"; @BeforeAll static void setup() { - ConfigurationServiceProvider.overrideCurrent( - overrider -> overrider.withMetrics(new TestMetrics(mockRegistry, testDelay))); + ConfigurationServiceProvider.overrideCurrent(overrider -> overrider.withMetrics(metrics)); } @AfterAll @@ -43,7 +43,7 @@ static void reset() { } @Test - void addsFinalizerAndCallsCleanupIfCleanerImplemented() { + void removesMetersAssociatedWithResourceAfterItsDeletion() throws InterruptedException { var testResource = new ConfigMapBuilder() .withNewMetadata() .withName(testResourceName) @@ -51,21 +51,23 @@ void addsFinalizerAndCallsCleanupIfCleanerImplemented() { .build(); final var created = operator.create(testResource); - var meters = metrics.recordedMeterIdsFor(ResourceID.fromResource(created)); - assertThat(meters).isNotNull(); - assertThat(meters).isNotEmpty(); - + // make sure the resource is created await().until(() -> !operator.get(ConfigMap.class, testResourceName) .getMetadata().getFinalizers().isEmpty()); - operator.delete(testResource); + // check that we properly recorded meters associated with the resource + final var meters = metrics.recordedMeterIdsFor(ResourceID.fromResource(created)); + assertThat(meters).isNotNull(); + assertThat(meters).isNotEmpty(); + // delete the resource and wait for it to be deleted + operator.delete(testResource); await().until(() -> operator.get(ConfigMap.class, testResourceName) == null); - await().atLeast(testDelay + 3, TimeUnit.SECONDS); - meters.forEach(id -> verify(mockRegistry.remove(id))); - meters = metrics.recordedMeterIdsFor(ResourceID.fromResource(created)); - assertThat(meters).isNull(); + // check that the meters are properly removed after the specified delay + Thread.sleep(Duration.ofSeconds(testDelay).toMillis()); + assertThat(registry.removed).isEqualTo(meters); + assertThat(metrics.recordedMeterIdsFor(ResourceID.fromResource(created))).isNull(); } @ControllerConfiguration @@ -82,10 +84,14 @@ public DeleteControl cleanup(ConfigMap resource, Context context) { } } - private static class TestMetrics extends MicrometerMetrics { + private static class TestSimpleMeterRegistry extends SimpleMeterRegistry { + private final Set removed = new HashSet<>(); - public TestMetrics(MeterRegistry registry, int cleanUpDelayInSeconds) { - super(registry, cleanUpDelayInSeconds, 2); + @Override + public Meter remove(Meter.Id mappedId) { + final var removed = super.remove(mappedId); + this.removed.add(removed.getId()); + return removed; } } } From f2eb5917c1dfa8ab76667a5dedbecb4003318c21 Mon Sep 17 00:00:00 2001 From: Chris Laprun Date: Mon, 13 Mar 2023 22:39:34 +0100 Subject: [PATCH 09/12] feat: support having no delay before deleting associated meters --- .../micrometer/MicrometerMetrics.java | 68 +++++++++++++++---- 1 file changed, 53 insertions(+), 15 deletions(-) diff --git a/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java b/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java index 6871524efb..3390fbd992 100644 --- a/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java +++ b/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java @@ -32,18 +32,27 @@ public class MicrometerMetrics implements Metrics { private final MeterRegistry registry; private final Map gauges = new ConcurrentHashMap<>(); private final Map> metersPerResource = new ConcurrentHashMap<>(); - private final ScheduledExecutorService metersCleaner; - private final int cleanUpDelayInSeconds; + private final Cleaner cleaner; public MicrometerMetrics(MeterRegistry registry) { - this(registry, 300, Runtime.getRuntime().availableProcessors()); + this(registry, 0); + } + + public MicrometerMetrics(MeterRegistry registry, int cleanUpDelayInSeconds) { + this(registry, cleanUpDelayInSeconds, 0); } public MicrometerMetrics(MeterRegistry registry, int cleanUpDelayInSeconds, int cleaningThreadsNumber) { this.registry = registry; - this.cleanUpDelayInSeconds = cleanUpDelayInSeconds; - this.metersCleaner = Executors.newScheduledThreadPool(cleaningThreadsNumber); + if (cleanUpDelayInSeconds < 0) { + cleaner = new NoDelayCleaner(); + } else { + cleaningThreadsNumber = + cleaningThreadsNumber <= 0 ? Runtime.getRuntime().availableProcessors() + : cleaningThreadsNumber; + cleaner = new DelayedCleaner(cleanUpDelayInSeconds, cleaningThreadsNumber); + } } @Override @@ -127,16 +136,7 @@ public void receivedEvent(Event event, Map metadata) { public void cleanupDoneFor(ResourceID resourceID, Map metadata) { incrementCounter(resourceID, "events.delete", metadata); - // schedule deletion of meters associated with ResourceID - metersCleaner.schedule(() -> { - // remove each meter - final var toClean = metersPerResource.get(resourceID); - if (toClean != null) { - toClean.forEach(registry::remove); - } - // then clean-up local recording of associations - metersPerResource.remove(resourceID); - }, cleanUpDelayInSeconds, TimeUnit.SECONDS); + cleaner.removeMetersFor(resourceID); } @Override @@ -231,4 +231,42 @@ private void incrementCounter(ResourceID id, String counterName, Map recordedMeterIdsFor(ResourceID resourceID) { return metersPerResource.get(resourceID); } + + private interface Cleaner { + void removeMetersFor(ResourceID resourceID); + } + + private void removeMetersFor(ResourceID resourceID) { + // remove each meter + final var toClean = metersPerResource.get(resourceID); + if (toClean != null) { + toClean.forEach(registry::remove); + } + // then clean-up local recording of associations + metersPerResource.remove(resourceID); + } + + private class NoDelayCleaner implements Cleaner { + @Override + public void removeMetersFor(ResourceID resourceID) { + MicrometerMetrics.this.removeMetersFor(resourceID); + } + } + + private class DelayedCleaner implements Cleaner { + private final ScheduledExecutorService metersCleaner; + private final int cleanUpDelayInSeconds; + + private DelayedCleaner(int cleanUpDelayInSeconds, int cleaningThreadsNumber) { + this.cleanUpDelayInSeconds = cleanUpDelayInSeconds; + this.metersCleaner = Executors.newScheduledThreadPool(cleaningThreadsNumber); + } + + @Override + public void removeMetersFor(ResourceID resourceID) { + // schedule deletion of meters associated with ResourceID + metersCleaner.schedule(() -> MicrometerMetrics.this.removeMetersFor(resourceID), + cleanUpDelayInSeconds, TimeUnit.SECONDS); + } + } } From 1fa780504d4483a402bcb4ee7f3449a2d292fdb1 Mon Sep 17 00:00:00 2001 From: Chris Laprun Date: Tue, 14 Mar 2023 22:05:31 +0100 Subject: [PATCH 10/12] feat: add action name to ResourceEvent metrics tags --- .../monitoring/micrometer/MicrometerMetrics.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java b/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java index 3390fbd992..39fb5bb1f7 100644 --- a/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java +++ b/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java @@ -16,6 +16,7 @@ import io.javaoperatorsdk.operator.processing.GroupVersionKind; import io.javaoperatorsdk.operator.processing.event.Event; import io.javaoperatorsdk.operator.processing.event.ResourceID; +import io.javaoperatorsdk.operator.processing.event.source.controller.ResourceEvent; import io.micrometer.core.instrument.Meter; import io.micrometer.core.instrument.MeterRegistry; import io.micrometer.core.instrument.Tag; @@ -127,9 +128,17 @@ private static String getScope(ResourceID resourceID) { @Override public void receivedEvent(Event event, Map metadata) { + final String[] tags; + if (event instanceof ResourceEvent) { + tags = new String[] {"event", event.getClass().getSimpleName(), "action", + ((ResourceEvent) event).getAction().toString()}; + } else { + tags = new String[] {"event", event.getClass().getSimpleName()}; + } + incrementCounter(event.getRelatedCustomResourceID(), "events.received", metadata, - "event", event.getClass().getSimpleName()); + tags); } @Override From d9c27e2ab95a05180294d64f0bfae05fac239918 Mon Sep 17 00:00:00 2001 From: Chris Laprun Date: Tue, 14 Mar 2023 22:18:20 +0100 Subject: [PATCH 11/12] docs: add Metrics feature documentation --- docs/documentation/features.md | 45 ++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/docs/documentation/features.md b/docs/documentation/features.md index d69dc4da09..2a27250559 100644 --- a/docs/documentation/features.md +++ b/docs/documentation/features.md @@ -757,6 +757,51 @@ with a `mycrs` plural form will result in 2 files: > Quarkus users using the `quarkus-operator-sdk` extension do not need to add any extra dependency > to get their CRD generated as this is handled by the extension itself. +## Metrics + +JOSDK provides built-in support for metrics reporting on what is happening with your reconcilers in the form of +the `Metrics` interface which can be implemented to connect to your metrics provider of choice, JOSDK calling the +methods as it goes about reconciling resources. By default, a no-operation implementation is provided thus providing a +no-cost sane default. A [micrometer](https://micrometer.io)-based implementation is also provided. + +You can use a different implementation by overriding the default one provided by the default `ConfigurationService`, as +follows: + +```java +Metrics metrics= …; +ConfigurationServiceProvider.overrideCurrent(overrider->overrider.withMetrics(metrics)); +``` + +### Micrometer implementation + +The micrometer implementation records a lot of metrics associated to each resource handled by the operator by default. +In order to be efficient, the implementation removes meters associated with resources when they are deleted. Since it +might be useful to keep these metrics around for a bit before they are deleted, it is possible to configure a delay +before their removal. As this is done asynchronously, it is also possible to configure how many threads you want to +devote to these operations. Both aspects are controlled by the `MicrometerMetrics` constructor so changing the defaults +is a matter of instantiating `MicrometerMetrics` with the desired values and tell `ConfigurationServiceProvider` about +it as shown above. + +The micrometer implementation records the following metrics: + +| Meter name | Type | Tags | Description | +|-----------------------------------------------------------|----------------|------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------| +| operator.sdk.reconciliations.executions. | gauge | group, version, kind | Number of executions of the named reconciler | +| operator.sdk.reconciliations.queue.size. | gauge | group, version, kind | How many resources are queued to get reconciled by named reconciler | +| operator.sdk..size | gauge map size | | Gauge tracking the size of a specified map (currently unused but could be used to monitor caches size) | +| operator.sdk.events.received | counter | group, version, kind, name, namespace, scope, event, action | Number of received Kubernetes events | +| operator.sdk.events.delete | counter | group, version, kind, name, namespace, scope | Number of received Kubernetes delete events | +| operator.sdk.reconciliations.started | counter | group, version, kind, name, namespace, scope, reconciliations.retries.last, reconciliations.retries.number | Number of started reconciliations per resource type | +| operator.sdk.reconciliations.failed | counter | group, version, kind, name, namespace, scope, exception | Number of failed reconciliations per resource type | +| operator.sdk.reconciliations.success | counter | group, version, kind, name, namespace, scope | Number of successful reconciliations per resource type | +| operator.sdk.controllers.execution.reconcile.success | counter | controller, type | Number of successful reconciliations per controller | +| operator.sdk.controllers.execution.reconcile.failure | counter | controller, exception | Number of failed reconciliations per controller | +| operator.sdk.controllers.execution.cleanup.success | counter | controller, type | Number of successful cleanups per controller | +| operator.sdk.controllers.execution.cleanup.failure | counter | controller, exception | Number of failed cleanups per controller | + +As you can see all the recorded metrics start with the `operator.sdk` prefix. + + ## Optimizing Caches One of the ideas around the operator pattern is that all the relevant resources are cached, thus reconciliation is From ddd0d1645b8ebd9dd55e1ff7cf9495e468bb5a97 Mon Sep 17 00:00:00 2001 From: Chris Laprun Date: Wed, 15 Mar 2023 12:56:32 +0100 Subject: [PATCH 12/12] feat: make 1 the default thread count, added javadoc [skip ci] --- .../micrometer/MicrometerMetrics.java | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java b/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java index 39fb5bb1f7..5b3a83a1c6 100644 --- a/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java +++ b/micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java @@ -35,14 +35,39 @@ public class MicrometerMetrics implements Metrics { private final Map> metersPerResource = new ConcurrentHashMap<>(); private final Cleaner cleaner; + /** + * Creates a non-delayed, micrometer-based Metrics implementation. The non-delayed part refers to + * the cleaning of meters associated with deleted resources. + * + * @param registry the {@link MeterRegistry} instance to use for metrics recording + */ public MicrometerMetrics(MeterRegistry registry) { this(registry, 0); } + /** + * Creates a micrometer-based Metrics implementation that delays cleaning up {@link Meter}s + * associated with deleted resources by the specified amount of seconds, using a single thread for + * that process. + * + * @param registry the {@link MeterRegistry} instance to use for metrics recording + * @param cleanUpDelayInSeconds the number of seconds to wait before meters are removed for + * deleted resources + */ public MicrometerMetrics(MeterRegistry registry, int cleanUpDelayInSeconds) { - this(registry, cleanUpDelayInSeconds, 0); + this(registry, cleanUpDelayInSeconds, 1); } + /** + * Creates a micrometer-based Metrics implementation that delays cleaning up {@link Meter}s + * associated with deleted resources by the specified amount of seconds, using the specified + * (maximally) number of threads for that process. + * + * @param registry the {@link MeterRegistry} instance to use for metrics recording + * @param cleanUpDelayInSeconds the number of seconds to wait before meters are removed for + * deleted resources + * @param cleaningThreadsNumber the number of threads to use for the cleaning process + */ public MicrometerMetrics(MeterRegistry registry, int cleanUpDelayInSeconds, int cleaningThreadsNumber) { this.registry = registry;