Skip to content

Commit

Permalink
feat: Job backend configuration options
Browse files Browse the repository at this point in the history
* Make Config a subclass of UserDict

* Remove stray import

* Move metrics report API call into the reporter

* Update sample apps

* Update Django 2.2.21 sample app with Poetry

* Update sample apps' README for clarity

* Improve Config.update for nested dicts

* Implement support for QUEUES and MAX_QUEUES

* Remove unnecessary test script

* Remove stray print()

* Include information about configuring task processing integrations

* Remove internal configuration options from README examples

* Move collector config defaults into collector's module
  • Loading branch information
karls authored Mar 27, 2023
1 parent a35a38a commit e2d3262
Show file tree
Hide file tree
Showing 35 changed files with 690 additions and 118 deletions.
73 changes: 57 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,8 @@ Optionally, you can customize Judoscale in `settings.py`:

```python
JUDOSCALE = {
# LOG_LEVEL defaults to ENV["LOG_LEVEL"] or "INFO".
# Log level defaults to ENV["LOG_LEVEL"] or "INFO".
"LOG_LEVEL": "DEBUG",

# API_BASE_URL defaults to ENV["JUDOSCALE_URL"], which is set for you when you install Judoscale.
# This is only exposed for testing purposes.
"API_BASE_URL": "https://example.com",

# REPORT_INTERVAL_SECONDS defaults to 10 seconds.
"REPORT_INTERVAL_SECONDS": 5,
}
```

Expand Down Expand Up @@ -91,15 +84,8 @@ Optionally, you can override Judoscale's own configuration via your application'

```python
JUDOSCALE = {
# LOG_LEVEL defaults to ENV["LOG_LEVEL"] or "INFO".
# Log level defaults to ENV["LOG_LEVEL"] or "INFO".
"LOG_LEVEL": "DEBUG",

# API_BASE_URL defaults to ENV["JUDOSCALE_URL"], which is set for you when you install Judoscale.
# This is only exposed for testing purposes.
"API_BASE_URL": "https://example.com",

# REPORT_INTERVAL_SECONDS defaults to 10 seconds.
"REPORT_INTERVAL_SECONDS": 5,
}
```

Expand Down Expand Up @@ -143,6 +129,34 @@ If you need to change the Judoscale integration configuration, you can pass a di
judoscale_celery(celery_app, extra_config={"LOG_LEVEL": "DEBUG"})
```

An example configuration dictionary accepted by `extra_config`:

```py
{
"LOG_LEVEL": "INFO",

# In addition to global configuration options for the Judoscale
# integration above, you can also specify the following configuration
# options for the Celery integration.
"CELERY": {
# Enable (default) or disable the Celery integration
"ENABLED": True,

# Report metrics on up to MAX_QUEUES queues.
        # The list of discovered queues is sorted by the length
# of the queue name (shortest first) and metrics are
# reported for the first MAX_QUEUES queues.
# Defaults to 20.
"MAX_QUEUES": 20,

# Specify a list of known queues to report metrics for.
# MAX_QUEUES is still honoured.
# Defaults to empty list (report metrics for discovered queues).
"QUEUES": []
}
}
```

> :warning: **NOTE:** Calling `judoscale_celery` turns on sending [`task-sent`](https://docs.celeryq.dev/en/stable/userguide/configuration.html#task-send-sent-event) events. This is required for the Celery integration with Judoscale to work.
### Judoscale with Celery and Flask
Expand Down Expand Up @@ -205,6 +219,33 @@ If you need to change the Judoscale integration configuration, you can pass a di
judoscale_rq(redis, extra_config={"LOG_LEVEL": "DEBUG"})
```

An example configuration dictionary accepted by `extra_config`:

```py
{
"LOG_LEVEL": "INFO",

# In addition to global configuration options for the Judoscale
# integration above, you can also specify the following configuration
# options for the RQ integration.
"RQ": {
# Enable (default) or disable the RQ integration
"ENABLED": True,

# Report metrics on up to MAX_QUEUES queues.
        # The list of discovered queues is sorted by the length
# of the queue name (shortest first) and metrics are
# reported for the first MAX_QUEUES queues.
# Defaults to 20.
"MAX_QUEUES": 20,

# Specify a list of known queues to report metrics for.
# MAX_QUEUES is still honoured.
# Defaults to empty list (report metrics for discovered queues).
"QUEUES": []
    }
}
```

### Judoscale with RQ and Flask

The recommended way to initialise Judoscale for RQ is in the application factory:
Expand Down
2 changes: 1 addition & 1 deletion judoscale/celery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def before_publish(*args, properties={}, **kwargs):
def judoscale_celery(celery: Celery, extra_config: Mapping = {}) -> None:
celery.conf.task_send_sent_event = True

judoconfig.merge(extra_config)
judoconfig.update(extra_config)
collector = CeleryMetricsCollector(config=judoconfig, broker=celery)
adapter = Adapter(
identifier="judoscale-celery",
Expand Down
23 changes: 19 additions & 4 deletions judoscale/celery/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from threading import Thread
from typing import List, Optional, Set

import redis.exceptions as re
from celery import Celery
from kombu import Connection
from redis import Redis
Expand All @@ -13,6 +12,12 @@
from judoscale.core.metric import Metric
from judoscale.core.metrics_collectors import JobMetricsCollector

DEFAULTS = {
"ENABLED": True,
"MAX_QUEUES": 20,
"QUEUES": [],
}


class TaskSentHandler(Thread):
def __init__(
Expand All @@ -27,7 +32,7 @@ def __init__(
super().__init__(*args, daemon=True, **kwargs)

def task_sent(self, event):
    """Handle a Celery "task-sent" event by recording the destination queue.

    Queues are accumulated in the collector's set of seen queue names so
    metrics can later be reported for them.
    """
    # NOTE: the rendered diff showed the stale pre-rename line
    # (`self.collector.queues.add(...)`) alongside this one; only the
    # renamed attribute `_celery_queues` exists after this change.
    self.collector._celery_queues.add(event["queue"])

def run(self):
logger.debug("Starting TaskSentHandler")
Expand All @@ -42,6 +47,8 @@ class CeleryMetricsCollector(JobMetricsCollector):
def __init__(self, config: Config, broker: Celery):
super().__init__(config=config)

self.config["CELERY"] = {**DEFAULTS, **self.config.get("CELERY", {})}

self.broker = broker
connection = self.broker.connection_for_read()
if connection.transport.driver_name != "redis":
Expand All @@ -56,7 +63,7 @@ def __init__(self, config: Config, broker: Celery):
"Unsupported Redis server version. Minimum Redis version is 6.0."
)

self.queues: Set[str] = set()
self._celery_queues: Set[str] = set()
self.task_sent_handler = TaskSentHandler(self, connection)
logger.debug(f"Redis is at {self.redis.connection_pool}")

Expand All @@ -66,14 +73,22 @@ def __init__(self, config: Config, broker: Celery):
for q in self.redis.scan_iter(match="[^_]*", _type="list")
}
logger.debug(f"Found initial queues: {list(user_queues)}")
self.queues = user_queues - system_queues
self._celery_queues = user_queues - system_queues
self.task_sent_handler.start()

@property
def is_supported_redis_version(self):
    """Return True when the connected Redis server is version 6.0 or newer."""
    reported_version = self.redis.info()["redis_version"]
    major = int(reported_version.partition(".")[0])
    return major >= 6

@property
def adapter_config(self):
    """Return the Celery-specific section of the Judoscale configuration."""
    return self.config["CELERY"]

@property
def _queues(self) -> List[str]:
return list(self._celery_queues)

def oldest_task(self, queue: str) -> Optional[dict]:
"""
Get the oldest task from the queue.
Expand Down
22 changes: 0 additions & 22 deletions judoscale/core/adapter_api_client.py

This file was deleted.

52 changes: 34 additions & 18 deletions judoscale/core/config.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import logging
import os
from collections import UserDict
from typing import Mapping

from judoscale.core.logger import logger

DEFAULTS = {"REPORT_INTERVAL_SECONDS": 10, "LOG_LEVEL": "WARN"}


class RuntimeContainer:
def __init__(self, service_name, instance, service_type):
Expand All @@ -23,14 +26,21 @@ def __str__(self):
return f"{self.service_name}.{self.instance}"


class Config:
class Config(UserDict):
def __init__(
self, runtime_container: RuntimeContainer, api_base_url: str, log_level: str
self, runtime_container: RuntimeContainer, api_base_url: str, env: Mapping
):
self.runtime_container = runtime_container
self.log_level = log_level
self.report_interval_seconds = 10
self.api_base_url = api_base_url
initialdata = dict(
DEFAULTS,
RUNTIME_CONTAINER=runtime_container,
API_BASE_URL=api_base_url,
)

for key in {"LOG_LEVEL", "RQ", "CELERY"}:
if key in env:
initialdata[key] = env[key]

super().__init__(initialdata)
self._prepare_logging()

@classmethod
Expand All @@ -40,7 +50,7 @@ def initialize(cls, env: Mapping = os.environ):
elif env.get("RENDER_INSTANCE_ID"):
return cls.for_render(env)
else:
return cls(None, "", "INFO")
return cls(None, "", env)

@classmethod
def for_heroku(cls, env: Mapping):
Expand All @@ -49,8 +59,7 @@ def for_heroku(cls, env: Mapping):

runtime_container = RuntimeContainer(service_name, instance, service_type)
api_base_url = env.get("JUDOSCALE_URL")
log_level = env.get("LOG_LEVEL", "INFO").upper()
return cls(runtime_container, api_base_url, log_level)
return cls(runtime_container, api_base_url, env)

@classmethod
def for_render(cls, env: Mapping):
Expand All @@ -60,23 +69,30 @@ def for_render(cls, env: Mapping):

runtime_container = RuntimeContainer(service_id, instance, service_type)
api_base_url = f"https://adapter.judoscale.com/api/{service_id}"
log_level = env.get("LOG_LEVEL", "INFO").upper()
return cls(runtime_container, api_base_url, log_level)

def merge(self, settings: Mapping):
for key, value in settings.items():
setattr(self, key.lower(), value)
return cls(runtime_container, api_base_url, env)

def update(self, new_config: Mapping):
    """Merge *new_config* into this config, one level deep.

    When both the existing value and the incoming value for a key are
    dicts, the incoming dict is merged into the existing one instead of
    replacing it wholesale; every other value is overwritten. Logging is
    re-initialized afterwards so a changed LOG_LEVEL takes effect.
    """
    for key, value in new_config.items():
        existing = self.get(key)
        if isinstance(existing, dict) and isinstance(value, dict):
            existing.update(value)
        else:
            self[key] = value
    self._prepare_logging()

def merge(self, new_config: Mapping):
    """Deprecated alias for :meth:`update`, kept for backwards compatibility."""
    logger.warning("Config.merge() is deprecated. Use Config.update() instead.")
    self.update(new_config)

@property
def for_report(self):
    """Return the subset of configuration that is sent along with metrics reports.

    Uses dict-style access (the class is a UserDict); the old attribute
    forms (``self.log_level`` etc.) no longer exist after the UserDict
    migration, so the stale duplicate entries shown in the rendered diff
    are dropped here.
    """
    # Only include the config options we want to include in the report
    return {
        "log_level": self["LOG_LEVEL"],
        "report_interval_seconds": self["REPORT_INTERVAL_SECONDS"],
    }

def _prepare_logging(self):
log_level = logging.getLevelName(self.log_level.upper())
log_level = logging.getLevelName(self["LOG_LEVEL"].upper())
logger.setLevel(log_level)

if not logger.handlers:
Expand Down
45 changes: 42 additions & 3 deletions judoscale/core/metrics_collectors.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import List, Protocol
from typing import List, Protocol, Set

from judoscale.core.config import Config
from judoscale.core.logger import logger
from judoscale.core.metric import Metric
from judoscale.core.metrics_store import MetricsStore

Expand Down Expand Up @@ -30,7 +31,7 @@ def __init__(self, config: Config):

@property
def should_collect(self):
    """Web metrics are collected only on web instances.

    The rendered diff showed the stale attribute-access form
    (``self.config.runtime_container``) next to the new dict-access form;
    only the dict access is valid after the UserDict migration.
    """
    return self.config["RUNTIME_CONTAINER"].is_web_instance

def add(self, metric: Metric):
"""
Expand All @@ -47,9 +48,47 @@ def collect(self) -> List[Metric]:


class JobMetricsCollector(MetricsCollector):
    """
    Base class for job metrics collectors.
    This class is not meant to be instantiated directly.

    Subclasses must provide `adapter_config` (their integration's config
    section, e.g. config["CELERY"]) and override `_queues` with the queues
    they discover at runtime.
    """

    def __init__(self, config: Config):
        super().__init__(config=config)

    @property
    def adapter_config(self):
        """Integration-specific config section; must be overridden."""
        raise NotImplementedError("Implement `adapter_config` in a subclass.")

    @property
    def _queues(self) -> List[str]:
        """Queues discovered at runtime; subclasses supply real data."""
        return []

    @property
    def queues(self) -> Set[str]:
        """Queues to report metrics for.

        Explicitly configured queues (QUEUES) take precedence over the
        discovered ones; either way the MAX_QUEUES cap is applied.
        """
        if configured_queues := self.adapter_config["QUEUES"]:
            return self.limit_max_queues(configured_queues)
        else:
            return self.limit_max_queues(self._queues)

    @property
    def should_collect(self):
        """Collect only when the integration is enabled and this container
        is not a redundant instance."""
        return (
            self.adapter_config["ENABLED"]
            and not self.config["RUNTIME_CONTAINER"].is_redundant_instance
        )

    def limit_max_queues(self, queues: List[str]) -> Set[str]:
        """
        Limit the number of queues to collect metrics for.

        Queues are sorted by name length (shortest first) and only the
        first MAX_QUEUES are kept; a warning is logged when any are dropped.
        """
        max_queues = self.adapter_config["MAX_QUEUES"]

        if len(queues) > max_queues:
            logger.warning(
                f"{self.__class__.__name__} reporting only {max_queues} queues max, "
                f"skipping the rest ({len(queues) - max_queues})."
            )

        # key=len replaces the equivalent lambda; set() deduplicates.
        return set(sorted(queues, key=len)[:max_queues])
Loading

0 comments on commit e2d3262

Please sign in to comment.