From d382521a2fe282ac770e7256a55b91cc124b35eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= Date: Fri, 19 Jan 2024 18:21:20 -0600 Subject: [PATCH] feat: Expose metric dictionary to make logging metrics as JSON easier --- docs/implementation/logging.md | 33 ++++++++------- poetry.lock | 13 +++++- pyproject.toml | 1 + singer_sdk/default_logging.yml | 13 ++++++ singer_sdk/metrics.py | 40 ++++++++++-------- tests/core/test_metrics.py | 77 +++++++++++++++++++++++++--------- 6 files changed, 126 insertions(+), 51 deletions(-) diff --git a/docs/implementation/logging.md b/docs/implementation/logging.md index da20a57801..e7332ff54e 100644 --- a/docs/implementation/logging.md +++ b/docs/implementation/logging.md @@ -44,36 +44,41 @@ Users of a tap can configure the SDK logging by setting the `SINGER_SDK_LOG_CONF environment variable. The value of this variable should be a path to a YAML file in the [Python logging dict format](https://docs.python.org/3/library/logging.config.html#dictionary-schema-details). -For example, to send [metrics](./metrics.md) (with logger name `singer_sdk.metrics`) to a file, you could use the following config: +### Metrics logging + +The Singer SDK provides a logger named `singer_sdk.metrics` for logging [Singer metrics](./metrics.md). Metric log records contain an extra field `point` which is a dictionary containing the metric data. The `point` field is formatted as JSON by default. + +To send metrics to a file in JSON format, you could use the following config: ```yaml version: 1 disable_existing_loggers: false formatters: metrics: - format: "{asctime} {levelname} {message}" + (): pythonjsonlogger.jsonlogger.JsonFormatter + format: "{created} {message} {point}" style: "{" handlers: metrics: class: logging.FileHandler formatter: metrics - filename: metrics.log + filename: metrics.jsonl loggers: singer_sdk.metrics: level: INFO handlers: [ metrics ] - propagate: yes + propagate: no ``` -This will send metrics to a `metrics.log`: +This will send metrics to a `metrics.jsonl`: -``` -2022-09-29 00:48:52,746 INFO METRIC: {"metric_type": "timer", "metric": "http_request_duration", "value": 0.501743, "tags": {"stream": "continents", "endpoint": "", "http_status_code": 200, "status": "succeeded"}} -2022-09-29 00:48:52,775 INFO METRIC: {"metric_type": "counter", "metric": "http_request_count", "value": 1, "tags": {"stream": "continents", "endpoint": ""}} -2022-09-29 00:48:52,776 INFO METRIC: {"metric_type": "timer", "metric": "sync_duration", "value": 0.7397160530090332, "tags": {"stream": "continents", "context": {}, "status": "succeeded"}} -2022-09-29 00:48:52,776 INFO METRIC: {"metric_type": "counter", "metric": "record_count", "value": 7, "tags": {"stream": "continents", "context": {}}} -2022-09-29 00:48:53,225 INFO METRIC: {"metric_type": "timer", "metric": "http_request_duration", "value": 0.392148, "tags": {"stream": "countries", "endpoint": "", "http_status_code": 200, "status": "succeeded"}} -2022-09-29 00:48:53,302 INFO METRIC: {"metric_type": "counter", "metric": "http_request_count", "value": 1, "tags": {"stream": "countries", "endpoint": ""}} -2022-09-29 00:48:53,302 INFO METRIC: {"metric_type": "timer", "metric": "sync_duration", "value": 0.5258760452270508, "tags": {"stream": "countries", "context": {}, "status": "succeeded"}} -2022-09-29 00:48:53,303 INFO METRIC: {"metric_type": "counter", "metric": "record_count", "value": 250, "tags": {"stream": "countries", "context": {}}} +```json +{"created": 1705709074.883021, "message": "METRIC", "point": {"type": "timer", "metric": "http_request_duration", "value": 0.501743, "tags": {"stream": "continents", "endpoint": "", "http_status_code": 200, "status": "succeeded"}}} +{"created": 1705709074.897184, "message": "METRIC", "point": {"type": "counter", "metric": "http_request_count", "value": 1, "tags": {"stream": "continents", "endpoint": ""}}} +{"created": 1705709074.897256, "message": "METRIC", "point": {"type": "timer", "metric": "sync_duration", "value": 0.7397160530090332, "tags": {"stream": "continents", "context": {}, "status": "succeeded"}}} +{"created": 1705709074.897292, "message": "METRIC", "point": {"type": "counter", "metric": "record_count", "value": 7, "tags": {"stream": "continents", "context": {}}}} +{"created": 1705709075.397254, "message": "METRIC", "point": {"type": "timer", "metric": "http_request_duration", "value": 0.392148, "tags": {"stream": "countries", "endpoint": "", "http_status_code": 200, "status": "succeeded"}}} +{"created": 1705709075.421888, "message": "METRIC", "point": {"type": "counter", "metric": "http_request_count", "value": 1, "tags": {"stream": "countries", "endpoint": ""}}} +{"created": 1705709075.422001, "message": "METRIC", "point": {"type": "timer", "metric": "sync_duration", "value": 0.5258760452270508, "tags": {"stream": "countries", "context": {}, "status": "succeeded"}}} +{"created": 1705709075.422047, "message": "METRIC", "point": {"type": "counter", "metric": "record_count", "value": 250, "tags": {"stream": "countries", "context": {}}}} ``` diff --git a/poetry.lock b/poetry.lock index d232e337db..f79303aac4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1909,6 +1909,17 @@ files = [ [package.extras] cli = ["click (>=5.0)"] +[[package]] +name = "python-json-logger" +version = "2.0.7" +description = "A python library adding a json log formatter" +optional = false +python-versions = ">=3.6" +files = [ + {file = "python-json-logger-2.0.7.tar.gz", hash = "sha256:23e7ec02d34237c5aa1e29a070193a4ea87583bb4e7f8fd06d3de8264c4b2e1c"}, + {file = "python_json_logger-2.0.7-py3-none-any.whl", hash = "sha256:f380b826a991ebbe3de4d897aeec42760035ac760345e57b812938dc8b35e2bd"}, +] + [[package]] name = "pytz" version = "2023.3.post1" @@ -3062,4 +3073,4 @@ testing = ["pytest", "pytest-durations"] [metadata] lock-version = "2.0" python-versions = ">=3.7.1" -content-hash = "cbbf5badbed96af3d15f4c81e11503578badffc20dbbcc09eaeedcaff2670cf6" +content-hash = "4710aae284257a52e150d6c48df6b13525bf4c767820c373a2987b0e46475ebb" diff --git a/pyproject.toml b/pyproject.toml index 0f5e2ce1a7..44ece7e3df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,6 +63,7 @@ pendulum = [ PyJWT = "~=2.4" python-dateutil = ">=2.8.2" python-dotenv = ">=0.20" +python-json-logger = ">=2" PyYAML = ">=6.0" requests = ">=2.25.1" simpleeval = ">=0.9.13" diff --git a/singer_sdk/default_logging.yml b/singer_sdk/default_logging.yml index 80b08059e0..53a95b019e 100644 --- a/singer_sdk/default_logging.yml +++ b/singer_sdk/default_logging.yml @@ -4,11 +4,24 @@ formatters: console: format: "{asctime:23s} | {levelname:8s} | {name:20s} | {message}" style: "{" + metrics: + (): singer_sdk.metrics.SingerMetricsFormatter + format: "{asctime:23s} | {levelname:8s} | {name:20s} | {message}: {metric_json}" + style: "{" handlers: default: class: logging.StreamHandler formatter: console stream: ext://sys.stderr + metrics: + class: logging.StreamHandler + formatter: metrics + stream: ext://sys.stderr root: level: INFO handlers: [default] +loggers: + singer_sdk.metrics: + level: INFO + handlers: [ metrics ] + propagate: no diff --git a/singer_sdk/metrics.py b/singer_sdk/metrics.py index 990285ae04..f81422c642 100644 --- a/singer_sdk/metrics.py +++ b/singer_sdk/metrics.py @@ -67,29 +67,35 @@ class Point(t.Generic[_TVal]): value: _TVal tags: dict[str, t.Any] = field(default_factory=dict) - def __str__(self) -> str: - """Get string representation of this measurement. + def to_dict(self) -> dict[str, t.Any]: + """Convert this measure to a dictionary. Returns: - A string representation of this measurement. + A dictionary. """ - return self.to_json() + return { + "type": self.metric_type, + "metric": self.metric.value, + "value": self.value, + "tags": self.tags, + } - def to_json(self) -> str: - """Convert this measure to a JSON object. + +class SingerMetricsFormatter(logging.Formatter): + """Logging formatter that adds a ``metric_json`` field to the log record.""" + + def format(self, record: logging.LogRecord) -> str: + """Format a log record. + + Args: + record: A log record. Returns: - A JSON object. + A formatted log record. """ - return json.dumps( - { - "type": self.metric_type, - "metric": self.metric.value, - "value": self.value, - "tags": self.tags, - }, - default=str, - ) + point = record.__dict__.get("point") + record.__dict__["metric_json"] = json.dumps(point, default=str) if point else "" + return super().format(record) def log(logger: logging.Logger, point: Point) -> None: @@ -99,7 +105,7 @@ def log(logger: logging.Logger, point: Point) -> None: logger: An logger instance. point: A measurement. """ - logger.info("METRIC: %s", point) + logger.info("METRIC", extra={"point": point.to_dict()}) class Meter(metaclass=abc.ABCMeta): diff --git a/tests/core/test_metrics.py b/tests/core/test_metrics.py index a9d6b4d261..4d49c85183 100644 --- a/tests/core/test_metrics.py +++ b/tests/core/test_metrics.py @@ -1,9 +1,10 @@ from __future__ import annotations +import json import logging -import time import pytest +import time_machine from singer_sdk import metrics @@ -17,6 +18,32 @@ def __str__(self) -> str: return f"{self.name}={self.value}" +def test_singer_metrics_formatter(): + formatter = metrics.SingerMetricsFormatter(fmt="{metric_json}", style="{") + + record = logging.LogRecord( + name="test", + level=logging.INFO, + pathname="test.py", + lineno=1, + msg="METRIC", + args=(), + exc_info=None, + ) + + assert formatter.format(record) == "" + + metric_dict = { + "type": "counter", + "metric": "test_metric", + "tags": {"test_tag": "test_value"}, + "value": 1, + } + record.__dict__["point"] = metric_dict + + assert formatter.format(record) == json.dumps(metric_dict) + + def test_meter(): class _MyMeter(metrics.Meter): def __enter__(self): @@ -38,6 +65,9 @@ def __exit__(self, exc_type, exc_val, exc_tb): def test_record_counter(caplog: pytest.LogCaptureFixture): + metrics_logger = logging.getLogger(metrics.METRICS_LOGGER_NAME) + metrics_logger.propagate = True + caplog.set_level(logging.INFO, logger=metrics.METRICS_LOGGER_NAME) custom_object = CustomObject("test", 1) @@ -56,48 +86,57 @@ def test_record_counter(caplog: pytest.LogCaptureFixture): total = 0 assert len(caplog.records) == 100 + 1 + # raise AssertionError for record in caplog.records: assert record.levelname == "INFO" - assert record.msg == "METRIC: %s" - assert "test=1" in record.message + assert record.msg.startswith("METRIC") - point: metrics.Point[int] = record.args[0] - assert point.metric_type == "counter" - assert point.metric == "record_count" - assert point.tags == { + point = record.__dict__["point"] + assert point["type"] == "counter" + assert point["metric"] == "record_count" + assert point["tags"] == { metrics.Tag.STREAM: "test_stream", metrics.Tag.ENDPOINT: "test_endpoint", "custom_tag": "pytest", "custom_obj": custom_object, } - total += point.value + total += point["value"] assert total == 100 def test_sync_timer(caplog: pytest.LogCaptureFixture): + metrics_logger = logging.getLogger(metrics.METRICS_LOGGER_NAME) + metrics_logger.propagate = True + caplog.set_level(logging.INFO, logger=metrics.METRICS_LOGGER_NAME) - with metrics.sync_timer("test_stream", custom_tag="pytest") as timer: - start_time = timer.start_time - for _ in range(1000): - time.sleep(0.001) - end_time = time.time() + + traveller = time_machine.travel(0, tick=False) + traveller.start() + + with metrics.sync_timer("test_stream", custom_tag="pytest"): + traveller.stop() + + traveller = time_machine.travel(10, tick=False) + traveller.start() + + traveller.stop() assert len(caplog.records) == 1 record = caplog.records[0] assert record.levelname == "INFO" - assert record.msg == "METRIC: %s" + assert record.msg.startswith("METRIC") - point: metrics.Point[float] = record.args[0] - assert point.metric_type == "timer" - assert point.metric == "sync_duration" - assert point.tags == { + point = record.__dict__["point"] + assert point["type"] == "timer" + assert point["metric"] == "sync_duration" + assert point["tags"] == { metrics.Tag.STREAM: "test_stream", metrics.Tag.STATUS: "succeeded", "custom_tag": "pytest", } - assert pytest.approx(point.value, rel=0.001) == end_time - start_time + assert pytest.approx(point["value"], rel=0.001) == 10.0