Skip to content

Commit

Permalink
feat: enable metrics, sentry, otel (#1144)
Browse files Browse the repository at this point in the history
  • Loading branch information
narasux authored Aug 8, 2023
1 parent 4c848df commit b602371
Show file tree
Hide file tree
Showing 18 changed files with 1,283 additions and 40 deletions.
11 changes: 11 additions & 0 deletions .github/pull_request_template.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
### Description

Fixes # (issue)

### Checklist

- [ ] 填写 PR 描述及相关 issue (write PR description and related issue)
- [ ] 代码风格检查通过 (code style check passed)
- [ ] PR 中包含单元测试 (include unit test)
- [ ] 单元测试通过 (unit test passed)
- [ ] 本地开发联调环境验证通过 (local development environment verification passed)
4 changes: 1 addition & 3 deletions src/bk-user/bkuser/common/middlewares.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@


class RequestProvider:
"""request_id中间件
调用链使用
"""
"""request_id中间件(调用链使用)"""

REQUEST_ID_HEADER_KEY = "X-Request-Id"

Expand Down
8 changes: 5 additions & 3 deletions src/bk-user/bkuser/common/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
)
from rest_framework.response import Response
from rest_framework.views import set_rollback
from sentry_sdk import capture_exception

from bkuser.common.error_codes import error_codes
from bkuser.utils.std_error import APIError
Expand All @@ -47,7 +48,7 @@ def one_line_error(error: ValidationError):


def _handle_exception(request, exc) -> APIError: # noqa: ruff: PLR0911
"""统一处理异常,并转换成api error"""
"""统一处理异常,并转换成 APIError"""
if isinstance(exc, (NotAuthenticated, AuthenticationFailed)):
return error_codes.UNAUTHENTICATED

Expand All @@ -68,15 +69,16 @@ def _handle_exception(request, exc) -> APIError: # noqa: ruff: PLR0911
set_rollback()
return exc

# 非预期内的异常(1)记录日志(2)推送Sentry (3) 以系统异常响应
# 非预期内的异常(1)记录日志(2)推送到 sentry (3) 以系统异常响应
logger.exception(
"catch unexpected error, request url->[%s], request method->[%s] request params->[%s]",
request.path,
request.method,
json.dumps(getattr(request, request.method, None)),
)

# TODO: 推送异常到 sentry
# 推送异常到 sentry
capture_exception(exc)

# Note: 系统异常不暴露异常详情信息,避免敏感信息泄露
return error_codes.SYSTEM_ERROR
Expand Down
2 changes: 1 addition & 1 deletion src/bk-user/bkuser/monitoring/healthz/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def healthz(self, request):
token = request.query_params.get("token", "")
if not settings.HEALTHZ_TOKEN:
return Response(
data={"errors": "Token was not configured in settings, request denied"},
data={"errors": "Healthz token was not configured in settings, request denied"},
status=status.HTTP_400_BAD_REQUEST,
)
if not (token and token == settings.HEALTHZ_TOKEN):
Expand Down
10 changes: 10 additions & 0 deletions src/bk-user/bkuser/monitoring/metrics/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
"""
TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-用户管理(Bk-User) available.
Copyright (C) 2017-2021 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://opensource.org/licenses/MIT
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
"""
15 changes: 15 additions & 0 deletions src/bk-user/bkuser/monitoring/metrics/urls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# -*- coding: utf-8 -*-
"""
TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-用户管理(Bk-User) available.
Copyright (C) 2017-2021 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://opensource.org/licenses/MIT
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
"""
from django.conf.urls import url

from . import views

# Single route exposing the Prometheus metrics view under "metrics".
urlpatterns = [
    url(r"^metrics$", views.metric_view, name="prometheus-django-metrics"),
]
31 changes: 31 additions & 0 deletions src/bk-user/bkuser/monitoring/metrics/views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# -*- coding: utf-8 -*-
"""
TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-用户管理(Bk-User) available.
Copyright (C) 2017-2021 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://opensource.org/licenses/MIT
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
"""
from django.conf import settings
from django_prometheus.exports import ExportToDjangoView
from rest_framework import status
from rest_framework.response import Response


def metric_view(request):
    """Serve the Prometheus metrics export, protected by a shared token.

    The caller must supply ``?token=<settings.METRIC_TOKEN>``.

    Returns:
        * HTTP 400 with an ``errors`` message when ``METRIC_TOKEN`` is not configured;
        * HTTP 400 with an ``errors`` message when the token is missing or wrong;
        * otherwise whatever ``ExportToDjangoView`` produces for the request.

    Rejections are built with Django's ``JsonResponse``: this function is routed as a
    plain Django view, and a DRF ``Response`` is only rendered when it is returned from
    a DRF view (``APIView`` / ``@api_view``), so using it here would fail instead of
    answering with the intended 400.
    """
    # Local imports keep this block self-contained
    import hmac

    from django.http import JsonResponse

    expected_token = settings.METRIC_TOKEN
    if not expected_token:
        return JsonResponse(
            data={"errors": "Metric token was not configured in settings, request denied"},
            status=status.HTTP_400_BAD_REQUEST,
        )

    token = request.GET.get("token", "")
    # Constant-time comparison, so response timing does not leak how much of the token matched.
    # Both sides are encoded to bytes because compare_digest rejects non-ASCII str arguments.
    token_matches = bool(token) and hmac.compare_digest(
        token.encode("utf-8"), str(expected_token).encode("utf-8")
    )
    if not token_matches:
        return JsonResponse(
            data={"errors": "Please provide valid token"},
            status=status.HTTP_400_BAD_REQUEST,
        )

    return ExportToDjangoView(request)
10 changes: 10 additions & 0 deletions src/bk-user/bkuser/monitoring/tracing/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
"""
TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-用户管理(Bk-User) available.
Copyright (C) 2017-2021 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://opensource.org/licenses/MIT
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
"""
42 changes: 42 additions & 0 deletions src/bk-user/bkuser/monitoring/tracing/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
"""
TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-用户管理(Bk-User) available.
Copyright (C) 2017-2021 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://opensource.org/licenses/MIT
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
"""

from celery.signals import worker_process_init
from django.apps import AppConfig

from .otel import setup_by_settings
from .sentry import init_sentry_sdk


class TracingConfig(AppConfig):
    """App config for ``bkuser.monitoring.tracing``; sets up otel and sentry once apps are ready."""

    name = "bkuser.monitoring.tracing"

    def ready(self):
        """Run the otel setup from settings, then initialise the sentry sdk with all integrations on."""
        setup_by_settings()
        init_sentry_sdk(django_integrated=True, redis_integrated=True, celery_integrated=True)


@worker_process_init.connect(weak=False)
def worker_process_init_otel_trace_setup(*args, **kwargs):
    """Celery ``worker_process_init`` receiver: run the otel setup again for this worker process."""
    setup_by_settings()


@worker_process_init.connect(weak=False)
def worker_process_init_sentry_setup(*args, **kwargs):
    """Celery ``worker_process_init`` receiver: initialise the sentry sdk for this worker process."""
    init_sentry_sdk(django_integrated=True, redis_integrated=True, celery_integrated=True)
120 changes: 120 additions & 0 deletions src/bk-user/bkuser/monitoring/tracing/hooks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# -*- coding: utf-8 -*-
"""
TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-用户管理(Bk-User) available.
Copyright (C) 2017-2021 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://opensource.org/licenses/MIT
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
"""
import json
from typing import Dict

import requests
from django.http import HttpRequest, HttpResponse
from opentelemetry.trace import Span, StatusCode, format_trace_id
from rest_framework import status


def handle_api_error(span: Span, result: Dict):
    """Copy the error details of a new-style HTTP API response body onto the span.

    Args:
        span: the span to annotate.
        result: decoded JSON response body. Only a dict-valued ``"error"`` entry is
            understood; its ``code``, ``message``, ``system`` and ``details`` fields
            are read.

    The span is left untouched when ``result`` has no ``"error"`` entry, or when that
    entry is not a dict (for example a bare string or ``null``). Tracing code must not
    raise because a response uses an unexpected shape.
    """
    err = result.get("error")
    if not isinstance(err, dict):
        return

    span.set_attribute("error_code", err.get("code", ""))
    span.set_attribute("error_message", err.get("message", ""))
    span.set_attribute("error_system", err.get("system", ""))
    # When details are present they are serialised into one attribute
    if err_details := err.get("details", []):
        span.set_attribute("error_details", json.dumps(err_details))


def requests_response_hook(span: Span, response: requests.Response):
    """Annotate the span of an outgoing ``requests`` call from the response it received.

    Supports the response conventions of the new and old ESB, the API gateway and the
    new-style HTTP API protocol:

    * ``request_id`` is taken from the ``x-bkapi-request-id`` header, then the
      ``x-request-id`` header, then the ``request_id`` field of the body;
    * a top-level ``message`` is recorded as ``error_message``;
    * a top-level ``code`` (legacy protocol) decides the span status: ``0`` / ``"0"`` /
      ``"00"`` mean success, anything else is an error;
    * without ``code`` the body is treated as new-style: error details are recorded
      and the span status follows the HTTP status code.

    Non-JSON responses, unparsable bodies and non-object JSON bodies are ignored.

    NOTE(review): some opentelemetry-instrumentation-requests releases appear to call
    response hooks as ``(span, request, response)`` — confirm that the pinned version
    matches this two-argument signature.
    """
    # The request failed before a response was produced (e.g. a timeout)
    if response is None:
        return

    # Not every body is JSON, so rely on the declared media type to avoid parsing binary
    # payloads. Parameters such as "; charset=utf-8" are stripped before comparing,
    # otherwise ordinary JSON responses that declare a charset would be skipped.
    content_type = response.headers.get("Content-Type") or ""
    if content_type.split(";", 1)[0].strip().lower() != "application/json":
        return

    try:
        result = json.loads(response.content)
    except Exception:  # pylint: disable=broad-except
        # Best effort: a tracing hook must never break the caller's request
        return
    if not isinstance(result, dict):
        return

    request_id = (
        # new esb and apigateway
        response.headers.get("x-bkapi-request-id")
        # legacy api
        or response.headers.get("x-request-id")
        # old esb and other
        or result.get("request_id", "")
    )
    if request_id:
        span.set_attribute("request_id", request_id)

    if "message" in result:
        span.set_attribute("error_message", result["message"])

    # Legacy APIs: code 0 / '0' / '00' means success
    code = result.get("code")
    if code is not None:
        code_text = str(code)
        span.set_attribute("error_code", code_text)
        span.set_status(StatusCode.OK if code_text in ("0", "00") else StatusCode.ERROR)
        # Everything below only applies to the new-style protocol
        return

    # New-style HTTP API protocol: record the structured error, if there is one.
    # Third-party services may use a plain string for "error"; that form is skipped.
    if isinstance(result.get("error"), dict):
        handle_api_error(span, result)

    if status.is_success(response.status_code):
        span.set_status(StatusCode.OK)
    else:
        span.set_status(StatusCode.ERROR)


def django_request_hook(span: Span, request: HttpRequest):
    """Store the span's formatted trace id on the request as ``otel_trace_id`` for later lookup."""
    span_context = span.get_span_context()
    request.otel_trace_id = format_trace_id(span_context.trace_id)


def django_response_hook(span: Span, request: HttpRequest, response: HttpResponse):
    """Set the span status and error attributes from a Django response.

    Only the new-style HTTP API protocol is handled:

    * non-JSON responses are ignored;
    * a 2xx status marks the span OK and nothing else is recorded;
    * any other status marks the span as ERROR; if the body is a JSON object, its
      ``request_id`` and structured ``error`` details are added to the span.
    """
    # No response object to inspect
    if response is None:
        return

    # Not every body is JSON, so rely on the declared media type to avoid parsing binary
    # payloads. Parameters such as "; charset=utf-8" are stripped before comparing, so a
    # JSON response that declares a charset is still recognised.
    content_type = response.headers.get("Content-Type") or ""
    if content_type.split(";", 1)[0].strip().lower() != "application/json":
        return

    # New-style protocol follows plain HTTP semantics: 200 <= status < 300 is success
    if status.is_success(response.status_code):
        span.set_status(StatusCode.OK)
        return

    span.set_status(StatusCode.ERROR)
    try:
        result = json.loads(response.content)
    except Exception:  # pylint: disable=broad-except
        # Best effort: the status is already recorded, details are optional
        return
    if not isinstance(result, dict):
        return

    # Record the request_id as well when the body carries one
    if request_id := result.get("request_id"):
        span.set_attribute("request_id", request_id)

    handle_api_error(span, result)
59 changes: 59 additions & 0 deletions src/bk-user/bkuser/monitoring/tracing/instrumentor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-
"""
TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-用户管理(Bk-User) available.
Copyright (C) 2017-2021 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://opensource.org/licenses/MIT
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
"""
import logging
from typing import Collection

from django.conf import settings
from opentelemetry.instrumentation import dbapi
from opentelemetry.instrumentation.celery import CeleryInstrumentor
from opentelemetry.instrumentation.django import DjangoInstrumentor
from opentelemetry.instrumentation.instrumentor import BaseInstrumentor # type: ignore
from opentelemetry.instrumentation.logging import LoggingInstrumentor
from opentelemetry.instrumentation.redis import RedisInstrumentor
from opentelemetry.instrumentation.requests import RequestsInstrumentor

from bkuser.monitoring.tracing.hooks import django_request_hook, django_response_hook, requests_response_hook

# Module-level logger, named after this module
logger = logging.getLogger(__name__)


class BKUserInstrumentor(BaseInstrumentor):
    """Umbrella instrumentor that enables all otel instrumentations used by this service.

    ``_instrument`` turns on logging, requests, django, redis and celery
    instrumentation, plus MySQLdb connection tracing when
    ``settings.OTEL_INSTRUMENT_DB_API`` is truthy. ``_uninstrument`` reverts exactly
    what ``_instrument`` enabled.
    """

    def instrumentation_dependencies(self) -> Collection[str]:
        """Return the package requirements of this instrumentor (none are declared)."""
        return []

    def _instrument(self, **kwargs):
        """Enable every sub-instrumentor and remember it for ``_uninstrument``."""
        # Filled in step by step, so a failure half-way still leaves an accurate record
        # of what has to be reverted.
        self.instrumentors = []
        self._db_api_wrapped = False

        pending = [
            ("logging", LoggingInstrumentor(), {}),
            ("requests", RequestsInstrumentor(), {"response_hook": requests_response_hook}),
            (
                "django",
                DjangoInstrumentor(),
                {"request_hook": django_request_hook, "response_hook": django_response_hook},
            ),
            ("redis", RedisInstrumentor(), {}),
            ("celery", CeleryInstrumentor(), {}),
        ]
        for label, instrumentor, options in pending:
            instrumentor.instrument(**options)
            self.instrumentors.append(instrumentor)
            logger.info("otel instructment: %s", label)

        if getattr(settings, "OTEL_INSTRUMENT_DB_API", False):
            import MySQLdb  # noqa

            dbapi.wrap_connect(
                __name__,
                MySQLdb,
                "connect",
                "mysql",
                {"database": "db", "port": "port", "host": "host", "user": "user"},
            )
            self._db_api_wrapped = True
            logger.info("otel instructment: database api")

    def _uninstrument(self, **kwargs):
        """Disable everything ``_instrument`` enabled."""
        # getattr keeps this safe even if _instrument never ran on this instance
        for instrumentor in getattr(self, "instrumentors", []):
            logger.info("otel uninstrument %s", instrumentor)
            instrumentor.uninstrument()
        self.instrumentors = []

        if getattr(self, "_db_api_wrapped", False):
            import MySQLdb  # noqa

            # Restore the original MySQLdb.connect patched by dbapi.wrap_connect
            dbapi.unwrap_connect(MySQLdb, "connect")
            self._db_api_wrapped = False
Loading

0 comments on commit b602371

Please sign in to comment.