Skip to content

Commit

Permalink
feat(sqlserver): sqlserver巡检任务 TencentBlueKing#8033
Browse files Browse the repository at this point in the history
  • Loading branch information
yksitu committed Nov 22, 2024
1 parent 5d232b4 commit 608888f
Show file tree
Hide file tree
Showing 8 changed files with 609 additions and 2 deletions.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
"""
TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-DB管理系统(BlueKing-BK-DBM) available.
Copyright (C) 2017-2023 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at https://opensource.org/licenses/MIT
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
"""

from backend.db_meta.enums import ClusterPhase, ClusterType, InstanceInnerRole, InstanceStatus
from backend.db_meta.models import Cluster, StorageInstance
from backend.db_report.models.sqlserver_check_report import (
CheckAppSettingReport,
CheckJobSyncReport,
CheckLinkServerReport,
CheckSysJobStatuReport,
CheckUserSyncReport,
)
from backend.flow.utils.sqlserver.sqlserver_bk_config import (
get_module_infos,
get_sqlserver_alarm_config,
get_sqlserver_backup_config,
)
from backend.flow.utils.sqlserver.sqlserver_db_function import (
check_ha_config,
check_sys_job_status,
fix_app_setting_data,
get_app_setting_data,
insert_sqlserver_config,
)


class CheckAppSettingData(object):
    """
    Patrol (inspection) task for SQL Server clusters; DBM metadata is the source of truth.

    Checks whether each instance's app_setting table matches the DBM metadata and, for HA
    clusters, whether users / jobs / link servers agree between master and slave. Every
    inconsistency found is written to the matching report model so the DBA can be notified.
    """

    def __init__(self):
        # Select every ONLINE SQL Server cluster (HA and single-node), prefetching the
        # storage instances and their machines.
        self.clusters = Cluster.objects.prefetch_related(
            "storageinstance_set",
            "storageinstance_set__machine",
        ).filter(phase=ClusterPhase.ONLINE, cluster_type__in=[ClusterType.SqlserverHA, ClusterType.SqlserverSingle])

    def check_task(self):
        """
        Run the full patrol over every selected cluster.

        All clusters get the app_setting check and the system-job-status check; HA clusters
        additionally get the master/slave consistency checks for each RUNNING slave.
        """
        for cluster in self.clusters:
            self.check_app_setting_data(cluster)
            self.check_job_is_disabled(cluster)
            if cluster.cluster_type == ClusterType.SqlserverHA:
                # NOTE(review): .get() raises if the cluster has no master or more than one,
                # which would abort the patrol for all remaining clusters - confirm that an
                # ONLINE HA cluster always has exactly one master.
                master = cluster.storageinstance_set.get(instance_inner_role=InstanceInnerRole.MASTER)
                for s in cluster.storageinstance_set.filter(
                    status=InstanceStatus.RUNNING, instance_inner_role=InstanceInnerRole.SLAVE
                ):
                    self.check_user(master_instance=master, slave_instance=s, cluster=cluster)
                    self.check_job(master_instance=master, slave_instance=s, cluster=cluster)
                    self.check_link_server(master_instance=master, slave_instance=s, cluster=cluster)

    @staticmethod
    def fix_app_setting_data(cluster: Cluster, instance: StorageInstance):
        """
        Try to repair inconsistent app_setting data on the instance and record the outcome.

        A CheckAppSettingReport row is always written with is_inconsistent=1; is_fix is 1
        when the module-level fix_app_setting_data helper returned a truthy value, else 0.
        Always returns True.
        """
        # Inside the method body this name resolves to the imported module-level helper,
        # not to this static method.
        is_fix = 1 if fix_app_setting_data(cluster=cluster, instance=instance) else 0
        CheckAppSettingReport.objects.create(
            cluster=cluster.name,
            cluster_type=cluster.cluster_type,
            instance_host=instance.machine.ip,
            instance_port=instance.port,
            is_inconsistent=1,
            is_fix=is_fix,
        )
        return True

    @staticmethod
    def add_app_setting_data(cluster: Cluster, instance: StorageInstance):
        """
        Insert the missing app_setting data on the instance and record the outcome.

        A CheckAppSettingReport row is always written with is_inconsistent=1; is_fix is 1
        when insert_sqlserver_config returned a truthy value, else 0. Always returns True.
        """
        # Charset configured for the cluster's module.
        charset = get_module_infos(
            bk_biz_id=cluster.bk_biz_id,
            db_module_id=cluster.db_module_id,
            cluster_type=ClusterType(cluster.cluster_type),
        )["charset"]

        # Backup configuration of the cluster.
        backup_config = get_sqlserver_backup_config(
            bk_biz_id=cluster.bk_biz_id,
            db_module_id=cluster.db_module_id,
            cluster_domain=cluster.immute_domain,
        )

        # Cluster-specific (alarm) configuration.
        alarm_config = get_sqlserver_alarm_config(
            bk_biz_id=cluster.bk_biz_id,
            db_module_id=cluster.db_module_id,
            cluster_domain=cluster.immute_domain,
        )

        # Write the configuration to this single instance.
        inserted = insert_sqlserver_config(
            cluster=cluster,
            storages=[instance],
            charset=charset,
            backup_config=backup_config,
            alarm_config=alarm_config,
        )
        is_fix = 1 if inserted else 0

        CheckAppSettingReport.objects.create(
            cluster=cluster.name,
            cluster_type=cluster.cluster_type,
            instance_host=instance.machine.ip,
            instance_port=instance.port,
            is_inconsistent=1,
            is_fix=is_fix,
        )
        return True

    def check_app_setting_data(self, cluster: Cluster):
        """
        Compare app_setting on every RUNNING instance of the cluster with the DBM metadata.

        - None result: treated as an access failure, the instance is skipped this round.
        - Empty result: app_setting is not configured, so it is inserted.
        - Any field mismatch: a repair is attempted.
        """
        for instance in cluster.storageinstance_set.filter(status=InstanceStatus.RUNNING):
            data = get_app_setting_data(instance=instance, bk_cloud_id=cluster.bk_cloud_id)
            if data is None:
                # None most likely means the instance could not be reached; skip this round.
                continue
            if len(data) == 0:
                # No app_setting configured at all: insert it, then move on. Without this
                # `continue` the field comparison below would index into the empty result
                # and raise, aborting the whole patrol.
                self.add_app_setting_data(cluster=cluster, instance=instance)
                continue

            if (
                data["BK_BIZ_ID"] != cluster.bk_biz_id
                or data["BK_CLOUD_ID"] != cluster.bk_cloud_id
                or data["CLUSTER_ID"] != cluster.id
                or data["CLUSTER_DOMAIN"] != cluster.immute_domain
                or data["PORT"] != instance.port
                or data["ROLE"] != instance.instance_inner_role
            ):
                # Metadata differs from what the instance holds: try to repair it.
                self.fix_app_setting_data(cluster=cluster, instance=instance)

    @staticmethod
    def check_user(master_instance: StorageInstance, slave_instance: StorageInstance, cluster: Cluster):
        """
        Check that users are consistent between master and slave; report the slave if not.
        """
        if not check_ha_config(
            master_instance=master_instance,
            slave_instance=slave_instance,
            bk_cloud_id=cluster.bk_cloud_id,
            check_tag="user",
        ):
            CheckUserSyncReport.objects.create(
                cluster=cluster.name,
                cluster_type=cluster.cluster_type,
                instance_host=slave_instance.machine.ip,
                instance_port=slave_instance.port,
                is_user_inconsistent=1,
            )

    @staticmethod
    def check_job(master_instance: StorageInstance, slave_instance: StorageInstance, cluster: Cluster):
        """
        Check that business jobs are consistent between master and slave; report the slave if not.
        """
        if not check_ha_config(
            master_instance=master_instance,
            slave_instance=slave_instance,
            bk_cloud_id=cluster.bk_cloud_id,
            check_tag="job",
        ):
            CheckJobSyncReport.objects.create(
                cluster=cluster.name,
                cluster_type=cluster.cluster_type,
                instance_host=slave_instance.machine.ip,
                instance_port=slave_instance.port,
                is_job_inconsistent=1,
            )

    @staticmethod
    def check_link_server(master_instance: StorageInstance, slave_instance: StorageInstance, cluster: Cluster):
        """
        Check that link servers are consistent between master and slave; report the slave if not.
        """
        # NOTE(review): check_tag is "job", identical to check_job, so this currently repeats
        # the job comparison rather than comparing link servers. This looks like a copy-paste
        # slip; confirm the link-server tag accepted by check_ha_config and switch to it.
        if not check_ha_config(
            master_instance=master_instance,
            slave_instance=slave_instance,
            bk_cloud_id=cluster.bk_cloud_id,
            check_tag="job",
        ):
            CheckLinkServerReport.objects.create(
                cluster=cluster.name,
                cluster_type=cluster.cluster_type,
                instance_host=slave_instance.machine.ip,
                instance_port=slave_instance.port,
                is_link_server_inconsistent=1,
            )

    @staticmethod
    def check_job_is_disabled(cluster: Cluster):
        """
        Check the system job status on every RUNNING instance of the cluster.

        A CheckSysJobStatuReport row with is_job_disable=1 is written for each instance for
        which check_sys_job_status returns a falsy value.
        """
        for instance in cluster.storageinstance_set.filter(status=InstanceStatus.RUNNING):
            if not check_sys_job_status(cluster=cluster, instance=instance):
                CheckSysJobStatuReport.objects.create(
                    cluster=cluster.name,
                    cluster_type=cluster.cluster_type,
                    instance_host=instance.machine.ip,
                    instance_port=instance.port,
                    is_job_disable=1,
                )
28 changes: 28 additions & 0 deletions dbm-ui/backend/db_periodic_task/local_tasks/sqlserver/task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""
TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-DB管理系统(BlueKing-BK-DBM) available.
Copyright (C) 2017-2023 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at https://opensource.org/licenses/MIT
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
"""

import logging

from celery.schedules import crontab

from backend.db_periodic_task.local_tasks.register import register_periodic_task
from backend.db_periodic_task.local_tasks.sqlserver.check_app_setting_data import CheckAppSettingData

logger = logging.getLogger("celery")


@register_periodic_task(run_every=crontab(minute=30, hour=6))
def check_instance_app_setting():
    """
    Check that the instance metadata table (app_setting) of SQL Server clusters is healthy.

    Scheduled every day at 06:30 by the crontab above.
    """
    # Instantiate the checker and actually run it; the previous body only named the class
    # and then passed, so the scheduled task never performed any check.
    CheckAppSettingData().check_task()
Loading

0 comments on commit 608888f

Please sign in to comment.