forked from TencentBlueKing/blueking-dbm
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(sqlserver): sqlserver巡检任务 TencentBlueKing#8033
- Loading branch information
Showing
8 changed files
with
609 additions
and
2 deletions.
There are no files selected for viewing
Empty file.
218 changes: 218 additions & 0 deletions
218
dbm-ui/backend/db_periodic_task/local_tasks/sqlserver/check_app_setting_data.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,218 @@ | ||
""" | ||
TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-DB管理系统(BlueKing-BK-DBM) available. | ||
Copyright (C) 2017-2023 THL A29 Limited, a Tencent company. All rights reserved. | ||
Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at https://opensource.org/licenses/MIT | ||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on | ||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
specific language governing permissions and limitations under the License. | ||
""" | ||
|
||
from backend.db_meta.enums import ClusterPhase, ClusterType, InstanceInnerRole, InstanceStatus | ||
from backend.db_meta.models import Cluster, StorageInstance | ||
from backend.db_report.models.sqlserver_check_report import ( | ||
CheckAppSettingReport, | ||
CheckJobSyncReport, | ||
CheckLinkServerReport, | ||
CheckSysJobStatuReport, | ||
CheckUserSyncReport, | ||
) | ||
from backend.flow.utils.sqlserver.sqlserver_bk_config import ( | ||
get_module_infos, | ||
get_sqlserver_alarm_config, | ||
get_sqlserver_backup_config, | ||
) | ||
from backend.flow.utils.sqlserver.sqlserver_db_function import ( | ||
check_ha_config, | ||
check_sys_job_status, | ||
fix_app_setting_data, | ||
get_app_setting_data, | ||
insert_sqlserver_config, | ||
) | ||
|
||
|
||
class CheckAppSettingData(object):
    """
    Inspection of SQL Server clusters that treats the DBM metadata as the source of truth.

    For every online cluster the inspection:
      * compares each running instance's app_setting row with the DBM metadata and
        tries to insert/repair it when it is missing or different;
      * checks the system job status on each running instance;
      * for HA clusters, compares users, jobs and link servers between the master
        and every running slave.
    Every finding is persisted as a row in the matching report model so that the
    responsible DBA can be informed.
    """

    def __init__(self):
        # Lazily-evaluated queryset of all ONLINE SQL Server clusters (HA and single).
        self.clusters = Cluster.objects.prefetch_related(
            "storageinstance_set",
            "storageinstance_set__machine",
        ).filter(phase=ClusterPhase.ONLINE, cluster_type__in=[ClusterType.SqlserverHA, ClusterType.SqlserverSingle])

    def check_task(self):
        """
        Entry point: run every inspection item against every selected cluster.
        """
        for cluster in self.clusters:
            self.check_app_setting_data(cluster)
            self.check_job_is_disabled(cluster)
            if cluster.cluster_type == ClusterType.SqlserverHA:
                # Master/slave comparisons only make sense for HA clusters.
                # NOTE: .get() raises if the cluster has no master or more than one master.
                master = cluster.storageinstance_set.get(instance_inner_role=InstanceInnerRole.MASTER)
                for s in cluster.storageinstance_set.filter(
                    status=InstanceStatus.RUNNING, instance_inner_role=InstanceInnerRole.SLAVE
                ):
                    self.check_user(master_instance=master, slave_instance=s, cluster=cluster)
                    self.check_job(master_instance=master, slave_instance=s, cluster=cluster)
                    self.check_link_server(master_instance=master, slave_instance=s, cluster=cluster)

    @staticmethod
    def _record_app_setting_report(cluster: Cluster, instance: StorageInstance, is_fix: int):
        """
        Persist one app_setting inconsistency record for ``instance``.

        @param cluster: cluster the instance belongs to
        @param instance: instance whose app_setting data was missing or inconsistent
        @param is_fix: 1 if the repair attempt succeeded, 0 otherwise
        """
        CheckAppSettingReport.objects.create(
            cluster=cluster.name,
            cluster_type=cluster.cluster_type,
            instance_host=instance.machine.ip,
            instance_port=instance.port,
            is_inconsistent=1,
            is_fix=is_fix,
        )

    @staticmethod
    def fix_app_setting_data(cluster: Cluster, instance: StorageInstance):
        """
        Try to repair inconsistent app_setting data on ``instance`` and record the outcome.

        @param cluster: cluster the instance belongs to
        @param instance: instance whose app_setting data differs from the DBM metadata
        @return: always True; the repair result itself is stored in the report row
        """
        # Inside the method body this name resolves to the module-level helper imported
        # from sqlserver_db_function, not to this static method.
        is_fix = 1 if fix_app_setting_data(cluster=cluster, instance=instance) else 0
        CheckAppSettingData._record_app_setting_report(cluster=cluster, instance=instance, is_fix=is_fix)
        return True

    @staticmethod
    def add_app_setting_data(cluster: Cluster, instance: StorageInstance):
        """
        Insert the app_setting data on an instance that has none and record the outcome.

        @param cluster: cluster the instance belongs to
        @param instance: instance that is missing its app_setting data
        @return: always True; the insert result itself is stored in the report row
        """
        # Charset configured for the cluster's DB module.
        charset = get_module_infos(
            bk_biz_id=cluster.bk_biz_id,
            db_module_id=cluster.db_module_id,
            cluster_type=ClusterType(cluster.cluster_type),
        )["charset"]

        # Backup configuration of the cluster.
        backup_config = get_sqlserver_backup_config(
            bk_biz_id=cluster.bk_biz_id,
            db_module_id=cluster.db_module_id,
            cluster_domain=cluster.immute_domain,
        )

        # Alarm (per-cluster custom) configuration of the cluster.
        alarm_config = get_sqlserver_alarm_config(
            bk_biz_id=cluster.bk_biz_id,
            db_module_id=cluster.db_module_id,
            cluster_domain=cluster.immute_domain,
        )

        # Write the configuration to the instance.
        inserted = insert_sqlserver_config(
            cluster=cluster,
            storages=[instance],
            charset=charset,
            backup_config=backup_config,
            alarm_config=alarm_config,
        )

        CheckAppSettingData._record_app_setting_report(
            cluster=cluster, instance=instance, is_fix=1 if inserted else 0
        )
        return True

    def check_app_setting_data(self, cluster: Cluster):
        """
        Compare the app_setting data of every running instance in ``cluster`` with the DBM metadata.

        Missing data is inserted, differing data is repaired; both cases are reported.

        @param cluster: cluster to inspect
        """
        for instance in cluster.storageinstance_set.filter(status=InstanceStatus.RUNNING):
            data = get_app_setting_data(instance=instance, bk_cloud_id=cluster.bk_cloud_id)
            if data is None:
                # None most likely means the instance could not be queried; skip this round.
                continue
            if len(data) == 0:
                # No app_setting data configured at all: insert it. There is nothing to
                # compare afterwards, and indexing the empty result below would raise,
                # so move on to the next instance.
                self.add_app_setting_data(cluster=cluster, instance=instance)
                continue

            if (
                data["BK_BIZ_ID"] != cluster.bk_biz_id
                or data["BK_CLOUD_ID"] != cluster.bk_cloud_id
                or data["CLUSTER_ID"] != cluster.id
                or data["CLUSTER_DOMAIN"] != cluster.immute_domain
                or data["PORT"] != instance.port
                or data["ROLE"] != instance.instance_inner_role
            ):
                # At least one field disagrees with the metadata: try to repair it.
                self.fix_app_setting_data(cluster=cluster, instance=instance)

    @staticmethod
    def check_user(master_instance: StorageInstance, slave_instance: StorageInstance, cluster: Cluster):
        """
        Check that the users on the slave match the master; report the slave if they do not.

        @param master_instance: master instance of the cluster
        @param slave_instance: slave instance being compared
        @param cluster: cluster both instances belong to
        """
        if not check_ha_config(
            master_instance=master_instance,
            slave_instance=slave_instance,
            bk_cloud_id=cluster.bk_cloud_id,
            check_tag="user",
        ):
            CheckUserSyncReport.objects.create(
                cluster=cluster.name,
                cluster_type=cluster.cluster_type,
                instance_host=slave_instance.machine.ip,
                instance_port=slave_instance.port,
                is_user_inconsistent=1,
            )

    @staticmethod
    def check_job(master_instance: StorageInstance, slave_instance: StorageInstance, cluster: Cluster):
        """
        Check that the business jobs on the slave match the master; report the slave if they do not.

        @param master_instance: master instance of the cluster
        @param slave_instance: slave instance being compared
        @param cluster: cluster both instances belong to
        """
        if not check_ha_config(
            master_instance=master_instance,
            slave_instance=slave_instance,
            bk_cloud_id=cluster.bk_cloud_id,
            check_tag="job",
        ):
            CheckJobSyncReport.objects.create(
                cluster=cluster.name,
                cluster_type=cluster.cluster_type,
                instance_host=slave_instance.machine.ip,
                instance_port=slave_instance.port,
                is_job_inconsistent=1,
            )

    @staticmethod
    def check_link_server(master_instance: StorageInstance, slave_instance: StorageInstance, cluster: Cluster):
        """
        Check that the link servers on the slave match the master; report the slave if they do not.

        @param master_instance: master instance of the cluster
        @param slave_instance: slave instance being compared
        @param cluster: cluster both instances belong to
        """
        # Previously this passed check_tag="job", which merely repeated check_job and
        # filed job drift as a link-server problem.
        # NOTE(review): confirm "link_server" is the tag check_ha_config expects.
        if not check_ha_config(
            master_instance=master_instance,
            slave_instance=slave_instance,
            bk_cloud_id=cluster.bk_cloud_id,
            check_tag="link_server",
        ):
            CheckLinkServerReport.objects.create(
                cluster=cluster.name,
                cluster_type=cluster.cluster_type,
                instance_host=slave_instance.machine.ip,
                instance_port=slave_instance.port,
                is_link_server_inconsistent=1,
            )

    @staticmethod
    def check_job_is_disabled(cluster: Cluster):
        """
        Check the system job status on every running instance of ``cluster``.

        An instance for which check_sys_job_status returns a falsy value is reported.

        @param cluster: cluster to inspect
        """
        for instance in cluster.storageinstance_set.filter(status=InstanceStatus.RUNNING):
            if not check_sys_job_status(cluster=cluster, instance=instance):
                CheckSysJobStatuReport.objects.create(
                    cluster=cluster.name,
                    cluster_type=cluster.cluster_type,
                    instance_host=instance.machine.ip,
                    instance_port=instance.port,
                    is_job_disable=1,
                )
28 changes: 28 additions & 0 deletions
28
dbm-ui/backend/db_periodic_task/local_tasks/sqlserver/task.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
""" | ||
TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-DB管理系统(BlueKing-BK-DBM) available. | ||
Copyright (C) 2017-2023 THL A29 Limited, a Tencent company. All rights reserved. | ||
Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at https://opensource.org/licenses/MIT | ||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on | ||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
specific language governing permissions and limitations under the License. | ||
""" | ||
|
||
import logging | ||
|
||
from celery.schedules import crontab | ||
|
||
from backend.db_periodic_task.local_tasks.register import register_periodic_task | ||
from backend.db_periodic_task.local_tasks.sqlserver.check_app_setting_data import CheckAppSettingData | ||
|
||
logger = logging.getLogger("celery") | ||
|
||
|
||
@register_periodic_task(run_every=crontab(minute=30, hour=6))
def check_instance_app_setting():
    """
    Inspect the metadata table (app_setting) and related settings of the SQL Server instances.

    Scheduled by the crontab above to run every day at 06:30.
    """
    # The previous body only referenced the class and never ran the inspection.
    CheckAppSettingData().check_task()
Oops, something went wrong.