From 2f57689b0c82806845c7c5ceb84d16f2163cf6d6 Mon Sep 17 00:00:00 2001 From: dcd <1151627903@qq.com> Date: Thu, 26 Dec 2024 10:11:27 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=A3=80=E6=9F=A5Agent=E3=80=81bkmonit?= =?UTF-8?q?orbeat=E5=BC=82=E5=B8=B8=E7=8A=B6=E6=80=81=E5=B9=B6=E5=8F=91?= =?UTF-8?q?=E9=80=81=E9=82=AE=E4=BB=B6=E5=91=8A=E7=9F=A5=E8=BF=90=E7=BB=B4?= =?UTF-8?q?=20(closed=20#2512)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- apps/node_man/constants.py | 2 + apps/node_man/models.py | 2 + apps/node_man/periodic_tasks/__init__.py | 3 + .../periodic_tasks/send_mail_to_maintainer.py | 99 +++++++++++++++++++ common/api/modules/taihu_apis.py | 69 +++++++++++++ config/default.py | 6 ++ .../templates/configmaps/env-configmap.yaml | 5 +- 7 files changed, 185 insertions(+), 1 deletion(-) create mode 100644 apps/node_man/periodic_tasks/send_mail_to_maintainer.py create mode 100644 common/api/modules/taihu_apis.py diff --git a/apps/node_man/constants.py b/apps/node_man/constants.py index cde490d4a..bb3d622e0 100644 --- a/apps/node_man/constants.py +++ b/apps/node_man/constants.py @@ -572,6 +572,8 @@ def _get_member__alias_map(cls) -> Dict[Enum, str]: QUERY_MODULE_ID_THRESHOLD = 15 UPDATE_CMDB_CLOUD_AREA_LIMIT = 50 VERSION_PATTERN = re.compile(r"[vV]?(\d+\.){1,5}\d+(-rc\d)?$") +# 进程表插件查询分片数量 +PROC_CHUNK_SIZE = 30000 # 语义化版本正则,参考:https://semver.org/#is-there-a-suggested-regular-expression-regex-to-check-a-semver-string SEMANTIC_VERSION_PATTERN = re.compile( r"^v?(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)\.(?P<patch>0|[1-9]\d*)" diff --git a/apps/node_man/models.py b/apps/node_man/models.py index 1eb2c3d1c..6a866a332 100644 --- a/apps/node_man/models.py +++ b/apps/node_man/models.py @@ -181,6 +181,8 @@ class KeyEnum(Enum): QUERY_PROC_STATUS_HOST_LENS = "QUERY_PROC_STATUS_HOST_LENS" # 业务最大插件版本 PLUGIN_VERSION_CONFIG = "PLUGIN_VERSION_CONFIG" + # 发送邮件业务黑名单 + SEND_MAIL_BIZ_BLACKLIST = "SEND_MAIL_BIZ_BLACKLIST" key = models.CharField(_("键"), 
max_length=255, db_index=True, primary_key=True)
     v_json = JSONField(_("值"))
diff --git a/apps/node_man/periodic_tasks/__init__.py b/apps/node_man/periodic_tasks/__init__.py
index eaffce09a..e83317926 100644
--- a/apps/node_man/periodic_tasks/__init__.py
+++ b/apps/node_man/periodic_tasks/__init__.py
@@ -27,3 +27,6 @@
 
 if getattr(settings, "CONFIG_POLICY_BY_TENCENT_VPC", False):
     from .configuration_policy import configuration_policy  # noqa
+
+if all(getattr(settings, attr, False) for attr in ["TAIHU_MAIL_SENDER", "TAIHU_TOKEN", "TAIHU_API_ROOT"]):
+    from .send_mail_to_maintainer import send_mail_to_maintainer_periodic_task  # noqa
diff --git a/apps/node_man/periodic_tasks/send_mail_to_maintainer.py b/apps/node_man/periodic_tasks/send_mail_to_maintainer.py
new file mode 100644
index 000000000..ed4e93c5d
--- /dev/null
+++ b/apps/node_man/periodic_tasks/send_mail_to_maintainer.py
@@ -0,0 +1,99 @@
+# -*- coding: utf-8 -*-
+"""
+TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-节点管理(BlueKing-BK-NODEMAN) available.
+Copyright (C) 2017-2022 THL A29 Limited, a Tencent company. All rights reserved.
+Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License.
+You may obtain a copy of the License at https://opensource.org/licenses/MIT
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+"""
+import collections
+from typing import Any, Dict, List, Set
+
+from celery.schedules import crontab
+from celery.task import periodic_task
+from django.conf import settings
+from django.db.models import QuerySet
+
+from apps.node_man import constants, models
+from common.api import CCApi
+from common.api.modules.taihu_apis import taihu_client
+from common.log import logger
+
+
+def _count_terminated_by_biz(proc_name: str) -> collections.Counter:
+    """Count, per business ID, the hosts whose process ``proc_name`` is in TERMINATED state."""
+    # distinct + explicit ordering keep the offset slices below stable and free of duplicate hosts
+    terminated_host_ids: QuerySet = (
+        models.ProcessStatus.objects.filter(status=constants.ProcStateType.TERMINATED, name=proc_name)
+        .values_list("bk_host_id", flat=True)
+        .distinct()
+        .order_by("bk_host_id")
+    )
+    counter = collections.Counter()
+    # Walk the host IDs in slices so one ``IN`` lookup never carries more than PROC_CHUNK_SIZE IDs
+    for offset in range(0, terminated_host_ids.count(), constants.PROC_CHUNK_SIZE):
+        host_ids: Set[int] = set(terminated_host_ids[offset : offset + constants.PROC_CHUNK_SIZE])
+        counter.update(models.Host.objects.filter(bk_host_id__in=host_ids).values_list("bk_biz_id", flat=True))
+    return counter
+
+
+def send_mail_to_maintainer(task_id):
+    """
+    Mail each business's maintainers the number of TERMINATED Agent / bkmonitorbeat processes.
+
+    Businesses without maintainers or listed in SEND_MAIL_BIZ_BLACKLIST are skipped.
+    :param task_id: ID of the triggering celery task, only used in log messages
+    """
+    logger.info(f"start send_mail_to_maintainer, task_id -> {task_id}")
+
+    query_kwargs = {"fields": ["bk_biz_id", "bk_biz_name", "bk_biz_maintainer"]}
+    try:
+        biz_infos: List[Dict[str, Any]] = CCApi.search_business(query_kwargs)["info"]
+        # Map business ID -> business info, dropping businesses that have no maintainer
+        biz_id_biz_info_map: Dict[int, Dict[str, Any]] = {
+            biz_info["bk_biz_id"]: biz_info for biz_info in biz_infos if biz_info["bk_biz_maintainer"]
+        }
+    except Exception as e:
+        logger.exception(f"get business info error: {str(e)}")
+        return
+
+    agent_counter = _count_terminated_by_biz(models.ProcessStatus.GSE_AGENT_PROCESS_NAME)
+    plugin_counter = _count_terminated_by_biz("bkmonitorbeat")
+
+    biz_blacklist = models.GlobalSettings.get_config(
+        key=models.GlobalSettings.KeyEnum.SEND_MAIL_BIZ_BLACKLIST.value, default=[]
+    )
+    for bk_biz_id in set(agent_counter) | set(plugin_counter):
+        biz_info = biz_id_biz_info_map.get(bk_biz_id)
+        # Skip businesses without maintainer info and businesses in the blacklist
+        if not biz_info or bk_biz_id in biz_blacklist:
+            continue
+        biz_name = biz_info["bk_biz_name"]
+        biz_maintainer = biz_info["bk_biz_maintainer"]
+        try:
+            taihu_client.send_mail(
+                to=biz_maintainer,
+                title="业务-{}-ID-{}:Agent-bkmonitorbeat状态异常通知".format(biz_name, bk_biz_id),
+                # One placeholder per argument: str.format silently ignores surplus arguments
+                content="Agent异常数量: {}, bkmonitorbeat异常数量: {}, 详情点击节点管理: {}".format(
+                    agent_counter[bk_biz_id], plugin_counter[bk_biz_id], settings.BK_NODEMAN_URL
+                ),
+            )
+        except Exception as e:
+            logger.exception(f"bk_biz_id -> {bk_biz_id} send mail to maintainer error: {str(e)}")
+            continue
+
+    logger.info(f"send mail to maintainer success, task_id -> {task_id}")
+
+
+@periodic_task(
+    queue="default",
+    options={"queue": "default"},
+    run_every=crontab(hour="9", minute="0", day_of_week="*", day_of_month="*", month_of_year="*"),
+)
+def send_mail_to_maintainer_periodic_task():
+    """Periodic entry point (every day at 09:00) that mails abnormal-status statistics to maintainers."""
+    task_id = send_mail_to_maintainer_periodic_task.request.id
+    send_mail_to_maintainer(task_id)
diff --git a/common/api/modules/taihu_apis.py b/common/api/modules/taihu_apis.py
new file mode 100644
index 000000000..a359e9168
--- /dev/null
+++ b/common/api/modules/taihu_apis.py
@@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+"""
+TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-节点管理(BlueKing-BK-NODEMAN) available.
+Copyright (C) 2017-2022 THL A29 Limited, a Tencent company.
All rights reserved.
+Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License.
+You may obtain a copy of the License at https://opensource.org/licenses/MIT
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+"""
+import hashlib
+import time
+from uuid import uuid4
+
+import requests
+from django.conf import settings
+
+
+class TaiHuApis(object):
+    """Minimal TaiHu gateway client: signs every request with the app token and sends mail."""
+
+    REQUEST_TIMEOUT = 30  # seconds; requests has no default timeout and would otherwise wait forever
+
+    def __init__(self):
+        self.passid = settings.APP_CODE
+        self.sender = settings.TAIHU_MAIL_SENDER
+        self.token = settings.TAIHU_TOKEN
+        self.url_root = settings.TAIHU_API_ROOT
+        self.session = requests.Session()
+
+    @property
+    def random_timestamp(self) -> str:
+        """Current Unix time in whole seconds, as a string (not random despite the name)."""
+        return str(int(time.time()))
+
+    @property
+    def request_headers(self) -> dict:
+        """Build signed headers: x-rio-signature = sha256(timestamp + token + nonce + timestamp).upper()"""
+        timestamp, nonce = self.random_timestamp, self.random_nonce
+        signature = hashlib.sha256((timestamp + self.token + nonce + timestamp).encode()).hexdigest().upper()
+        return {
+            "x-rio-paasid": self.passid,
+            "x-rio-nonce": nonce,
+            "x-rio-timestamp": timestamp,
+            "x-rio-signature": signature,
+        }
+
+    @property
+    def random_nonce(self) -> str:
+        """Fresh UUID4 string, used as the per-request nonce."""
+        return str(uuid4())
+
+    def send_mail(self, to: str, title: str, content: str):
+        """
+        Send a mail through the TaiHu message API; ``to`` is passed through as the ``To`` field.
+
+        :raises requests.RequestException: on timeout, connection failure or a non-2xx response
+        """
+        data = {"From": self.sender, "To": to, "Title": title, "Content": content}
+        url = self.url_root + "/ebus/tof4_msg/api/v1/Message/SendMailInfo"
+        response = self.session.post(url=url, headers=self.request_headers, json=data, timeout=self.REQUEST_TIMEOUT)
+        response.raise_for_status()
+
+
+# NOTE: when adding TaiHu APIs, make sure the token and API root are configured in the environment variables
+if all(getattr(settings, attr, False) for attr in ["TAIHU_MAIL_SENDER",
"TAIHU_TOKEN", "TAIHU_API_ROOT"]):
+    taihu_client = TaiHuApis()
+else:
+    taihu_client = object  # placeholder when TaiHu is not configured; it has no send_mail attribute
diff --git a/config/default.py b/config/default.py
index 9c5e88962..22186d8ce 100644
--- a/config/default.py
+++ b/config/default.py
@@ -776,6 +776,8 @@ def get_standard_redis_mode(cls, config_redis_mode: str, default: Optional[str]
 BK_NODEMAN_API_ADDR = os.getenv("BK_NODEMAN_API_ADDR", "")
 BK_NODEMAN_NGINX_DOWNLOAD_PORT = os.getenv("BK_NODEMAN_NGINX_DOWNLOAD_PORT") or 17980
 BK_NODEMAN_NGINX_PROXY_PASS_PORT = os.getenv("BK_NODEMAN_NGINX_PROXY_PASS_PORT") or 17981
+# Access URL of NodeMan, embedded in the notification mails sent to maintainers
+BK_NODEMAN_URL = os.getenv("BK_NODEMAN_URL", "")
 
 # 使用标准运维开通策略相关变量
 BKAPP_REQUEST_EE_SOPS_APP_CODE = os.getenv("BKAPP_REQUEST_EE_SOPS_APP_CODE")
@@ -824,6 +826,10 @@ def get_standard_redis_mode(cls, config_redis_mode: str, default: Optional[str]
 
 # 腾讯云endpoint
 TXY_ENDPOINT = env.TXY_ENDPOINT
+# TaiHu: mail sender, token and API root
+TAIHU_MAIL_SENDER = os.getenv("TAIHU_MAIL_SENDER")
+TAIHU_TOKEN = os.getenv("TAIHU_TOKEN")
+TAIHU_API_ROOT = os.getenv("TAIHU_API_ROOT")
 
 # ==============================================================================
 # 可观测
diff --git a/support-files/kubernetes/helm/bk-nodeman/templates/configmaps/env-configmap.yaml b/support-files/kubernetes/helm/bk-nodeman/templates/configmaps/env-configmap.yaml
index cf4a9857c..0a8a2af1e 100644
--- a/support-files/kubernetes/helm/bk-nodeman/templates/configmaps/env-configmap.yaml
+++ b/support-files/kubernetes/helm/bk-nodeman/templates/configmaps/env-configmap.yaml
@@ -137,4 +137,7 @@ data:
   TXY_ENDPOINT: "{{ .Values.config.TXYEndpoint }}"
   TXY_SECRETID: "{{ .Values.config.TXYSecretId }}"
   TXY_SECRETKEY: "{{ .Values.config.TXYSecretKey }}"
-  BKAPP_UNASSIGNED_CLOUD_ID: "{{ .Values.config.bkAppUnassignedCloudId}}"
+  BKAPP_UNASSIGNED_CLOUD_ID: "{{ .Values.config.bkAppUnassignedCloudId }}"
+  TAIHU_MAIL_SENDER: "{{ .Values.config.TaiHuMailSender }}"
+  TAIHU_TOKEN: "{{ .Values.config.TaiHuToken }}"
+  TAIHU_API_ROOT: "{{ .Values.config.TaiHuApiRoot }}"