From 71fab7458c31f7ec637f33ceb7eb3f558e82b2bf Mon Sep 17 00:00:00 2001
From: xjxia <syxjxia@gmail.com>
Date: Tue, 14 Jan 2025 15:08:13 +0800
Subject: [PATCH] feat(dbm-services): add global monitor for DBHA close #9055

---
 dbm-services/common/dbha/ha-module/Makefile   |   4 +-
 .../dbha/ha-module/agent/monitor_agent.go     |  38 +-
 .../common/dbha/ha-module/client/client.go    |   5 +-
 .../common/dbha/ha-module/client/cmdb.go      |  95 +++--
 .../common/dbha/ha-module/client/hadb.go      |  92 +++--
 .../common/dbha/ha-module/config/config.go    |  23 +-
 .../dbha/ha-module/constvar/constant.go       |  17 +-
 dbm-services/common/dbha/ha-module/dbha.go    |  28 +-
 .../dbmodule/dbmysql/MySQL_common_switch.go   |   2 +-
 .../dbmodule/mongodb/mongos_callback.go       |   4 +-
 .../ha-module/dbmodule/redis/redis_switch.go  |   5 +
 .../dbha/ha-module/dbmodule/register.go       |   2 +-
 .../globalmonitor/monitor_component.go        | 349 ++++++++++++++++++
 dbm-services/common/dbha/ha-module/gm/gcm.go  |   4 +-
 dbm-services/common/dbha/ha-module/gm/gqa.go  |   5 +
 .../common/dbha/ha-module/monitor/monitor.go  | 144 ++------
 .../common/dbha/ha-module/test/client_test.go |  69 ----
 .../pkg/handler/hastatus/hastatus_handler.go  |   6 +-
 .../cloud/script_template/dbha_template.py    |   2 +
 19 files changed, 573 insertions(+), 321 deletions(-)
 create mode 100644 dbm-services/common/dbha/ha-module/globalmonitor/monitor_component.go
 delete mode 100644 dbm-services/common/dbha/ha-module/test/client_test.go

diff --git a/dbm-services/common/dbha/ha-module/Makefile b/dbm-services/common/dbha/ha-module/Makefile
index 487f462fb2..26c1ec22f8 100644
--- a/dbm-services/common/dbha/ha-module/Makefile
+++ b/dbm-services/common/dbha/ha-module/Makefile
@@ -1,7 +1,7 @@
 SHELL := /bin/bash
 BASE_DIR = $(shell pwd)
-VERSION = 0.0.1
-GITHASH = ""
+VERSION = $(shell git describe --tags --always --dirty)
+GITHASH = $(shell git rev-parse --short HEAD)
 APPNAME = dbha
 GOOS ?= linux
 BUILD_FLAG = " -X main.version=${VERSION} -X main.githash=${GITHASH} "
diff --git a/dbm-services/common/dbha/ha-module/agent/monitor_agent.go b/dbm-services/common/dbha/ha-module/agent/monitor_agent.go
index 24200836da..3225e3b627 100644
--- a/dbm-services/common/dbha/ha-module/agent/monitor_agent.go
+++ b/dbm-services/common/dbha/ha-module/agent/monitor_agent.go
@@ -100,7 +100,7 @@ func NewMonitorAgent(conf *config.Config, detectType string) (*MonitorAgent, err
 // report agent's heartbeat info.
 func (a *MonitorAgent) Process(instances map[string]dbutil.DataBaseDetect) {
 	var wg sync.WaitGroup
-	startTime := time.Now().Unix()
+	startTime := time.Now()
 	sem := make(chan struct{}, a.MaxConcurrency) // 创建一个有缓冲的通道,容量为 maxConcurrency
 	log.Logger.Debugf("[%s] need to detect instances number:%d", a.DetectType, len(a.DBInstance))
 	for _, ins := range instances {
@@ -113,9 +113,10 @@ func (a *MonitorAgent) Process(instances map[string]dbutil.DataBaseDetect) {
 		}(ins)
 	}
 	wg.Wait()
+	interval := int(time.Since(startTime).Seconds())
 	log.Logger.Debugf("[%s] detected instances number:%d ,cost: %d",
-		a.DetectType, len(a.DBInstance), time.Now().Unix()-startTime)
-	a.DetectPostProcess()
+		a.DetectType, len(a.DBInstance), interval)
+	a.DetectPostProcess(interval)
 	time.Sleep(time.Second)
 }
 
@@ -185,8 +186,8 @@ func (a *MonitorAgent) DoDetectSingle(ins dbutil.DataBaseDetect) {
 }
 
 // DetectPostProcess post agent heartbeat
-func (a *MonitorAgent) DetectPostProcess() {
-	err := a.reporterHeartbeat()
+func (a *MonitorAgent) DetectPostProcess(interval int) {
+	err := a.reporterHeartbeat(interval)
 	if err != nil {
 		log.Logger.Errorf("reporter heartbeat failed. err:%s", err.Error())
 	}
@@ -232,15 +233,19 @@ func (a *MonitorAgent) FetchDBInstance() error {
 	a.HashMod = mod
 	a.HashValue = modValue
 
-	req := client.DBInstanceInfoRequest{
+	req := client.DBInstanceByCityRequest{
 		LogicalCityIDs: []int{a.CityID},
 		HashCnt:        mod,
 		HashValue:      modValue,
 		ClusterTypes:   []string{a.DetectType},
 	}
 
-	rawInfo, err := a.CmDBClient.GetDBInstanceInfoByClusterType(req)
+	rawInfo, err := a.CmDBClient.GetDBInstanceInfoByCityID(req)
 	if err != nil {
+		minInfo := monitor.GetApiAlertInfo(constvar.CmDBInstanceUrl, err.Error())
+		if e := monitor.MonitorSend("get instances failed", minInfo); e != nil {
+			log.Logger.Warnf("%s", e.Error())
+		}
 		log.Logger.Errorf("get instance info from cmdb failed. err:%s", err.Error())
 		return err
 	}
@@ -306,20 +311,20 @@ func (a *MonitorAgent) FetchGMInstance() error {
 			continue
 		}
 		// needn't lock
-		_, ok := a.GMInstance[info.Ip]
+		_, ok := a.GMInstance[info.IP]
 		if ok {
-			a.GMInstance[info.Ip].LastFetchTime = time.Now()
+			a.GMInstance[info.IP].LastFetchTime = time.Now()
 		} else {
-			a.GMInstance[info.Ip] = &GMConnection{
-				Ip:            info.Ip,
+			a.GMInstance[info.IP] = &GMConnection{
+				Ip:            info.IP,
 				Port:          info.Port,
 				LastFetchTime: time.Now(),
 				IsClose:       false,
 			}
-			err = a.GMInstance[info.Ip].Init()
+			err = a.GMInstance[info.IP].Init()
 			if err != nil {
 				log.Logger.Errorf("init gm failed. gm_ip:%s, gm_port:%d, err:%s",
-					info.Ip, info.Port, err.Error())
+					info.IP, info.Port, err.Error())
 				return err
 			}
 		}
@@ -342,6 +347,7 @@ func (a *MonitorAgent) NeedReportGM(ins dbutil.DataBaseDetect) bool {
 		cachedIns := a.ReportGMCache[ip]
 		now := time.Now()
 		if now.Before(cachedIns.ReporterGMTime.Add(time.Second * time.Duration(cachedIns.ExpireInterval))) {
+			log.Logger.Debugf("instance[%s] cached, skip report to gm", cachedIns.Ip)
 			return false
 		}
 	}
@@ -395,6 +401,7 @@ func (a *MonitorAgent) ReportDetectInfoToGM(reporterInstance dbutil.DataBaseDete
 			//do retry
 			continue
 		} else {
+			log.Logger.Debugf("reporter instance[%s#%d] to gm[%s#%d] success", ip, port, gmIns.Ip, gmIns.Port)
 			isReported = true
 			gmIns.Mutex.Unlock()
 			a.ReportGMCache[ip] = &CachedHostInfo{
@@ -484,9 +491,8 @@ func (a *MonitorAgent) registerAgentInfoToHaDB() error {
 }
 
 // reporterHeartbeat send agent heartbeat to HA-DB
-func (a *MonitorAgent) reporterHeartbeat() error {
-	interval := time.Now().Sub(a.heartbeat).Seconds()
-	err := a.HaDBClient.ReporterAgentHeartbeat(a.MonIp, a.DetectType, int(interval), a.HashMod, a.HashValue)
+func (a *MonitorAgent) reporterHeartbeat(interval int) error {
+	err := a.HaDBClient.ReporterAgentHeartbeat(a.MonIp, a.DetectType, interval, a.HashMod, a.HashValue)
 	a.heartbeat = time.Now()
 	return err
 }
diff --git a/dbm-services/common/dbha/ha-module/client/client.go b/dbm-services/common/dbha/ha-module/client/client.go
index bd1205d78e..e5d461fdaa 100644
--- a/dbm-services/common/dbha/ha-module/client/client.go
+++ b/dbm-services/common/dbha/ha-module/client/client.go
@@ -97,8 +97,9 @@ func (c *Client) DoNewForCB(
 	}
 
 	var retryErr error
+	var response interface{}
 	for retryIdx := 0; retryIdx < 5; retryIdx++ {
-		response, retryErr := c.doNewInner(method, url, params, headers, bodyCB)
+		response, retryErr = c.doNewInner(method, url, params, headers, bodyCB)
 		if retryErr == nil {
 			return response, nil
 		}
@@ -211,7 +212,7 @@ func (c *Client) doNewInner(method, url string, params interface{},
 
 	result, err := bodyCB(b)
 	if err != nil {
-		log.Logger.Errorf(err.Error())
+		log.Logger.Errorf("%s:%s", util.AtWhere(), err.Error())
 		return nil, err
 	}
 	return result, nil
diff --git a/dbm-services/common/dbha/ha-module/client/cmdb.go b/dbm-services/common/dbha/ha-module/client/cmdb.go
index 8db9fb3e5a..0f201d1871 100644
--- a/dbm-services/common/dbha/ha-module/client/cmdb.go
+++ b/dbm-services/common/dbha/ha-module/client/cmdb.go
@@ -1,14 +1,14 @@
 package client
 
 import (
+	"encoding/json"
+	"fmt"
+	"net/http"
+
 	"dbm-services/common/dbha/ha-module/config"
 	"dbm-services/common/dbha/ha-module/constvar"
 	"dbm-services/common/dbha/ha-module/log"
 	"dbm-services/common/dbha/ha-module/util"
-
-	"encoding/json"
-	"fmt"
-	"net/http"
 )
 
 // CmDBClient client to request cmdb
@@ -16,15 +16,25 @@ type CmDBClient struct {
 	Client
 }
 
-// DBInstanceInfoByAddressRequest fetch instances list from cmdb by ip
-type DBInstanceInfoByAddressRequest struct {
+// DBInstanceByAddressRequest fetch instances list from cmdb by ip
+type DBInstanceByAddressRequest struct {
 	DBCloudToken string   `json:"db_cloud_token"`
 	BKCloudID    int      `json:"bk_cloud_id"`
 	Addresses    []string `json:"addresses"`
 }
 
-// DBInstanceInfoRequest fetch instances list from cmdb by city and status
-type DBInstanceInfoRequest struct {
+// DBInstanceByClusterTypeRequest fetch instances list from cmdb by cluster type and status
+type DBInstanceByClusterTypeRequest struct {
+	DBCloudToken string   `json:"db_cloud_token"`
+	BKCloudID    int      `json:"bk_cloud_id"`
+	Statuses     []string `json:"statuses"`
+	HashCnt      int      `json:"hash_cnt"`
+	HashValue    int      `json:"hash_value"`
+	ClusterTypes []string `json:"cluster_types"`
+}
+
+// DBInstanceByCityRequest fetch instances list from cmdb by city and status
+type DBInstanceByCityRequest struct {
 	DBCloudToken   string   `json:"db_cloud_token"`
 	BKCloudID      int      `json:"bk_cloud_id"`
 	LogicalCityIDs []int    `json:"logical_city_ids"`
@@ -135,7 +145,7 @@ func NewCmDBClient(conf *config.APIConfig, cloudId int) *CmDBClient {
 // GetDBInstanceInfoByIp fetch instance info from cmdb by ip
 func (c *CmDBClient) GetDBInstanceInfoByIp(ip string) ([]interface{}, error) {
 	var res []interface{}
-	req := DBInstanceInfoByAddressRequest{
+	req := DBInstanceByAddressRequest{
 		DBCloudToken: c.Conf.BKConf.BkToken,
 		BKCloudID:    c.CloudId,
 		Addresses:    []string{ip},
@@ -156,41 +166,20 @@ func (c *CmDBClient) GetDBInstanceInfoByIp(ip string) ([]interface{}, error) {
 	return res, nil
 }
 
-// GetAllDBInstanceInfo detect running, available status instance
-func (c *CmDBClient) GetAllDBInstanceInfo() ([]interface{}, error) {
-	req := DBInstanceInfoRequest{
-		DBCloudToken: c.Conf.BKConf.BkToken,
-		BKCloudID:    c.CloudId,
-		Statuses:     []string{constvar.RUNNING, constvar.AVAILABLE},
-	}
-
-	response, err := c.DoNew(
-		http.MethodPost, c.SpliceUrlByPrefix(c.Conf.UrlPre, constvar.CmDBInstanceUrl, ""), req, nil)
-	if err != nil {
-		return nil, err
-	}
-	if response.Code != 0 {
-		return nil, fmt.Errorf("%s failed, return code:%d, msg:%s", util.AtWhere(), response.Code, response.Msg)
-	}
-
-	var res []interface{}
-	err = json.Unmarshal(response.Data, &res)
-	if err != nil {
-		return nil, err
-	}
-
-	return res, nil
-}
-
-// GetDBInstanceInfoByCity detect running, available status instance
-func (c *CmDBClient) GetDBInstanceInfoByCity(cityID int) ([]interface{}, error) {
-	req := DBInstanceInfoRequest{
+// GetDBInstanceInfoByCityID detect running, available status instance
+func (c *CmDBClient) GetDBInstanceInfoByCityID(requestInfo DBInstanceByCityRequest) ([]interface{}, error) {
+	req := DBInstanceByCityRequest{
 		DBCloudToken:   c.Conf.BKConf.BkToken,
 		BKCloudID:      c.CloudId,
-		LogicalCityIDs: []int{cityID},
+		LogicalCityIDs: requestInfo.LogicalCityIDs,
 		Statuses:       []string{constvar.RUNNING, constvar.AVAILABLE},
+		HashCnt:        requestInfo.HashCnt,
+		HashValue:      requestInfo.HashValue,
+		ClusterTypes:   requestInfo.ClusterTypes,
 	}
 
+	log.Logger.Debugf("GetDBInstanceInfoByCityID param:%#v", req)
+
 	response, err := c.DoNew(
 		http.MethodPost, c.SpliceUrlByPrefix(c.Conf.UrlPre, constvar.CmDBInstanceUrl, ""), req, nil)
 	if err != nil {
@@ -209,19 +198,18 @@ func (c *CmDBClient) GetDBInstanceInfoByCity(cityID int) ([]interface{}, error)
 	return res, nil
 }
 
-// GetDBInstanceInfoByClusterType detect running, available status instance
-func (c *CmDBClient) GetDBInstanceInfoByClusterType(requestInfo DBInstanceInfoRequest) ([]interface{}, error) {
-	req := DBInstanceInfoRequest{
-		DBCloudToken:   c.Conf.BKConf.BkToken,
-		BKCloudID:      c.CloudId,
-		LogicalCityIDs: requestInfo.LogicalCityIDs,
-		Statuses:       []string{constvar.RUNNING, constvar.AVAILABLE},
-		HashCnt:        requestInfo.HashCnt,
-		HashValue:      requestInfo.HashValue,
-		ClusterTypes:   requestInfo.ClusterTypes,
+// GetDBInstanceByClusterType detect running, available status instance
+func (c *CmDBClient) GetDBInstanceByClusterType(requestInfo DBInstanceByClusterTypeRequest) ([]interface{}, error) {
+	req := DBInstanceByClusterTypeRequest{
+		DBCloudToken: c.Conf.BKConf.BkToken,
+		BKCloudID:    c.CloudId,
+		Statuses:     []string{constvar.RUNNING, constvar.AVAILABLE},
+		HashCnt:      requestInfo.HashCnt,
+		HashValue:    requestInfo.HashValue,
+		ClusterTypes: requestInfo.ClusterTypes,
 	}
 
-	log.Logger.Debugf("GetDBInstanceInfo param:%#v", req)
+	log.Logger.Debugf("GetDBInstanceByClusterType param:%#v", req)
 
 	response, err := c.DoNew(
 		http.MethodPost, c.SpliceUrlByPrefix(c.Conf.UrlPre, constvar.CmDBInstanceUrl, ""), req, nil)
@@ -241,15 +229,16 @@ func (c *CmDBClient) GetDBInstanceInfoByClusterType(requestInfo DBInstanceInfoRe
 	return res, nil
 }
 
-// GetDBInstanceInfoByCluster fetch instance info from cmdb by ip
-func (c *CmDBClient) GetDBInstanceInfoByCluster(clusterName string) ([]interface{}, error) {
+// GetDBInstanceInfoByAddress fetch instance info from cmdb by address
+func (c *CmDBClient) GetDBInstanceInfoByAddress(clusterName string) ([]interface{}, error) {
 	var res []interface{}
-	req := DBInstanceInfoByAddressRequest{
+	req := DBInstanceByAddressRequest{
 		DBCloudToken: c.Conf.BKConf.BkToken,
 		BKCloudID:    c.CloudId,
 		Addresses:    []string{clusterName},
 	}
 
+	log.Logger.Debugf("GetDBInstanceInfoByAddress param:%#v", req)
 	response, err := c.DoNew(
 		http.MethodPost, c.SpliceUrlByPrefix(c.Conf.UrlPre, constvar.CmDBInstanceUrl, ""), req, nil)
 	if err != nil {
diff --git a/dbm-services/common/dbha/ha-module/client/hadb.go b/dbm-services/common/dbha/ha-module/client/hadb.go
index 936c6dbbfa..8e89c331b5 100644
--- a/dbm-services/common/dbha/ha-module/client/hadb.go
+++ b/dbm-services/common/dbha/ha-module/client/hadb.go
@@ -21,21 +21,6 @@ type HaDBClient struct {
 	Client
 }
 
-// GMInfo gm base info, use to report
-type GMInfo struct {
-	Ip      string `json:"ip"`
-	Port    int    `json:"port"`
-	CityID  int    `json:"city_id"`
-	CloudID int    `json:"cloud_id"`
-}
-
-// AgentInfo gm base info, use to report
-type AgentInfo struct {
-	Ip      string `json:"ip"`
-	CityID  int    `json:"city_id"`
-	CloudID int    `json:"cloud_id"`
-}
-
 // HaStatusRequest request ha status table
 type HaStatusRequest struct {
 	DBCloudToken string          `json:"db_cloud_token"`
@@ -50,8 +35,8 @@ type HaStatusResponse struct {
 	RowsAffected int `json:"rowsAffected"`
 }
 
-// DbStatusRequest request db status
-type DbStatusRequest struct {
+// HaAgentLogsRequest request ha_agent_logs
+type HaAgentLogsRequest struct {
 	DBCloudToken string             `json:"db_cloud_token"`
 	BKCloudID    int                `json:"bk_cloud_id"`
 	Name         string             `json:"name"`
@@ -59,8 +44,8 @@ type DbStatusRequest struct {
 	SetArgs      *model.HAAgentLogs `json:"set_args,omitempty"`
 }
 
-// DbStatusResponse db status response
-type DbStatusResponse struct {
+// HaAgentLogsResponse ha_agent_logs response
+type HaAgentLogsResponse struct {
 	RowsAffected int `json:"rowsAffected"`
 	Uid          int `json:"uid"`
 }
@@ -129,16 +114,15 @@ func NewHaDBClient(conf *config.APIConfig, cloudId int) *HaDBClient {
 	return &HaDBClient{c}
 }
 
-// GetDBDetectInfo get gm info from hadb
-func (c *HaDBClient) GetDBDetectInfo() ([]model.HAAgentLogs, error) {
-	req := DbStatusRequest{
+// GetHADetectInfo get detected instance info (ha_agent_logs) from hadb
+func (c *HaDBClient) GetHADetectInfo() ([]model.HAAgentLogs, error) {
+	req := HaAgentLogsRequest{
 		DBCloudToken: c.Conf.BKConf.BkToken,
 		BKCloudID:    c.CloudId,
 		Name:         constvar.GetInstanceStatus,
-		QueryArgs:    &model.HAAgentLogs{},
 	}
 
-	log.Logger.Debugf("AgentGetGMInfo param:%#v", req)
+	log.Logger.Debugf("GetHADetectInfo param:%#v", util.GraceStructString(req.QueryArgs))
 
 	response, err := c.DoNew(http.MethodPost,
 		c.SpliceUrlByPrefix(c.Conf.UrlPre, constvar.DbStatusUrl, ""), req, nil)
@@ -154,17 +138,18 @@ func (c *HaDBClient) GetDBDetectInfo() ([]model.HAAgentLogs, error) {
 		return nil, err
 	}
 	if len(result) == 0 {
-		return nil, fmt.Errorf("no gm available")
+		log.Logger.Debugf("no detected instance found")
 	}
+
 	return result, nil
 }
 
 // ReportDBStatus report detected instance's status
 func (c *HaDBClient) ReportDBStatus(app, agentIp, ip string, port int, dbType, status, bindGM string) error {
-	var result DbStatusResponse
+	var result HaAgentLogsResponse
 	currentTime := time.Now()
 
-	updateReq := DbStatusRequest{
+	updateReq := HaAgentLogsRequest{
 		DBCloudToken: c.Conf.BKConf.BkToken,
 		BKCloudID:    c.CloudId,
 		Name:         constvar.UpdateInstanceStatus,
@@ -205,7 +190,7 @@ func (c *HaDBClient) ReportDBStatus(app, agentIp, ip string, port int, dbType, s
 		log.Logger.Errorf("bug: update instance status affect rows %d", result.RowsAffected)
 	}
 
-	insertReq := DbStatusRequest{
+	insertReq := HaAgentLogsRequest{
 		DBCloudToken: c.Conf.BKConf.BkToken,
 		BKCloudID:    c.CloudId,
 		Name:         constvar.InsertInstanceStatus,
@@ -295,9 +280,10 @@ func (c *HaDBClient) RegisterDBHAInfo(
 		BKCloudID:    c.CloudId,
 		Name:         constvar.RegisterDBHAInfo,
 		QueryArgs: &model.HaStatus{
-			IP:     ip,
-			Module: module,
-			DbType: dbType,
+			CloudID: c.CloudId,
+			IP:      ip,
+			Module:  module,
+			DbType:  dbType,
 		},
 		SetArgs: &model.HaStatus{
 			IP:        ip,
@@ -368,8 +354,8 @@ func (c *HaDBClient) GetAliveAgentInfo(cityID int, dbType string, interval int)
 	return result, nil
 }
 
-// GetAliveHAComponent get alive gm instance from ha_status table
-func (c *HaDBClient) GetAliveHAComponent(module string, interval int) ([]GMInfo, error) {
+// GetAliveHAComponent get alive ha component instance from ha_status table
+func (c *HaDBClient) GetAliveHAComponent(module string, interval int) ([]model.HaStatus, error) {
 	currentTime := time.Now().Add(-time.Second * time.Duration(interval))
 	req := HaStatusRequest{
 		DBCloudToken: c.Conf.BKConf.BkToken,
@@ -382,7 +368,7 @@ func (c *HaDBClient) GetAliveHAComponent(module string, interval int) ([]GMInfo,
 		},
 	}
 
-	log.Logger.Debugf("GetAliveHAInfo param:%#v", util.GraceStructString(req))
+	log.Logger.Debugf("GetAliveHAInfo param:%#v", util.GraceStructString(req.QueryArgs))
 
 	response, err := c.DoNew(http.MethodPost,
 		c.SpliceUrlByPrefix(c.Conf.UrlPre, constvar.HaStatusUrl, ""), req, nil)
@@ -394,7 +380,7 @@ func (c *HaDBClient) GetAliveHAComponent(module string, interval int) ([]GMInfo,
 		return nil, fmt.Errorf("%s failed, return code:%d, msg:%s", util.AtWhere(), response.Code, response.Msg)
 	}
 
-	result := make([]GMInfo, 0)
+	result := make([]model.HaStatus, 0)
 	err = json.Unmarshal(response.Data, &result)
 	if err != nil {
 		log.Logger.Errorf("GetAliveHAInfo failed, unmarshal failed, err:%s, data:%s", err.Error(), response.Data)
@@ -481,6 +467,42 @@ func (c *HaDBClient) ReporterGMHeartbeat(gmIP, module string, interval int) erro
 	return nil
 }
 
+// ReporterMonitorHeartbeat report global monitor heartbeat to ha_status
+func (c *HaDBClient) ReporterMonitorHeartbeat(monIP, detectType string) error {
+	var result HaStatusResponse
+
+	currentTime := time.Now()
+	req := HaStatusRequest{
+		DBCloudToken: c.Conf.BKConf.BkToken,
+		BKCloudID:    c.CloudId,
+		Name:         constvar.ReporterMonitorHeartbeat,
+		QueryArgs: &model.HaStatus{
+			IP:     monIP,
+			DbType: detectType,
+		},
+		SetArgs: &model.HaStatus{
+			LastTime: &currentTime,
+		},
+	}
+
+	log.Logger.Debugf("ReporterMonitorHeartbeat param:%#v", util.GraceStructString(req))
+
+	response, err := c.DoNew(http.MethodPost,
+		c.SpliceUrlByPrefix(c.Conf.UrlPre, constvar.HaStatusUrl, ""), req, nil)
+	if err != nil {
+		return err
+	}
+	if response.Code != 0 {
+		return fmt.Errorf("%s failed, return code:%d, msg:%s", util.AtWhere(), response.Code, response.Msg)
+	}
+	err = json.Unmarshal(response.Data, &result)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
 // QuerySingleTotal check same instance's switch number in a given time period
 func (c *HaDBClient) QuerySingleTotal(ip string, port int, interval int) (int, error) {
 	var result struct {
diff --git a/dbm-services/common/dbha/ha-module/config/config.go b/dbm-services/common/dbha/ha-module/config/config.go
index 5d8107d64f..7e06746137 100644
--- a/dbm-services/common/dbha/ha-module/config/config.go
+++ b/dbm-services/common/dbha/ha-module/config/config.go
@@ -34,6 +34,8 @@ type Config struct {
 	Timezone TimezoneConfig `yaml:"timezone"`
 	// configure for password service
 	PasswdConf APIConfig `yaml:"password_conf"`
+	// configure for Global Monitor
+	GlobalMonitorConf *GlobalMonitorConfig `yaml:"global_monitor_conf"`
 }
 
 // LogConfig configure for log
@@ -86,6 +88,21 @@ type GMConfig struct {
 	GCM            GCMConfig `yaml:"GCM"`
 }
 
+// GlobalMonitorConfig configure for global monitor component
+type GlobalMonitorConfig struct {
+	// active type list for db detect, valid type in constant.go
+	ActiveClusterType []string `yaml:"active_db_type"`
+	// instance campus for detect
+	Campus string `yaml:"campus"`
+	// cloud id for agent, value 0 allowed, so required tag could not assign
+	CloudID        int    `yaml:"cloud_id"`
+	ReportInterval int    `yaml:"reporter_interval"`
+	LocalIP        string `yaml:"local_ip"`
+	// hash mod use to batch fetch cmdb instances
+	HashMod        int   `yaml:"hash_mod"`
+	IgnoreCityList []int `yaml:"ignore_city_list"`
+}
+
 // GDMConfig configure for GDM component
 type GDMConfig struct {
 	DupExpire    int `yaml:"dup_expire"`
@@ -200,7 +217,7 @@ type BKConfig struct {
 	BkToken string `yaml:"bk_token"`
 }
 
-// MonitorConfig monitor configure
+// MonitorConfig monitor configure for alert
 type MonitorConfig struct {
 	BkDataId     int    `yaml:"bk_data_id"`
 	AccessToken  string `yaml:"access_token"`
@@ -209,10 +226,6 @@ type MonitorConfig struct {
 	LocalIP      string `yaml:"local_ip"`
 	//value 0 allowed, so required tag could not assign
 	CloudID int `yaml:"cloud_id"`
-	//interval(second) for global monitor
-	MonitorInterval int `yaml:"monitor_interval"`
-	// active cluster type list for agent detect
-	ActiveDBType []string `yaml:"active_db_type"`
 }
 
 // TimezoneConfig support config timezone
diff --git a/dbm-services/common/dbha/ha-module/constvar/constant.go b/dbm-services/common/dbha/ha-module/constvar/constant.go
index f7f0ff26b5..be622cb924 100644
--- a/dbm-services/common/dbha/ha-module/constvar/constant.go
+++ b/dbm-services/common/dbha/ha-module/constvar/constant.go
@@ -64,6 +64,15 @@ const (
 
 	// Mongos MONGOS = EnumField("mongos", _("mongos"))  # mongos
 	Mongos = "mongos"
+
+	// TendisCacheMetaType storage layer type name in PredixyRedisCluster
+	TendisCacheMetaType = "tendiscache"
+	// TendisPlusMetaType storage layer type name in PredixyTendisplusCluster
+	TendisPlusMetaType = "tendisplus"
+	// MongodbMetaType storage layer type name in MongoShardedCluster
+	MongodbMetaType = "mongodb"
+	// MongoConfigMetaType storage layer type name in MongoShardedCluster
+	MongoConfigMetaType = "mongo_config"
 )
 
 // instance role in cmdb
@@ -113,7 +122,7 @@ const (
 	SqlserverHA = "sqlserver_ha"
 
 	// MongoShardedCluster = EnumField("MongoShardedCluster", _("Mongo分片集群"))
-	MongoShardCluster = "MongoShardedCluster"
+	MongoShardedCluster = "MongoShardedCluster"
 )
 
 // wrapper name in TenDBCluster
@@ -168,6 +177,8 @@ const (
 	ReporterAgentHeartbeat = "reporter_agent_heartbeat"
 	// ReporterGMHeartbeat TODO
 	ReporterGMHeartbeat = "reporter_gm_heartbeat"
+	// ReporterMonitorHeartbeat api name used to report global monitor heartbeat to ha_status
+	ReporterMonitorHeartbeat = "reporter_monitor_heartbeat"
 	// QuerySingleTotal TODO
 	QuerySingleTotal = "query_single_total"
 	// QueryIntervalTotal TODO
@@ -377,6 +388,8 @@ const (
 	DBHAEventDoubleCheckAuth = "dbha_doublecheck_auth_fail"
 	// DBHAEventGlobalMonitor TODO
 	DBHAEventGlobalMonitor = "dbha_global_monitor"
+	// DBHAEventApiFailed event name for api call failure
+	DBHAEventApiFailed = "dbha_call_api_fail"
 
 	// MonitorInfoSwitch TODO
 	MonitorInfoSwitch = 0
@@ -384,6 +397,8 @@ const (
 	MonitorInfoDetect = 1
 	// MonitorInfoGlobal global monitor for component work normal
 	MonitorInfoGlobal = 2
+	// MonitorInfoAPI monitor info type for api alert
+	MonitorInfoAPI = 3
 
 	// MonitorReportType TODO
 	MonitorReportType = "agent"
diff --git a/dbm-services/common/dbha/ha-module/dbha.go b/dbm-services/common/dbha/ha-module/dbha.go
index 902c23d11c..a8eb401276 100644
--- a/dbm-services/common/dbha/ha-module/dbha.go
+++ b/dbm-services/common/dbha/ha-module/dbha.go
@@ -4,11 +4,11 @@ import (
 	"flag"
 	"fmt"
 	"os"
-	"time"
 
 	"dbm-services/common/dbha/ha-module/agent"
 	"dbm-services/common/dbha/ha-module/config"
 	"dbm-services/common/dbha/ha-module/constvar"
+	"dbm-services/common/dbha/ha-module/globalmonitor"
 	"dbm-services/common/dbha/ha-module/gm"
 	"dbm-services/common/dbha/ha-module/log"
 	"dbm-services/common/dbha/ha-module/monitor"
@@ -17,16 +17,27 @@ import (
 
 var dbhaType string
 var configFile string
+var showVersion bool
+var version = "1.0.0"
+var githash = "unknown"
 
 // Init TODO
 func Init() {
 	flag.StringVar(&dbhaType, "type", "", `Input dbha type, ["agent","gm","monitor"]`)
 	flag.StringVar(&configFile, "config_file", "", "Input config file path")
+	flag.BoolVar(&showVersion, "version", false, "Show version")
 }
 
 func main() {
 	Init()
 	flag.Parse()
+
+	if showVersion {
+		fmt.Println("Version:", version)
+		fmt.Println("Git hash info:", githash)
+		os.Exit(0)
+	}
+
 	if flag.NFlag() != 2 {
 		fmt.Println("args wrong.")
 		os.Exit(1)
@@ -81,16 +92,13 @@ func main() {
 			os.Exit(1)
 		}
 	case constvar.MONITOR:
-		for {
-			if monInfo, err := monitor.CheckHAComponent(conf); err != nil {
-				if err = monitor.MonitorSend(err.Error(), monInfo); err != nil {
-					log.Logger.Fatalf("global monitor run failed. err:%s", err.Error())
-					os.Exit(1)
-				}
-			}
-			time.Sleep(time.Duration(conf.Monitor.MonitorInterval) * time.Second)
+		mon := globalmonitor.NewMonitorComponent(conf)
+		if err = mon.RegisterMonitorInfoToHaDB(); err != nil {
+			log.Logger.Fatalf("global monitor register failed:%s", err.Error())
+		}
+		if err = mon.Run(); err != nil {
+			log.Logger.Fatalf("global monitor run failed:%s", err.Error())
 		}
-
 	default:
 		log.Logger.Fatalf("unknow dbha type")
 		os.Exit(1)
diff --git a/dbm-services/common/dbha/ha-module/dbmodule/dbmysql/MySQL_common_switch.go b/dbm-services/common/dbha/ha-module/dbmodule/dbmysql/MySQL_common_switch.go
index a9951852cf..b70e054c55 100644
--- a/dbm-services/common/dbha/ha-module/dbmodule/dbmysql/MySQL_common_switch.go
+++ b/dbm-services/common/dbha/ha-module/dbmodule/dbmysql/MySQL_common_switch.go
@@ -934,7 +934,7 @@ func (ins *SpiderCommonSwitch) GetPrimary() error {
 // SetSpiderNodes get all spider nodes from dbmeta
 func (ins *SpiderCommonSwitch) SetSpiderNodes() error {
 	cmdbClient := client.NewCmDBClient(&ins.Config.DBConf.CMDB, ins.Config.GetCloudId())
-	rawData, err := cmdbClient.GetDBInstanceInfoByCluster(ins.ClusterName)
+	rawData, err := cmdbClient.GetDBInstanceInfoByAddress(ins.ClusterName)
 	if err != nil {
 		return fmt.Errorf("get all cluster instance info failed:%s", err.Error())
 	}
diff --git a/dbm-services/common/dbha/ha-module/dbmodule/mongodb/mongos_callback.go b/dbm-services/common/dbha/ha-module/dbmodule/mongodb/mongos_callback.go
index ff3cbebc89..7e9ffbcf44 100644
--- a/dbm-services/common/dbha/ha-module/dbmodule/mongodb/mongos_callback.go
+++ b/dbm-services/common/dbha/ha-module/dbmodule/mongodb/mongos_callback.go
@@ -33,7 +33,7 @@ func NewMongosInstanceByCmDB(instances []interface{}, Conf *config.Config) ([]db
 	)
 
 	if unmarshalIns, err = UnMarshalMongosInstanceByCmdb(instances,
-		constvar.MongoShardCluster, constvar.Mongos); err != nil {
+		constvar.MongoShardedCluster, constvar.Mongos); err != nil {
 		return nil, err
 	}
 
@@ -55,7 +55,7 @@ func DeserializeMongos(jsonInfo []byte, conf *config.Config) (dbutil.DataBaseDet
 	}
 	var ret dbutil.DataBaseDetect
 	// gm将agent上报的数据结构转换为gdm通道接收的数据结构
-	ret = NewMongosDetectInstanceForGdm(&response, constvar.MongoShardCluster, conf)
+	ret = NewMongosDetectInstanceForGdm(&response, constvar.MongoShardedCluster, conf)
 	return ret, nil
 }
 
diff --git a/dbm-services/common/dbha/ha-module/dbmodule/redis/redis_switch.go b/dbm-services/common/dbha/ha-module/dbmodule/redis/redis_switch.go
index acaf72f68d..29d20e8be6 100644
--- a/dbm-services/common/dbha/ha-module/dbmodule/redis/redis_switch.go
+++ b/dbm-services/common/dbha/ha-module/dbmodule/redis/redis_switch.go
@@ -20,6 +20,7 @@ import (
 	"dbm-services/common/dbha/ha-module/constvar"
 	"dbm-services/common/dbha/ha-module/dbutil"
 	"dbm-services/common/dbha/ha-module/log"
+	"dbm-services/common/dbha/ha-module/monitor"
 	"dbm-services/common/dbha/ha-module/util"
 )
 
@@ -356,6 +357,10 @@ func (ins *RedisSwitch) DoKickTwemproxy(proxy dbutil.ProxyInfo) error {
 	ins.ReportLogs(constvar.InfoResult, fmt.Sprintf("kickoff twemproxy: start kickoff by [%s:%d]", proxy.Ip, proxy.Port))
 	infos, err := ins.CmDBClient.GetDBInstanceInfoByIp(proxy.Ip)
 	if err != nil {
+		minInfo := monitor.GetApiAlertInfo(constvar.CmDBInstanceUrl, err.Error())
+		if e := monitor.MonitorSend("get instances failed", minInfo); e != nil {
+			log.Logger.Warnf("%s", e.Error())
+		}
 		redisErr := fmt.Errorf("kickoff twemproxy: get twemproxy[%s:%d:%d] from cmdb failed",
 			proxy.Ip, proxy.Port, proxy.AdminPort)
 		ins.ReportLogs(constvar.FailResult, redisErr.Error())
diff --git a/dbm-services/common/dbha/ha-module/dbmodule/register.go b/dbm-services/common/dbha/ha-module/dbmodule/register.go
index 408da1efb9..006e9b6519 100644
--- a/dbm-services/common/dbha/ha-module/dbmodule/register.go
+++ b/dbm-services/common/dbha/ha-module/dbmodule/register.go
@@ -99,7 +99,7 @@ func init() {
 	}
 
 	// Mongos used
-	DBCallbackMap[constvar.MongoShardCluster] = Callback{
+	DBCallbackMap[constvar.MongoShardedCluster] = Callback{
 		FetchDBCallback:              mongodb.NewMongosInstanceByCmDB,
 		DeserializeCallback:          mongodb.DeserializeMongos,
 		GetSwitchInstanceInformation: mongodb.NewMongosSwitchInstance,
diff --git a/dbm-services/common/dbha/ha-module/globalmonitor/monitor_component.go b/dbm-services/common/dbha/ha-module/globalmonitor/monitor_component.go
new file mode 100644
index 0000000000..88f4cf6aae
--- /dev/null
+++ b/dbm-services/common/dbha/ha-module/globalmonitor/monitor_component.go
@@ -0,0 +1,349 @@
+// Package globalmonitor monitors whether the DBHA components work normally
+package globalmonitor
+
+import (
+	"encoding/json"
+	"fmt"
+	"strings"
+	"time"
+
+	"dbm-services/bigdata/db-tools/dbactuator/pkg/util"
+	"dbm-services/common/dbha/ha-module/client"
+	"dbm-services/common/dbha/ha-module/config"
+	"dbm-services/common/dbha/ha-module/constvar"
+	"dbm-services/common/dbha/ha-module/log"
+	"dbm-services/common/dbha/ha-module/monitor"
+	"dbm-services/common/dbha/hadb-api/model"
+)
+
+// MachineInfo is the subset of a cmdb instance record that global monitor decodes
+type MachineInfo struct {
+	IP            string `json:"ip"`
+	LogicalCityID int    `json:"logical_city_id"`
+	ClusterType   string `json:"cluster_type"`
+	MachineType   string `json:"machine_type"`
+}
+
+// MonitorComponent global monitor work struct
+type MonitorComponent struct {
+	// active type list for db detect, valid type in constant.go
+	ActiveClusterType []string `yaml:"active_db_type"`
+	// ip of this global monitor, used when registering and reporting heartbeat to hadb
+	MonIp string
+	// all configure file
+	Conf *config.Config
+	// global monitor configure file
+	MonitorConf *config.GlobalMonitorConfig
+	// API client to access cmdb metadata
+	CmDBClient *client.CmDBClient
+	// API client to access hadb
+	HaDBClient *client.HaDBClient
+	// set of machine ips that cmdb says need to be detected
+	NeedDetectMachines map[string]struct{}
+	// set of logical_city_ids that cmdb says need to be detected
+	NeedDetectCities map[int]struct{}
+	// set of machine ips that have a recent detect record in hadb
+	DetectedMachines map[string]struct{}
+	// set of city ids that have at least one alive HA agent
+	DetectedCities map[int]struct{}
+	// alive HA agents fetched from hadb
+	AgentList []model.HaStatus
+	// alive HA gms fetched from hadb
+	GmList []model.HaStatus
+	// alert info sent along with every global monitor event
+	AlertInfo monitor.MonitorInfo
+	// hash modulus used to fetch cmdb instances in batches
+	HashMod int
+	// logical_city_ids excluded from the coverage statistics
+	IgnoreCityList []int
+}
+
+// NewMonitorComponent builds a global monitor from conf with empty coverage sets and a prefilled alert template
+func NewMonitorComponent(conf *config.Config) *MonitorComponent {
+	return &MonitorComponent{
+		ActiveClusterType:  conf.GlobalMonitorConf.ActiveClusterType,
+		Conf:               conf,
+		MonitorConf:        conf.GlobalMonitorConf,
+		CmDBClient:         client.NewCmDBClient(&conf.DBConf.CMDB, conf.GetCloudId()),
+		HaDBClient:         client.NewHaDBClient(&conf.DBConf.HADB, conf.GetCloudId()),
+		MonIp:              conf.GlobalMonitorConf.LocalIP,
+		NeedDetectMachines: make(map[string]struct{}),
+		NeedDetectCities:   make(map[int]struct{}),
+		DetectedMachines:   make(map[string]struct{}),
+		DetectedCities:     make(map[int]struct{}),
+		HashMod:            conf.GlobalMonitorConf.HashMod,
+		IgnoreCityList:     conf.GlobalMonitorConf.IgnoreCityList,
+		AlertInfo: monitor.MonitorInfo{
+			EventName:       constvar.DBHAEventGlobalMonitor,
+			MonitorInfoType: constvar.MonitorInfoGlobal,
+			Global: monitor.GlobalMonitor{
+				ServerIp:           conf.Monitor.LocalIP,
+				UnCoveredInsNumber: 0,
+				UnCoveredCityIDs:   nil,
+				NeedDetectNumber:   0,
+				HADetectedNumber:   0,
+			},
+		},
+	}
+}
+
+// Run loops forever: collect HA and cmdb info, check coverage and component health, then report heartbeat
+func (m *MonitorComponent) Run() error {
+	for {
+		time.Sleep(10 * time.Second)
+		log.Logger.Infof("------------------global monitor run start-----------------")
+		log.Logger.Debugf("try to get all ha componentinfo")
+		if err := m.getAllHaComponentInfo(); err != nil {
+			log.Logger.Errorf("get all HA component info failed:%s", err.Error())
+			continue
+		}
+		if err := m.getAllDetectedMachineInfo(); err != nil {
+			log.Logger.Errorf("get all HA detected machine failed:%s", err.Error())
+			continue
+		}
+		if err := m.getAllNeedDetectMachineInfo(); err != nil {
+			log.Logger.Errorf("get all CMDB need detect machine failed:%s", err.Error())
+			continue
+		}
+
+		m.checkAllCovered()
+		m.checkComponentNormal()
+		m.reportHeartbeat()
+
+		log.Logger.Infof("------------------global monitor run finish-----------------")
+		time.Sleep(time.Duration(m.MonitorConf.ReportInterval) * time.Second)
+	}
+}
+
+// RegisterMonitorInfoToHaDB registers this global monitor (module constvar.MONITOR, ip MonIp) in HADB
+func (m *MonitorComponent) RegisterMonitorInfoToHaDB() error {
+	err := m.HaDBClient.RegisterDBHAInfo(
+		m.MonIp,
+		0,
+		constvar.MONITOR,
+		0,
+		"",
+		"ALL")
+	if err != nil {
+		return err
+	}
+	return nil
+}
+
+// reportHeartbeat sends the global monitor heartbeat to HA-DB, a failure is only logged
+func (m *MonitorComponent) reportHeartbeat() {
+	err := m.HaDBClient.ReporterMonitorHeartbeat(m.MonIp, strings.Join(m.ActiveClusterType, ","))
+	if err != nil {
+		log.Logger.Errorf("report heartbeat failed:%s", err.Error())
+	}
+}
+
+// checkAllCovered alerts when machines or cities that cmdb needs detected are not covered by HA.
+// AlertInfo is reused by every round, so its per-round fields are recomputed here before sending.
+func (m *MonitorComponent) checkAllCovered() {
+	//undetected instances
+	unCoveredMachineMap := map[string]struct{}{}
+	//undetected logical_city_ids
+	unCoveredCityMap := map[int]struct{}{}
+	m.AlertInfo.Global.NeedDetectNumber = len(m.NeedDetectMachines)
+	m.AlertInfo.Global.HADetectedNumber = len(m.DetectedMachines)
+	log.Logger.Infof("all detected city num:%d", len(m.DetectedCities))
+	log.Logger.Infof("all detected machine num:%d", len(m.DetectedMachines))
+	log.Logger.Infof("all need detect city num:%d", len(m.NeedDetectCities))
+	log.Logger.Infof("all need detect machine num:%d", len(m.NeedDetectMachines))
+
+	// drop the city ids of the previous round, otherwise the list grows with duplicates
+	m.AlertInfo.Global.UnCoveredCityIDs = nil
+	for city := range m.NeedDetectCities {
+		if _, ok := m.DetectedCities[city]; !ok {
+			unCoveredCityMap[city] = struct{}{}
+			m.AlertInfo.Global.UnCoveredCityIDs = append(m.AlertInfo.Global.UnCoveredCityIDs, city)
+		}
+	}
+
+	for ip := range m.NeedDetectMachines {
+		if _, ok := m.DetectedMachines[ip]; !ok {
+			unCoveredMachineMap[ip] = struct{}{}
+		}
+	}
+	// fill the counter before any alert is sent, it feeds the uncovered_ins_num dimension
+	m.AlertInfo.Global.UnCoveredInsNumber = len(unCoveredMachineMap)
+
+	if len(unCoveredMachineMap) > 0 {
+		log.Logger.Errorf("uncovered machine list:%#v", unCoveredMachineMap)
+		if err := monitor.MonitorSend(fmt.Sprintf("%d machines not covered by dbha",
+			len(unCoveredMachineMap)), m.AlertInfo); err != nil {
+			log.Logger.Warnf(err.Error())
+		}
+	}
+
+	// the uncovered city ids were already collected into AlertInfo by the loop above
+	if len(unCoveredCityMap) > 0 {
+		log.Logger.Errorf("uncovered city list:%#v", unCoveredCityMap)
+		if err := monitor.MonitorSend(fmt.Sprintf("%d logical_city_ids not covered by dbha",
+			len(unCoveredCityMap)), m.AlertInfo); err != nil {
+			log.Logger.Warnf(err.Error())
+		}
+	}
+
+	log.Logger.Debugf("global monitor info: %#v", m.AlertInfo.Global)
+}
+
+// checkComponentNormal alerts for every alive agent/gm whose ReportInterval exceeds its threshold
+func (m *MonitorComponent) checkComponentNormal() {
+	for _, agent := range m.AgentList {
+		if agent.ReportInterval > 20 {
+			msg := fmt.Sprintf("agent:%s, cluster_type:%s detect too slow:%d",
+				agent.IP, agent.DbType, agent.ReportInterval)
+			// msg carries runtime data, so it must not be used as the format string itself
+			log.Logger.Errorf("%s", msg)
+			if err := monitor.MonitorSend(msg, m.AlertInfo); err != nil {
+				log.Logger.Warnf(err.Error())
+			}
+		}
+	}
+	for _, gm := range m.GmList {
+		if gm.ReportInterval > 300 {
+			msg := fmt.Sprintf("gm:%s, Campuse:%s report too slow:%d", gm.IP, gm.Campus, gm.ReportInterval)
+			// msg carries runtime data, so it must not be used as the format string itself
+			log.Logger.Errorf("%s", msg)
+			if err := monitor.MonitorSend(msg, m.AlertInfo); err != nil {
+				log.Logger.Warnf(err.Error())
+			}
+		}
+	}
+}
+
+// getCmDBMachineByCluster loads one hash batch of a cluster type from cmdb into the need-detect sets
+func (m *MonitorComponent) getCmDBMachineByCluster(clusterType string, hashMod, hashValue int) error {
+	num := 0
+	req := client.DBInstanceByClusterTypeRequest{
+		HashCnt:      hashMod,
+		HashValue:    hashValue,
+		ClusterTypes: []string{clusterType},
+	}
+
+	//get all instances by cluster type
+	rawInfo, err := m.CmDBClient.GetDBInstanceByClusterType(req)
+	if err != nil {
+		minInfo := monitor.GetApiAlertInfo(constvar.CmDBInstanceUrl, err.Error())
+		if e := monitor.MonitorSend("get instances failed", minInfo); e != nil {
+			log.Logger.Warnf(e.Error())
+		}
+		return fmt.Errorf("fetch all cmdb instance failed:%s", err.Error())
+	}
+
+	for _, v := range rawInfo {
+		cmdbIns := MachineInfo{}
+		rawIns, jsonErr := json.Marshal(v)
+		if jsonErr != nil {
+			log.Logger.Errorf("marshal db instance info failed:%s", jsonErr.Error())
+			return fmt.Errorf("get cmdb instance info failed:%s", jsonErr.Error())
+		}
+		if jsonErr = json.Unmarshal(rawIns, &cmdbIns); jsonErr != nil {
+			log.Logger.Errorf("unmarshal db instance info failed:%s", jsonErr.Error())
+			return fmt.Errorf("get cmdb instance info failed:%s", jsonErr.Error())
+		}
+
+		//should ignore some city
+		if util.HasElem(cmdbIns.LogicalCityID, m.IgnoreCityList) {
+			continue
+		}
+
+		//some cluster type only Proxy layer need HA, so we should skip its storage layer
+		//should skip PredixyRedisCluster's storage layer
+		if cmdbIns.ClusterType == constvar.PredixyRedisCluster &&
+			cmdbIns.MachineType == constvar.TendisCacheMetaType {
+			continue
+		}
+		//should skip TendisplusCluster's storage layer
+		if cmdbIns.ClusterType == constvar.TendisplusCluster &&
+			cmdbIns.MachineType == constvar.TendisplusMetaType {
+			continue
+		}
+		//should skip MongoShardedCluster's storage layer
+		if cmdbIns.ClusterType == constvar.MongoShardedCluster &&
+			(cmdbIns.MachineType == constvar.MongodbMetaType ||
+				cmdbIns.MachineType == constvar.MongoConfigMetaType) {
+			continue
+		}
+
+		if _, ok := m.NeedDetectMachines[cmdbIns.IP]; !ok {
+			m.NeedDetectMachines[cmdbIns.IP] = struct{}{}
+			// count machines (not cities): the debug log below reports a machine number
+			num++
+		}
+		// set semantics, inserting an already known city again is a no-op
+		m.NeedDetectCities[cmdbIns.LogicalCityID] = struct{}{}
+	}
+	log.Logger.Debugf("cluster type:%s, hash_mod:%d, hash_value:%d, need detect machine number:%d",
+		clusterType, hashMod, hashValue, num)
+
+	return nil
+}
+
+// getAllNeedDetectMachineInfo rebuilds the need-detect machine and city sets from cmdb
+func (m *MonitorComponent) getAllNeedDetectMachineInfo() error {
+	// start from empty sets, otherwise entries already removed from cmdb would be kept forever
+	m.NeedDetectMachines, m.NeedDetectCities = make(map[string]struct{}), make(map[int]struct{})
+	for _, clusterType := range m.ActiveClusterType {
+		log.Logger.Infof("try to get all instances by cluster type:%s", clusterType)
+		for i := 0; i < m.HashMod; i++ {
+			if err := m.getCmDBMachineByCluster(clusterType, m.HashMod, i); err != nil {
+				return err
+			}
+		}
+	}
+	log.Logger.Debugf("all need detect city info:%#v", m.NeedDetectCities)
+
+	return nil
+}
+
+// getAllHaComponentInfo refreshes the alive agent/gm lists and the detected city set from hadb
+func (m *MonitorComponent) getAllHaComponentInfo() error {
+	interval := m.MonitorConf.ReportInterval
+	log.Logger.Infof("try to get alive agent info in latest %d second", interval)
+	agentInfo, err := m.HaDBClient.GetAliveHAComponent(constvar.Agent, interval)
+	if err != nil {
+		return fmt.Errorf("get alive agent info failed:%s", err.Error())
+	}
+	m.AgentList = agentInfo
+	log.Logger.Debugf("agent list:%#v", m.AgentList)
+	// rebuild the set every round so a city whose agents all died becomes uncovered
+	m.DetectedCities = make(map[int]struct{}, len(m.AgentList))
+	for _, agent := range m.AgentList {
+		m.DetectedCities[agent.CityID] = struct{}{}
+	}
+	log.Logger.Infof("all detected city list:%#v", m.DetectedCities)
+
+	log.Logger.Infof("try to get alive gm info in latest %d second", interval)
+	gmInfo, err := m.HaDBClient.GetAliveHAComponent(constvar.GM, interval)
+	if err != nil {
+		return fmt.Errorf("get alive gm info failed:%s", err.Error())
+	}
+	m.GmList = gmInfo
+	log.Logger.Debugf("gm list:%#v", m.GmList)
+
+	return nil
+}
+
+// getAllDetectedMachineInfo rebuilds the set of machines that have a fresh detect record in HADB
+func (m *MonitorComponent) getAllDetectedMachineInfo() error {
+	log.Logger.Infof("try to get all detected instances info from hadb")
+	detectInfo, err := m.HaDBClient.GetHADetectInfo()
+	if err != nil {
+		return err
+	}
+	// rebuild every round, a machine whose record went stale must not stay detected
+	m.DetectedMachines = make(map[string]struct{}, len(detectInfo))
+	for _, ins := range detectInfo {
+		// LastTime is a pointer, guard against nil before it is dereferenced
+		if ins.LastTime != nil && ins.LastTime.Before(time.Now()) &&
+			time.Since(*ins.LastTime) <= 5*time.Minute {
+			m.DetectedMachines[ins.IP] = struct{}{}
+		}
+	}
+	log.Logger.Debugf("all detected machine info:%#v", m.DetectedMachines)
+
+	return nil
+}
diff --git a/dbm-services/common/dbha/ha-module/gm/gcm.go b/dbm-services/common/dbha/ha-module/gm/gcm.go
index 423f62e00b..af699b49ef 100644
--- a/dbm-services/common/dbha/ha-module/gm/gcm.go
+++ b/dbm-services/common/dbha/ha-module/gm/gcm.go
@@ -1,8 +1,6 @@
 package gm
 
 import (
-	"dbm-services/common/dbha/ha-module/util"
-	"dbm-services/common/dbha/hadb-api/model"
 	"fmt"
 	"time"
 
@@ -12,6 +10,8 @@ import (
 	"dbm-services/common/dbha/ha-module/dbutil"
 	"dbm-services/common/dbha/ha-module/log"
 	"dbm-services/common/dbha/ha-module/monitor"
+	"dbm-services/common/dbha/ha-module/util"
+	"dbm-services/common/dbha/hadb-api/model"
 )
 
 // GCM gcm work struct
diff --git a/dbm-services/common/dbha/ha-module/gm/gqa.go b/dbm-services/common/dbha/ha-module/gm/gqa.go
index 1d1250ec80..bdc56f3f4f 100644
--- a/dbm-services/common/dbha/ha-module/gm/gqa.go
+++ b/dbm-services/common/dbha/ha-module/gm/gqa.go
@@ -10,6 +10,7 @@ import (
 	"dbm-services/common/dbha/ha-module/dbmodule"
 	"dbm-services/common/dbha/ha-module/dbutil"
 	"dbm-services/common/dbha/ha-module/log"
+	"dbm-services/common/dbha/ha-module/monitor"
 )
 
 // GQA work struct
@@ -200,6 +201,10 @@ func (gqa *GQA) getAllInstanceFromCMDB(
 	ip, _ := instance.db.GetAddress()
 	instances, err := gqa.CmDBClient.GetDBInstanceInfoByIp(ip)
 	if err != nil {
+		minInfo := monitor.GetApiAlertInfo(constvar.CmDBInstanceUrl, err.Error())
+		if e := monitor.MonitorSend("get instances failed", minInfo); e != nil {
+			log.Logger.Warnf(e.Error())
+		}
 		log.Logger.Errorf("get mysql instance failed. err:%s", err.Error())
 		return nil, err
 	}
diff --git a/dbm-services/common/dbha/ha-module/monitor/monitor.go b/dbm-services/common/dbha/ha-module/monitor/monitor.go
index 4297545b0e..3d9b77da1d 100644
--- a/dbm-services/common/dbha/ha-module/monitor/monitor.go
+++ b/dbm-services/common/dbha/ha-module/monitor/monitor.go
@@ -2,11 +2,8 @@
 package monitor
 
 import (
-	"encoding/json"
-	"fmt"
 	"strconv"
 
-	"dbm-services/common/dbha/ha-module/client"
 	"dbm-services/common/dbha/ha-module/config"
 	"dbm-services/common/dbha/ha-module/constvar"
 	"dbm-services/common/dbha/ha-module/dbutil"
@@ -49,7 +46,6 @@ type DetectMonitor struct {
 
 // GlobalMonitor HA global monitor struct
 type GlobalMonitor struct {
-	CloudId  int
 	ServerIp string
 	//not detect logical_city_ids
 	UnCoveredCityIDs []int
@@ -59,6 +55,13 @@ type GlobalMonitor struct {
 	NeedDetectNumber int
 	//HA detected instances number
 	HADetectedNumber int
+	Content          string
+}
+
+// APIMonitor api monitor struct
+type APIMonitor struct {
+	ApiName string
+	Message string
 }
 
 // MonitorInfo the struct of monitor information
@@ -68,7 +71,8 @@ type MonitorInfo struct {
 	Switch          SwitchMonitor
 	Detect          DetectMonitor
 	//global monitor
-	Global GlobalMonitor
+	Global  GlobalMonitor
+	ApiInfo APIMonitor
 }
 
 // MonitorInit init monitor moudule by config
@@ -107,7 +111,8 @@ func MonitorSendDetect(ins dbutil.DataBaseDetect, eventName string, content stri
 // MonitorSend send dbha monitor information
 func MonitorSend(content string, info MonitorInfo) error {
 	addDimension := make(map[string]interface{})
-	if info.MonitorInfoType == constvar.MonitorInfoSwitch {
+	switch info.MonitorInfoType {
+	case constvar.MonitorInfoSwitch:
 		// switch monitor information dimension add
 		addDimension["instance_role"] = info.Switch.Role
 		addDimension["appid"] = info.Switch.Bzid
@@ -127,7 +132,7 @@ func MonitorSend(content string, info MonitorInfo) error {
 			addDimension[constvar.NewMasterHost] = info.Switch.NewMasterHost
 			addDimension[constvar.NewMasterPort] = info.Switch.NewMasterPort
 		}
-	} else if info.MonitorInfoType == constvar.MonitorInfoDetect {
+	case constvar.MonitorInfoDetect:
 		// detect monitor information dimension add
 		addDimension["appid"] = info.Detect.Bzid
 		addDimension["server_ip"] = info.Detect.ServerIp
@@ -136,14 +141,15 @@ func MonitorSend(content string, info MonitorInfo) error {
 		addDimension["cluster_domain"] = info.Detect.Cluster
 		addDimension["machine_type"] = info.Detect.MachineType
 		addDimension["cluster_type"] = info.Detect.ClusterType
-	} else if info.MonitorInfoType == constvar.MonitorInfoGlobal {
-		addDimension["cloud_id"] = info.Global.CloudId
+	case constvar.MonitorInfoGlobal:
 		addDimension["server_ip"] = info.Global.ServerIp
-		addDimension["cloud_id"] = info.Global.CloudId
-		addDimension["uncovered_num"] = info.Global.UnCoveredInsNumber
+		addDimension["uncovered_ins_num"] = info.Global.UnCoveredInsNumber
 		addDimension["need_detect_num"] = info.Global.NeedDetectNumber
-		addDimension["ha_detect__num"] = info.Global.HADetectedNumber
+		addDimension["ha_detect_num"] = info.Global.HADetectedNumber
 		addDimension["uncovered_city_ids"] = util.IntSlice2String(info.Global.UnCoveredCityIDs, ",")
+	case constvar.MonitorInfoAPI:
+		addDimension["api_name"] = info.ApiInfo.ApiName
+		addDimension["api_message"] = info.ApiInfo.Message
 	}
 
 	return SendEvent(info.EventName, content, addDimension)
@@ -247,113 +253,13 @@ func GetMonitorInfoByDetect(ins dbutil.DataBaseDetect, eventName string) Monitor
 	}
 }
 
-// CheckHAComponent check whether HA component work normal
-// 1. all need detect CMDB instances should detect
-// 2. alive agent should found
-func CheckHAComponent(conf *config.Config) (MonitorInfo, error) {
-	cmdbClient := client.NewCmDBClient(&conf.DBConf.CMDB, conf.GetCloudId())
-	hadbClient := client.NewHaDBClient(&conf.DBConf.HADB, conf.GetCloudId())
-	monitorInfo := MonitorInfo{
-		EventName:       constvar.DBHAEventGlobalMonitor,
-		MonitorInfoType: constvar.MonitorInfoGlobal,
-		Global: GlobalMonitor{
-			CloudId:            conf.Monitor.CloudID,
-			ServerIp:           conf.Monitor.LocalIP,
-			UnCoveredInsNumber: 0,
-			UnCoveredCityIDs:   nil,
-			NeedDetectNumber:   0,
-			HADetectedNumber:   0,
+func GetApiAlertInfo(apiName, message string) MonitorInfo {
+	return MonitorInfo{
+		EventName:       constvar.DBHAEventApiFailed,
+		MonitorInfoType: constvar.MonitorInfoAPI,
+		ApiInfo: APIMonitor{
+			ApiName: apiName,
+			Message: message,
 		},
 	}
-
-	//undetected instances
-	unCoveredIns := map[string]struct{}{}
-	//undetected logical_city_ids
-	unCoveredCityIDs := map[int]struct{}{}
-	//all logical_city_ids detected by agent
-	allDetectCityIDs := map[int]struct{}{}
-
-	log.Logger.Infof("try to get alive agent info latest 10 minutes")
-	if agentInfo, err := hadbClient.GetAliveHAComponent(constvar.Agent, 600); err != nil {
-		return monitorInfo, fmt.Errorf("get alive agent info failed:%s", err.Error())
-	} else {
-		log.Logger.Debugf("all agent info:%#v", agentInfo)
-		for _, agent := range agentInfo {
-			allDetectCityIDs[agent.CityID] = struct{}{}
-		}
-	}
-
-	//2. uncovered logic_city_id
-	log.Logger.Infof("try to get all need detect instances info from cmdb")
-	if rawInfo, err := cmdbClient.GetAllDBInstanceInfo(); err != nil {
-		return monitorInfo, fmt.Errorf("fetch all cmdb instance failed:%s", err.Error())
-	} else {
-		needDetectIpMap := map[string]struct{}{}
-		log.Logger.Debugf("all cmdb instances number:%d", len(rawInfo))
-
-		log.Logger.Infof("try to get all detected instances info from hadb")
-		detectInfo, err := hadbClient.GetDBDetectInfo()
-		if err != nil {
-			return monitorInfo, fmt.Errorf("fetch all detected instances from hadb failed:%s", err.Error())
-		}
-		log.Logger.Debugf("HA detected instances number:%d", len(detectInfo))
-		monitorInfo.Global.HADetectedNumber = len(detectInfo)
-
-		for _, v := range rawInfo {
-			found := false
-			cmdbIns := dbutil.DBInstanceInfoDetail{}
-			rawIns, jsonErr := json.Marshal(v)
-			if jsonErr != nil {
-				log.Logger.Errorf("marshal db instance info failed:%s", jsonErr.Error())
-				return monitorInfo, fmt.Errorf("get cmdb instance info failed:%s", jsonErr.Error())
-			}
-			if jsonErr = json.Unmarshal(rawIns, &cmdbIns); jsonErr != nil {
-				log.Logger.Errorf("unmarshal db instance info failed:%s", jsonErr.Error())
-				return monitorInfo, fmt.Errorf("get cmdb instance info failed:%s", jsonErr.Error())
-			}
-
-			//TODO, API filter active cluster type more efficient
-			if _, ok := needDetectIpMap[cmdbIns.IP]; ok ||
-				!util.HasElem(cmdbIns.ClusterType, conf.Monitor.ActiveDBType) {
-				continue
-			} else {
-				needDetectIpMap[cmdbIns.IP] = struct{}{}
-			}
-
-			for _, detectIns := range detectInfo {
-				if cmdbIns.IP == detectIns.IP {
-					found = true
-					break
-				}
-			}
-			if !found {
-				unCoveredIns[cmdbIns.IP] = struct{}{}
-				if _, ok := allDetectCityIDs[cmdbIns.LogicalCityID]; !ok {
-					unCoveredCityIDs[cmdbIns.LogicalCityID] = struct{}{}
-				}
-			}
-		}
-		monitorInfo.Global.NeedDetectNumber = len(needDetectIpMap)
-	}
-
-	if len(unCoveredIns) > 0 {
-		log.Logger.Errorf("uncovered instances list:%#v", unCoveredIns)
-		return monitorInfo, fmt.Errorf("%d instances not covered by dbha", len(unCoveredIns))
-	}
-
-	if len(unCoveredCityIDs) > 0 {
-		for k := range unCoveredCityIDs {
-			monitorInfo.Global.UnCoveredCityIDs = append(monitorInfo.Global.UnCoveredCityIDs, k)
-		}
-		return monitorInfo, fmt.Errorf("%d logical_city_ids not covered by dbha", len(unCoveredCityIDs))
-	}
-
-	if monitorInfo.Global.HADetectedNumber != monitorInfo.Global.NeedDetectNumber {
-		return monitorInfo, fmt.Errorf("need detect number:%d not equal HA detect number:%d",
-			monitorInfo.Global.NeedDetectNumber, monitorInfo.Global.HADetectedNumber)
-	}
-
-	log.Logger.Debugf("global monitor info: %#v", monitorInfo)
-
-	return monitorInfo, nil
 }
diff --git a/dbm-services/common/dbha/ha-module/test/client_test.go b/dbm-services/common/dbha/ha-module/test/client_test.go
deleted file mode 100644
index f354d428f6..0000000000
--- a/dbm-services/common/dbha/ha-module/test/client_test.go
+++ /dev/null
@@ -1,69 +0,0 @@
-package test
-
-import (
-	"fmt"
-	"net/http"
-	"testing"
-
-	"dbm-services/common/dbha/ha-module/client"
-	"dbm-services/common/dbha/ha-module/config"
-	"dbm-services/common/dbha/ha-module/constvar"
-	"dbm-services/common/dbha/ha-module/dbmodule"
-)
-
-func TestNewClientByAddrs(t *testing.T) {
-	addr := "http://127.0.0.1:8080"
-	c, _ := client.NewClientByAddrs([]string{addr}, constvar.CmDBName)
-	param := c.ConvertParamForGetRequest(map[string]string{
-		"apps": "test1",
-	})
-	result, err := c.DoNew(http.MethodGet, "/cmdb/cluster/query?"+param, nil, nil)
-	if err != nil {
-		fmt.Printf("requst failed:%s", err.Error())
-	}
-	fmt.Printf("%s", string(result.Data))
-}
-
-func TestGetInstanceByCity(t *testing.T) {
-	GlobalConfig, err := config.ParseConfigureFile("../monitor_agent.yaml")
-	if err != nil {
-		fmt.Printf("get config failed. err:%s", err.Error())
-		t.FailNow()
-	}
-	addr := "http://127.0.0.1:8080"
-	c, _ := client.NewClientByAddrs([]string{addr}, constvar.CmDBName)
-	cmdbC := client.CmDBClient{
-		Client: *c,
-	}
-	rawList, err := cmdbC.GetDBInstanceInfoByCity(2)
-	if err != nil {
-		fmt.Printf("get instance failed. err:%s", err.Error())
-		t.FailNow()
-	}
-	dbs, err := dbmodule.DBCallbackMap[constvar.DetectTenDBHA].FetchDBCallback(rawList, GlobalConfig)
-	for _, info := range dbs {
-		ip, port := info.GetAddress()
-		fmt.Printf("%s, %d, %s, %s, %s\n", ip, port, info.GetDBType(), info.GetStatus(), info.GetApp())
-	}
-}
-
-func TestGetInstanceByIp(t *testing.T) {
-	addr := "http://127.0.0.1:8080"
-	c, _ := client.NewClientByAddrs([]string{addr}, constvar.CmDBName)
-	cmdbC := client.CmDBClient{
-		Client: *c,
-	}
-	inf, err := cmdbC.GetDBInstanceInfoByIp("127.0.0.6")
-	if err != nil {
-		fmt.Printf("get instance failed. err:%s", err.Error())
-		t.FailNow()
-	}
-	list, err := dbmodule.DBCallbackMap[constvar.DetectTenDBHA].GetSwitchInstanceInformation(inf, nil)
-	if err != nil {
-		fmt.Printf("get switch instance failed. err:%s", err.Error())
-		t.FailNow()
-	}
-	for _, info := range list {
-		fmt.Printf("%v\n", info)
-	}
-}
diff --git a/dbm-services/common/dbha/hadb-api/pkg/handler/hastatus/hastatus_handler.go b/dbm-services/common/dbha/hadb-api/pkg/handler/hastatus/hastatus_handler.go
index a68af4b1a3..b6ba44a2cc 100644
--- a/dbm-services/common/dbha/hadb-api/pkg/handler/hastatus/hastatus_handler.go
+++ b/dbm-services/common/dbha/hadb-api/pkg/handler/hastatus/hastatus_handler.go
@@ -52,6 +52,8 @@ const (
 	GetAgentInfo = "agent_get_agent_info"
 	// UpdateAgentInfo TODO
 	UpdateAgentInfo = "reporter_agent_heartbeat"
+	// UpdateMonitorInfo api name used to report the global monitor heartbeat
+	UpdateMonitorInfo = "reporter_monitor_heartbeat"
 	// UpdateGMInfo TODO
 	UpdateGMInfo = "reporter_gm_heartbeat"
 	// GetAliveAgentInfo TODO
@@ -77,9 +79,7 @@ func Handler(ctx *fasthttp.RequestCtx) {
 	switch param.Name {
 	case GetGmInfo, GetAgentInfo:
 		GetHaInfo(ctx, param.QueryArgs)
-	case UpdateAgentInfo:
-		UpdateHaInfo(ctx, param.QueryArgs, param.SetArgs)
-	case UpdateGMInfo:
+	case UpdateAgentInfo, UpdateGMInfo, UpdateMonitorInfo:
 		UpdateHaInfo(ctx, param.QueryArgs, param.SetArgs)
 	case GetAliveHAInfo:
 		GetAliveHAByModule(ctx, param.QueryArgs)
diff --git a/dbm-ui/backend/flow/utils/cloud/script_template/dbha_template.py b/dbm-ui/backend/flow/utils/cloud/script_template/dbha_template.py
index ebd2371324..3ef26dd08c 100644
--- a/dbm-ui/backend/flow/utils/cloud/script_template/dbha_template.py
+++ b/dbm-ui/backend/flow/utils/cloud/script_template/dbha_template.py
@@ -306,6 +306,8 @@
 cp /data/install/{{dbha_conf}} $path/dbha/{{dbha_type}};
 cp /data/install/dbha $path/dbha/{{dbha_type}};
 chmod -R 777 $path/dbha;
+systemctl start ntpd
+systemctl enable ntpd
 
 # 部署dbha服务
 cd $path/dbha/{{dbha_type}}