diff --git a/VERSION b/VERSION index 1e66a61..01e994d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -v0.3.1 \ No newline at end of file +v0.4.0 \ No newline at end of file diff --git a/chart/Chart.yaml b/chart/Chart.yaml index 623de55..1e1a62c 100644 --- a/chart/Chart.yaml +++ b/chart/Chart.yaml @@ -3,10 +3,10 @@ name: bmc-operator description: A Helm chart for BMC Operator # This is the chart version, which will be taken from VERSION file -version: 0.3.1 +version: 0.4.0 # This is the version number of the application being deployed, which will be taken from VERSION file -appVersion: "0.3.1" +appVersion: "0.4.0" type: application diff --git a/chart/crds/bmc.spidernet.io_hoststatuses.yaml b/chart/crds/bmc.spidernet.io_hoststatuses.yaml index 68abb15..d43560a 100644 --- a/chart/crds/bmc.spidernet.io_hoststatuses.yaml +++ b/chart/crds/bmc.spidernet.io_hoststatuses.yaml @@ -92,12 +92,35 @@ spec: type: object lastUpdateTime: type: string + log: + properties: + lastestLog: + properties: + message: + type: string + time: + type: string + required: + - message + - time + type: object + totalLogAccount: + format: int32 + type: integer + warningLogAccount: + format: int32 + type: integer + required: + - totalLogAccount + - warningLogAccount + type: object required: - basic - clusterAgent - healthy - info - lastUpdateTime + - log type: object type: object served: true diff --git a/chart/templates/agent-templates.yaml b/chart/templates/agent-templates.yaml index 637446c..f8da298 100644 --- a/chart/templates/agent-templates.yaml +++ b/chart/templates/agent-templates.yaml @@ -56,6 +56,10 @@ data: value: {{ .Values.clusterAgent.feature.hostStatusUpdateInterval | quote }} - name: LOG_LEVEL value: {{ .Values.clusterAgent.feature.logLevel | quote }} + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace args: - --metrics-bind-address=:8080 - --health-probe-bind-address=:8081 @@ -142,6 +146,9 @@ data: rules: - apiGroups: [""] resources: ["pods", "services", "configmaps", "secrets"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["events"] verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] - apiGroups: ["apps"] resources: ["deployments"] diff --git a/doc/usage/quickstart.md b/doc/usage/quickstart.md index d55286e..92344a4 100644 --- a/doc/usage/quickstart.md +++ b/doc/usage/quickstart.md @@ -308,3 +308,25 @@ test-hostendpoint bmc-clusteragent true 192.168.0.50 ho 1. 进入 agent pod 中,查看 DHCP server 的实时 IP 分配文件 `/var/lib/dhcp/bmc-clusteragent-dhcpd.leases`,确认和删除其中期望解除绑定的 IP 地址 2. `kubectl get hoststatus -l status.basic.ipAddr=` 查看 hoststatus 对象,确认其中的 IP 和 MAC 地址符合删除预期,然后手动删除对应的 hoststatus 对象 `kubectl delete hoststatus -l status.basic.ipAddr=192.168.0.101` 3. 后端会自动更新 DHCP server 的配置,实现 IP 和 MAC 地址的解绑(可进入 agent pod 中,查看文件 `/etc/dhcp/dhcpd.conf` 确认) + +3. 查看 BMC 主机的日志 + +```bash +# 获取所有 BMC 主机的日志 +kubectl get events -n bmc --field-selector reason=BMCLogEntry + +# 获取指定 BMC 主机的日志 +kubectl get events -n bmc --field-selector reason=BMCLogEntry,involvedObject.name=${HoststatusName} + +# 获取指定 BMC 主机的日志统计 +kubectl get hoststatus ${HoststatusName} -n bmc -o jsonpath='{.status.log}' | jq . + { + "lastestLog": { + "message": "[][2018-08-31T13:33:54+00:00][]: [ PS1 Status ] Power Supply Failure", + "time": "2018-08-31T13:33:54+00:00" + }, + "totalLogAccount": 67, + "warningLogAccount": 67 + } + +``` diff --git a/pkg/agent/config/config.go b/pkg/agent/config/config.go index 99c96f5..5504521 100644 --- a/pkg/agent/config/config.go +++ b/pkg/agent/config/config.go @@ -29,6 +29,8 @@ type AgentConfig struct { Password string // 主机状态更新间隔(秒) HostStatusUpdateInterval int + // pod namespace + PodNamespace string } // ValidateEndpointConfig validates the endpoint configuration @@ -175,6 +177,8 @@ func LoadAgentConfig(k8sClient *kubernetes.Clientset) (*AgentConfig, error) { return nil, fmt.Errorf("CLUSTERAGENT_NAME environment variable not set") } + ns := os.Getenv("POD_NAMESPACE") + updateInterval := 60 // 默认 60 秒 intervalStr := os.Getenv("HOST_STATUS_UPDATE_INTERVAL") if intervalStr == "" { @@ -225,6 +229,7 @@ func LoadAgentConfig(k8sClient *kubernetes.Clientset) (*AgentConfig, error) { ClusterAgentName: agentName, AgentObjSpec: clusterAgent.Spec, HostStatusUpdateInterval: updateInterval, + PodNamespace: ns, } // Validate endpoint configuration diff --git a/pkg/agent/hostendpoint/controller.go b/pkg/agent/hostendpoint/controller.go index 7ea7606..f9d2516 100644 --- a/pkg/agent/hostendpoint/controller.go +++ b/pkg/agent/hostendpoint/controller.go @@ -176,6 +176,11 @@ func (r *HostEndpointReconciler) handleHostEndpoint(ctx context.Context, hostEnd Port: *hostEndpoint.Spec.Port, }, Info: map[string]string{}, + Log: bmcv1beta1.LogStruct{ + TotalLogAccount: 0, + WarningLogAccount: 0, + LastestLog: nil, + }, } if err := r.client.Status().Update(ctx, hostStatus); err != nil { diff --git a/pkg/agent/hoststatus/HostStatusReconcile.go b/pkg/agent/hoststatus/HostStatusReconcile.go index a564050..66f3aed 100644 --- a/pkg/agent/hoststatus/HostStatusReconcile.go +++ b/pkg/agent/hoststatus/HostStatusReconcile.go @@ -12,7 +12,11 @@ import ( //"github.com/spidernet-io/bmc/pkg/lock" "github.com/spidernet-io/bmc/pkg/log" "github.com/spidernet-io/bmc/pkg/redfish" + + gofishredfish "github.com/stmcginnis/gofish/redfish" + "go.uber.org/zap" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" @@ -24,6 +28,58 @@ var hostStatusLock = &sync.Mutex{} // ------------------------------ update the spec.info of the hoststatus +// GenerateEvents creates Kubernetes events from Redfish log entries and returns the latest message and count +func (c *hostStatusController) GenerateEvents(logEntrys []*gofishredfish.LogEntry, hostStatusName string, lastLogTime string) (newLastestTime, newLastestMsg string, totalMsgCount, warningMsgCount, newLogAccount int) { + totalMsgCount = 0 + warningMsgCount = 0 + newLogAccount = 0 + newLastestTime = "" + newLastestMsg = "" + + if len(logEntrys) == 0 { + return + } + + totalMsgCount = len(logEntrys) + for m, entry := range logEntrys { + //log.Logger.Debugf("log service entries[%d] timestamp: %+v", m, entry.Created) + //log.Logger.Debugf("log service entries[%d] severity: %+v", m, entry.Severity) + //log.Logger.Debugf("log service entries[%d] oemSensorType: %+v", m, entry.OemSensorType) + //log.Logger.Debugf("log service entries[%d] message: %+v", m, entry.Message) + + msg := fmt.Sprintf("[%s][%s]: %s %s", entry.Created, entry.Severity, entry.OemSensorType, entry.Message) + + ty := corev1.EventTypeNormal + if entry.Severity != gofishredfish.OKEventSeverity && entry.Severity != "" { + ty = corev1.EventTypeWarning + warningMsgCount++ + } + + // 所有的新日志,生成 event + if entry.Created != lastLogTime { + newLogAccount++ + log.Logger.Infof("find new log for hostStatus %s: %s", hostStatusName, msg) + + // 确认是否有新日志了 + if m == 0 { + newLastestTime = entry.Created + newLastestMsg = msg + } + + // Create event + t := &corev1.ObjectReference{ + Kind: bmcv1beta1.KindHostStatus, + Name: hostStatusName, + Namespace: c.config.PodNamespace, + APIVersion: bmcv1beta1.APIVersion, + } + c.recorder.Event(t, ty, "BMCLogEntry", msg) + + } + } + return +} + // this is called by UpdateHostStatusAtInterval and UpdateHostStatusWrapper func (c *hostStatusController) UpdateHostStatusInfo(name string, d *hoststatusdata.HostConnectCon) (bool, error) { @@ -79,6 +135,29 @@ func (c *hostStatusController) UpdateHostStatusInfo(name string, d *hoststatusda log.Logger.Infof("HostStatus %s change from %v to %v , update status", name, existing.Status.Healthy, healthy) } + // 获取日志 + if healthy { + logEntrys, err := client.GetLog() + if err != nil { + log.Logger.Errorf("Failed to get logs of HostStatus %s: %v", name, err) + } else { + lastLogTime := "" + if updated.Status.Log.LastestLog != nil { + lastLogTime = updated.Status.Log.LastestLog.Time + } + newLastestTime, newLastestMsg, totalMsgCount, warningMsgCount, newLogAccount := c.GenerateEvents(logEntrys, name, lastLogTime) + if newLastestTime != "" { + updated.Status.Log.TotalLogAccount = int32(totalMsgCount) + updated.Status.Log.WarningLogAccount = int32(warningMsgCount) + updated.Status.Log.LastestLog = &bmcv1beta1.LogEntry{ + Time: newLastestTime, + Message: newLastestMsg, + } + log.Logger.Infof("find %d new logs for hostStatus %s", newLogAccount, name) + } + } + } + // 更新 HostStatus if !compareHostStatus(updated.Status, existing.Status, log.Logger) { log.Logger.Debugf("status changed, existing: %v, updated: %v", existing.Status, updated.Status) diff --git a/pkg/agent/hoststatus/dhcp.go b/pkg/agent/hoststatus/dhcp.go index 993709e..eafb611 100644 --- a/pkg/agent/hoststatus/dhcp.go +++ b/pkg/agent/hoststatus/dhcp.go @@ -148,6 +148,11 @@ func (c *hostStatusController) handleDHCPAdd(client dhcptypes.ClientInfo) error ActiveDhcpClient: true, }, Info: map[string]string{}, + Log: bmcv1beta1.LogStruct{ + TotalLogAccount: 0, + WarningLogAccount: 0, + LastestLog: nil, + }, } if c.config.AgentObjSpec.Endpoint.SecretName != "" { hostStatus.Status.Basic.SecretName = c.config.AgentObjSpec.Endpoint.SecretName diff --git a/pkg/agent/hoststatus/manager.go b/pkg/agent/hoststatus/manager.go index b8b31a3..58a90f3 100644 --- a/pkg/agent/hoststatus/manager.go +++ b/pkg/agent/hoststatus/manager.go @@ -4,6 +4,9 @@ import ( "sync" "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/record" + typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1" + corev1 "k8s.io/api/core/v1" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -29,10 +32,17 @@ type hostStatusController struct { deleteChan chan types.ClientInfo stopCh chan struct{} wg sync.WaitGroup + recorder record.EventRecorder } func NewHostStatusController(kubeClient kubernetes.Interface, config *config.AgentConfig, mgr ctrl.Manager) HostStatusController { log.Logger.Debugf("Creating new HostStatus controller for cluster agent: %s", config.ClusterAgentName) + + // Create event recorder + eventBroadcaster := record.NewBroadcaster() + eventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")}) + recorder := eventBroadcaster.NewRecorder(mgr.GetScheme(), corev1.EventSource{Component: "bmc-controller"}) + controller := &hostStatusController{ client: mgr.GetClient(), kubeClient: kubeClient, @@ -40,6 +50,7 @@ func NewHostStatusController(kubeClient kubernetes.Interface, config *config.Age addChan: make(chan types.ClientInfo), deleteChan: make(chan types.ClientInfo), stopCh: make(chan struct{}), + recorder: recorder, } log.Logger.Debugf("HostStatus controller created successfully") diff --git a/pkg/k8s/apis/bmc.spidernet.io/v1beta1/hoststatus_types.go b/pkg/k8s/apis/bmc.spidernet.io/v1beta1/hoststatus_types.go index 70f8169..02ee4ef 100644 --- a/pkg/k8s/apis/bmc.spidernet.io/v1beta1/hoststatus_types.go +++ b/pkg/k8s/apis/bmc.spidernet.io/v1beta1/hoststatus_types.go @@ -34,6 +34,20 @@ type HostStatusStatus struct { LastUpdateTime string `json:"lastUpdateTime"` Basic BasicInfo `json:"basic"` Info map[string]string `json:"info"` + Log LogStruct `json:"log"` +} + +type LogStruct struct { + // +kubebuilder:validation:Required + TotalLogAccount int32 `json:"totalLogAccount"` + WarningLogAccount int32 `json:"warningLogAccount"` + // +optional + LastestLog *LogEntry `json:"lastestLog,omitempty"` +} + +type LogEntry struct { + Time string `json:"time"` + Message string `json:"message"` } type BasicInfo struct { diff --git a/pkg/k8s/apis/bmc.spidernet.io/v1beta1/zz_generated.deepcopy.go b/pkg/k8s/apis/bmc.spidernet.io/v1beta1/zz_generated.deepcopy.go index 723ab09..cc677a1 100644 --- a/pkg/k8s/apis/bmc.spidernet.io/v1beta1/zz_generated.deepcopy.go +++ b/pkg/k8s/apis/bmc.spidernet.io/v1beta1/zz_generated.deepcopy.go @@ -357,6 +357,7 @@ func (in *HostStatusStatus) DeepCopyInto(out *HostStatusStatus) { (*out)[key] = val } } + in.Log.DeepCopyInto(&out.Log) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostStatusStatus. @@ -368,3 +369,38 @@ func (in *HostStatusStatus) DeepCopy() *HostStatusStatus { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LogEntry) DeepCopyInto(out *LogEntry) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LogEntry. +func (in *LogEntry) DeepCopy() *LogEntry { + if in == nil { + return nil + } + out := new(LogEntry) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LogStruct) DeepCopyInto(out *LogStruct) { + *out = *in + if in.LastestLog != nil { + in, out := &in.LastestLog, &out.LastestLog + *out = new(LogEntry) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LogStruct. +func (in *LogStruct) DeepCopy() *LogStruct { + if in == nil { + return nil + } + out := new(LogStruct) + in.DeepCopyInto(out) + return out +} diff --git a/pkg/redfish/interface.go b/pkg/redfish/interface.go index 31e1a18..9043440 100644 --- a/pkg/redfish/interface.go +++ b/pkg/redfish/interface.go @@ -2,6 +2,7 @@ package redfish import ( "fmt" + "github.com/stmcginnis/gofish/redfish" "reflect" "github.com/spidernet-io/bmc/pkg/agent/hoststatus/data" @@ -13,6 +14,7 @@ import ( type RefishClient interface { Power(string) error GetInfo() (map[string]string, error) + GetLog() ([]*redfish.LogEntry, error) } // redfishClient 实现了 Client 接口 diff --git a/pkg/redfish/log.go b/pkg/redfish/log.go new file mode 100644 index 0000000..2436f69 --- /dev/null +++ b/pkg/redfish/log.go @@ -0,0 +1,60 @@ +package redfish + +import ( + "fmt" + + "github.com/stmcginnis/gofish/redfish" +) + +func (c *redfishClient) GetLog() ([]*redfish.LogEntry, error) { + + result := []*redfish.LogEntry{} + + // Attached the client to service root + service := c.client.Service + + // Query the computer systems + ss, err := service.Systems() + if err != nil { + c.logger.Errorf("failed to Query the computer systems: %+v", err) + return nil, err + } else if len(ss) == 0 { + c.logger.Errorf("failed to get system") + return nil, fmt.Errorf("failed to get system") + } + c.logger.Debugf("system amount: %d", len(ss)) + // for n, t := range ss { + // c.logger.Debugf("systems[%d]: %+v", n, *t) + // } + + // for barel metal case, + system := ss[0] + + ls, err := system.LogServices() + if err != nil { + c.logger.Errorf("failed to Query the log services: %+v", err) + return nil, err + } else if len(ls) == 0 { + c.logger.Errorf("failed to get log service") + return nil, nil + } + c.logger.Debugf("log service amount: %d", len(ls)) + for _, t := range ls { + if t.Status.State != "Enabled" { + c.logger.Debugf("log service %s is disabled", t.Name) + continue + } + + entries, err := t.Entries() + if err != nil { + c.logger.Errorf("failed to Query the log service entries: %+v", err) + return nil, err + } else if len(entries) > 0 { + c.logger.Debugf("log service entries amount: %d", len(entries)) + result = append(result, entries...) + } + } + + return result, nil + +}