Skip to content

Commit

Permalink
feat: health check endpoint (#971)
Browse files Browse the repository at this point in the history
* chore: move Nostr relay readiness methods to service.go

* feat: basic health checks implemented

* chore: rename alby info variable

* feat: add basic health check ui, remove unnecessary alarms

* fix: health indicator layout on mobile

---------

Co-authored-by: Roland Bewick <[email protected]>
  • Loading branch information
rdmitr and rolznz authored Jan 17, 2025
1 parent fae02f3 commit ff9ef56
Show file tree
Hide file tree
Showing 18 changed files with 253 additions and 46 deletions.
44 changes: 44 additions & 0 deletions api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -1024,6 +1024,50 @@ func (api *api) GetLogOutput(ctx context.Context, logType string, getLogRequest
return &GetLogOutputResponse{Log: string(logData)}, nil
}

// Health runs the basic health checks and reports every failed check as an
// alarm in the returned HealthResponse.
//
// Checks performed, in order:
//   - Alby service status, via albyOAuthSvc.GetInfo
//   - Nostr relay readiness, via svc.IsRelayReady
//   - node readiness and channel activity, only when an LN client is available
//
// A non-nil error is returned only when a check itself could not be carried
// out (fetching the Alby info, the node status or the channel list failed);
// in that case no response is returned.
func (api *api) Health(ctx context.Context) (*HealthResponse, error) {
	var alarms []HealthAlarm

	albyInfo, err := api.albyOAuthSvc.GetInfo(ctx)
	if err != nil {
		return nil, err
	}
	if !albyInfo.Healthy {
		alarms = append(alarms, NewHealthAlarm(HealthAlarmKindAlbyService, albyInfo.Incidents))
	}

	if !api.svc.IsRelayReady() {
		alarms = append(alarms, NewHealthAlarm(HealthAlarmKindNostrRelayOffline, nil))
	}

	// Node and channel checks are skipped while no LN client is available.
	if lnClient := api.svc.GetLNClient(); lnClient != nil {
		nodeStatus, err := lnClient.GetNodeStatus(ctx)
		if err != nil {
			return nil, err
		}
		switch {
		case nodeStatus == nil:
			// Pass an untyped nil: a nil *NodeStatus boxed into `any` is a
			// non-nil interface, which defeats `omitempty` on RawDetails and
			// would be serialized as `"rawDetails": null`.
			alarms = append(alarms, NewHealthAlarm(HealthAlarmKindNodeNotReady, nil))
		case !nodeStatus.IsReady:
			alarms = append(alarms, NewHealthAlarm(HealthAlarmKindNodeNotReady, nodeStatus))
		}

		channels, err := lnClient.ListChannels(ctx)
		if err != nil {
			return nil, err
		}

		// Only the existence of an inactive channel matters, so search
		// instead of filtering: this leaves the slice returned by the LN
		// client untouched and stops at the first match.
		hasOfflineChannel := slices.ContainsFunc(channels, func(channel lnclient.Channel) bool {
			return !channel.Active
		})
		if hasOfflineChannel {
			alarms = append(alarms, NewHealthAlarm(HealthAlarmKindChannelsOffline, nil))
		}
	}

	return &HealthResponse{Alarms: alarms}, nil
}

func (api *api) parseExpiresAt(expiresAtString string) (*time.Time, error) {
var expiresAt *time.Time
if expiresAtString != "" {
Expand Down
26 changes: 26 additions & 0 deletions api/models.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ type API interface {
RestoreBackup(unlockPassword string, r io.Reader) error
MigrateNodeStorage(ctx context.Context, to string) error
GetWalletCapabilities(ctx context.Context) (*WalletCapabilitiesResponse, error)
Health(ctx context.Context) (*HealthResponse, error)
}

type App struct {
Expand Down Expand Up @@ -366,3 +367,28 @@ type Channel struct {
// MigrateNodeStorageRequest is the JSON payload for a node storage migration.
// NOTE(review): To is presumably forwarded as the `to` argument of
// API.MigrateNodeStorage — confirm against the handler.
type MigrateNodeStorageRequest struct {
To string `json:"to"`
}

// HealthAlarmKind identifies which health check raised an alarm. Its string
// value is what ends up in the `kind` JSON field of a HealthAlarm.
type HealthAlarmKind string

// Known alarm kinds. Each constant carries the HealthAlarmKind type
// explicitly: in a const block only an omitted expression list repeats the
// previous type, so `Name = "value"` on its own declares an untyped string
// constant.
const (
	HealthAlarmKindAlbyService       HealthAlarmKind = "alby_service"
	HealthAlarmKindNodeNotReady      HealthAlarmKind = "node_not_ready"
	HealthAlarmKindChannelsOffline   HealthAlarmKind = "channels_offline"
	HealthAlarmKindNostrRelayOffline HealthAlarmKind = "nostr_relay_offline"
)

// HealthAlarm describes a single failed health check.
type HealthAlarm struct {
// Kind identifies which check raised the alarm.
Kind HealthAlarmKind `json:"kind"`
// RawDetails carries optional, check-specific data. It is left out of the
// JSON output when empty (omitempty).
RawDetails any `json:"rawDetails,omitempty"`
}

// NewHealthAlarm builds a HealthAlarm of the given kind. rawDetails is stored
// as-is and may be nil when the alarm has no extra data to report.
func NewHealthAlarm(kind HealthAlarmKind, rawDetails any) HealthAlarm {
	var alarm HealthAlarm
	alarm.Kind = kind
	alarm.RawDetails = rawDetails
	return alarm
}

// HealthResponse is the result of a health check: one alarm per failed check.
type HealthResponse struct {
// Alarms lists the failed checks. Because of omitempty the `alarms` key is
// absent from the JSON output when there are none.
Alarms []HealthAlarm `json:"alarms,omitempty"`
}
67 changes: 65 additions & 2 deletions frontend/src/components/layouts/AppLayout.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,15 @@ import {
} from "src/components/ui/tooltip";
import { useAlbyMe } from "src/hooks/useAlbyMe";

import clsx from "clsx";
import { useAlbyInfo } from "src/hooks/useAlbyInfo";
import { useHealthCheck } from "src/hooks/useHealthCheck";
import { useInfo } from "src/hooks/useInfo";
import { useNotifyReceivedPayments } from "src/hooks/useNotifyReceivedPayments";
import { useRemoveSuccessfulChannelOrder } from "src/hooks/useRemoveSuccessfulChannelOrder";
import { deleteAuthToken } from "src/lib/auth";
import { cn } from "src/lib/utils";
import { HealthAlarm } from "src/types";
import { isHttpMode } from "src/utils/isHttpMode";
import { openLink } from "src/utils/openLink";
import ExternalLink from "../ExternalLink";
Expand Down Expand Up @@ -230,6 +233,7 @@ export default function AppLayout() {
<AlbyHubLogo className="text-foreground" />
</Link>
<AppVersion />
<HealthIndicator />
</div>
<MainMenuContent />
</nav>
Expand Down Expand Up @@ -285,9 +289,9 @@ export default function AppLayout() {
<Link to="/">
<AlbyHubLogo className="text-foreground" />
</Link>
{/* align shield with x icon */}
<div className="mr-2">
<div className="mr-1 flex gap-2 items-center justify-center">
<AppVersion />
<HealthIndicator />
</div>
</div>
<MainMenuContent />
Expand Down Expand Up @@ -369,6 +373,65 @@ function AppVersion() {
);
}

/**
 * Small status dot that reflects the result of the health check.
 *
 * Renders nothing until health data is available. The dot is green when no
 * alarms were reported and uses the destructive color otherwise; the tooltip
 * lists a human-readable title for each alarm.
 */
function HealthIndicator() {
  const { data: health } = useHealthCheck();
  if (!health) {
    return null;
  }

  // The alarms list may be missing from the response when there is nothing to
  // report, so normalize it once and use the local everywhere below.
  const alarms = health.alarms ?? [];
  const ok = alarms.length === 0;

  /** Maps an alarm to the message shown in the tooltip. */
  function getAlarmTitle(alarm: HealthAlarm) {
    // TODO: could show extra data from alarm.rawDetails
    // for some alarm types
    switch (alarm.kind) {
      case "alby_service":
        return "One or more Alby Services are offline";
      case "channels_offline":
        return "One or more channels are offline";
      case "node_not_ready":
        return "Node is not ready";
      case "nostr_relay_offline":
        return "Could not connect to relay";
      default:
        return "Unknown error";
    }
  }

  return (
    <TooltipProvider>
      <Tooltip>
        <TooltipTrigger>
          <span className="text-xs flex items-center text-muted-foreground">
            <div
              className={clsx(
                "w-2 h-2 rounded-full",
                ok ? "bg-green-300" : "bg-destructive"
              )}
            />
          </span>
        </TooltipTrigger>
        <TooltipContent>
          {ok ? (
            <p>Alby Hub is running</p>
          ) : (
            <div>
              <p className="font-semibold">
                {/* avoid "1 issues were found" for a single alarm */}
                {alarms.length === 1
                  ? "1 issue was found"
                  : `${alarms.length} issues were found`}
              </p>
              <ul className="mt-2 max-w-xs whitespace-pre-wrap list-disc list-inside">
                {alarms.map((alarm) => (
                  <li key={alarm.kind}>{getAlarmTitle(alarm)}</li>
                ))}
              </ul>
            </div>
          )}
        </TooltipContent>
      </Tooltip>
    </TooltipProvider>
  );
}

const MenuItem = ({
to,
children,
Expand Down
16 changes: 16 additions & 0 deletions frontend/src/hooks/useHealthCheck.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import useSWR, { SWRConfiguration } from "swr";

import { HealthResponse } from "src/types";
import { swrFetcher } from "src/utils/swr";

// SWR options applied when polling is enabled: re-fetch every 30000 ms
// (30 seconds).
const pollConfiguration: SWRConfiguration = {
refreshInterval: 30000,
};

/**
 * Fetches the health check result from `/api/health`.
 *
 * @param poll when true (the default) the request uses `pollConfiguration`;
 *   when false no extra SWR configuration is passed.
 */
export function useHealthCheck(poll = true) {
  const configuration = poll ? pollConfiguration : undefined;

  return useSWR<HealthResponse>("/api/health", swrFetcher, configuration);
}
15 changes: 15 additions & 0 deletions frontend/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,21 @@ export interface InfoResponse {
autoUnlockPasswordEnabled: boolean;
}

// Identifier of the health check that raised an alarm. The values mirror the
// HealthAlarmKind string constants declared in api/models.go.
export type HealthAlarmKind =
| "alby_service"
| "node_not_ready"
| "channels_offline"
| "nostr_relay_offline";

// A single failed health check as returned by the health endpoint.
// NOTE(review): the Go HealthAlarm tags `rawDetails` with omitempty, so the
// key may be absent from the payload.
export type HealthAlarm = {
kind: HealthAlarmKind;
rawDetails: unknown;
};

// Response body of the health endpoint.
// NOTE(review): the Go HealthResponse tags `alarms` with omitempty, so the key
// may be absent when there are no alarms — read it null-safely.
export type HealthResponse = {
alarms: HealthAlarm[];
};

export type Network = "bitcoin" | "testnet" | "signet";

export type AppMetadata = { app_store_app_id?: string } & Record<
Expand Down
12 changes: 12 additions & 0 deletions http/http_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ func (httpSvc *HttpService) RegisterSharedRoutes(e *echo.Echo) {
restrictedGroup.POST("/api/send-payment-probes", httpSvc.sendPaymentProbesHandler)
restrictedGroup.POST("/api/send-spontaneous-payment-probes", httpSvc.sendSpontaneousPaymentProbesHandler)
restrictedGroup.GET("/api/log/:type", httpSvc.getLogOutputHandler)
restrictedGroup.GET("/api/health", httpSvc.healthHandler)

httpSvc.albyHttpSvc.RegisterSharedRoutes(restrictedGroup, e)
}
Expand Down Expand Up @@ -1072,3 +1073,14 @@ func (httpSvc *HttpService) restoreBackupHandler(c echo.Context) error {

return c.NoContent(http.StatusNoContent)
}

// healthHandler serves the health check endpoint. It responds with 200 and
// the HealthResponse produced by the API, or with 500 and an ErrorResponse
// when the health check itself could not be completed.
func (httpSvc *HttpService) healthHandler(c echo.Context) error {
	ctx := c.Request().Context()

	report, checkErr := httpSvc.api.Health(ctx)
	if checkErr != nil {
		failure := ErrorResponse{
			Message: fmt.Sprintf("Failed to check node health: %v", checkErr),
		}
		return c.JSON(http.StatusInternalServerError, failure)
	}

	return c.JSON(http.StatusOK, report)
}
4 changes: 3 additions & 1 deletion lnclient/breez/breez.go
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,9 @@ func (bs *BreezService) GetLogOutput(ctx context.Context, maxLen int) ([]byte, e
}

// GetNodeStatus always reports the node as ready and carries no internal
// status details. It returns a non-nil status rather than (nil, nil) so
// callers can read IsReady without a nil check.
func (bs *BreezService) GetNodeStatus(ctx context.Context) (nodeStatus *lnclient.NodeStatus, err error) {
	return &lnclient.NodeStatus{
		IsReady: true,
	}, nil
}

func (bs *BreezService) SignMessage(ctx context.Context, message string) (string, error) {
Expand Down
4 changes: 3 additions & 1 deletion lnclient/cashu/cashu.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,9 @@ func (cs *CashuService) GetNetworkGraph(ctx context.Context, nodeIds []string) (
func (cs *CashuService) UpdateLastWalletSyncRequest() {}

// GetNodeStatus always reports the node as ready and carries no internal
// status details. It returns a non-nil status rather than (nil, nil) so
// callers can read IsReady without a nil check.
func (cs *CashuService) GetNodeStatus(ctx context.Context) (nodeStatus *lnclient.NodeStatus, err error) {
	return &lnclient.NodeStatus{
		IsReady: true,
	}, nil
}

func (cs *CashuService) SendPaymentProbes(ctx context.Context, invoice string) error {
Expand Down
4 changes: 3 additions & 1 deletion lnclient/greenlight/greenlight.go
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,9 @@ func (gs *GreenlightService) GetStorageDir() (string, error) {
}

// GetNodeStatus always reports the node as ready and carries no internal
// status details. It returns a non-nil status rather than (nil, nil) so
// callers can read IsReady without a nil check.
func (gs *GreenlightService) GetNodeStatus(ctx context.Context) (nodeStatus *lnclient.NodeStatus, err error) {
	return &lnclient.NodeStatus{
		IsReady: true,
	}, nil
}

func (gs *GreenlightService) GetNetworkGraph(ctx context.Context, nodeIds []string) (lnclient.NetworkGraphResponse, error) {
Expand Down
61 changes: 33 additions & 28 deletions lnclient/ldk/ldk.go
Original file line number Diff line number Diff line change
Expand Up @@ -255,35 +255,38 @@ func NewLDKService(ctx context.Context, cfg config.Config, eventPublisher events
}).Info("LDK node synced successfully")

if ls.network == "bitcoin" {
// try to connect to some peers to retrieve P2P gossip data. TODO: Remove once LDK can correctly do gossip with CLN and Eclair nodes
// see https://github.com/lightningdevkit/rust-lightning/issues/3075
peers := []string{
"031b301307574bbe9b9ac7b79cbe1700e31e544513eae0b5d7497483083f99e581@45.79.192.236:9735", // Olympus
"0364913d18a19c671bb36dd04d6ad5be0fe8f2894314c36a9db3f03c2d414907e1@192.243.215.102:9735", // LQwD
"035e4ff418fc8b5554c5d9eea66396c227bd429a3251c8cbc711002ba215bfc226@170.75.163.209:9735", // WoS
"02fcc5bfc48e83f06c04483a2985e1c390cb0f35058baa875ad2053858b8e80dbd@35.239.148.251:9735", // Blink
"027100442c3b79f606f80f322d98d499eefcb060599efc5d4ecb00209c2cb54190@3.230.33.224:9735", // c=
"038a9e56512ec98da2b5789761f7af8f280baf98a09282360cd6ff1381b5e889bf@64.23.162.51:9735", // Megalith LSP
}
logger.Logger.Info("Connecting to some peers to retrieve P2P gossip data")
for _, peer := range peers {
parts := strings.FieldsFunc(peer, func(r rune) bool { return r == '@' || r == ':' })
port, err := strconv.ParseUint(parts[2], 10, 16)
if err != nil {
logger.Logger.WithError(err).Error("Failed to parse port number")
continue
go func() {
// try to connect to some peers in the background to retrieve P2P gossip data.
// TODO: Remove once LDK can correctly do gossip with CLN and Eclair nodes
// see https://github.com/lightningdevkit/rust-lightning/issues/3075
peers := []string{
"031b301307574bbe9b9ac7b79cbe1700e31e544513eae0b5d7497483083f99e581@45.79.192.236:9735", // Olympus
"0364913d18a19c671bb36dd04d6ad5be0fe8f2894314c36a9db3f03c2d414907e1@192.243.215.102:9735", // LQwD
"035e4ff418fc8b5554c5d9eea66396c227bd429a3251c8cbc711002ba215bfc226@170.75.163.209:9735", // WoS
"02fcc5bfc48e83f06c04483a2985e1c390cb0f35058baa875ad2053858b8e80dbd@35.239.148.251:9735", // Blink
// "027100442c3b79f606f80f322d98d499eefcb060599efc5d4ecb00209c2cb54190@3.230.33.224:9735", // c=
"038a9e56512ec98da2b5789761f7af8f280baf98a09282360cd6ff1381b5e889bf@64.23.162.51:9735", // Megalith LSP
}
err = ls.ConnectPeer(ctx, &lnclient.ConnectPeerRequest{
Pubkey: parts[0],
Address: parts[1],
Port: uint16(port),
})
if err != nil {
logger.Logger.WithFields(logrus.Fields{
"peer": peer,
}).WithError(err).Error("Failed to connect to peer")
logger.Logger.Info("Connecting to some peers to retrieve P2P gossip data")
for _, peer := range peers {
parts := strings.FieldsFunc(peer, func(r rune) bool { return r == '@' || r == ':' })
port, err := strconv.ParseUint(parts[2], 10, 16)
if err != nil {
logger.Logger.WithError(err).Error("Failed to parse port number")
continue
}
err = ls.ConnectPeer(ctx, &lnclient.ConnectPeerRequest{
Pubkey: parts[0],
Address: parts[1],
Port: uint16(port),
})
if err != nil {
logger.Logger.WithFields(logrus.Fields{
"peer": peer,
}).WithError(err).Error("Failed to connect to peer")
}
}
}
}()
}

// setup background sync
Expand Down Expand Up @@ -1644,8 +1647,10 @@ func deleteOldLDKLogs(ldkLogDir string) {
}

// GetNodeStatus returns the LDK node's status. The node counts as ready when
// it reports being both running and listening; the full status is exposed as
// InternalNodeStatus.
func (ls *LDKService) GetNodeStatus(ctx context.Context) (nodeStatus *lnclient.NodeStatus, err error) {
	// Read the status once so IsReady and InternalNodeStatus describe the
	// same snapshot.
	status := ls.node.Status()
	return &lnclient.NodeStatus{
		IsReady:            status.IsRunning && status.IsListening,
		InternalNodeStatus: status,
	}, nil
}

Expand Down
3 changes: 2 additions & 1 deletion lnclient/lnd/lnd.go
Original file line number Diff line number Diff line change
Expand Up @@ -1060,7 +1060,7 @@ func lndPaymentToTransaction(payment *lnrpc.Payment) (*lnclient.Transaction, err
DescriptionHash: descriptionHash,
ExpiresAt: expiresAt,
SettledAt: settledAt,
//TODO: Metadata: (e.g. keysend),
// TODO: Metadata: (e.g. keysend),
}, nil
}

Expand Down Expand Up @@ -1131,6 +1131,7 @@ func (svc *LNDService) GetNodeStatus(ctx context.Context) (nodeStatus *lnclient.
}

return &lnclient.NodeStatus{
IsReady: true, // Assuming that, if GetNodeInfo() succeeds, the node is online and accessible.
InternalNodeStatus: map[string]interface{}{
"info": info,
"config": debugInfo.Config,
Expand Down
1 change: 1 addition & 0 deletions lnclient/models.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ type Channel struct {
}

// NodeStatus is the status an LN client reports for its node.
type NodeStatus struct {
	// IsReady tells whether the node is considered ready.
	IsReady bool `json:"isReady"`
	// InternalNodeStatus holds backend-specific status details, if any.
	InternalNodeStatus any `json:"internalNodeStatus"`
}

Expand Down
4 changes: 3 additions & 1 deletion lnclient/phoenixd/phoenixd.go
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,9 @@ func (svc *PhoenixService) GetLogOutput(ctx context.Context, maxLen int) ([]byte
}

// GetNodeStatus always reports the node as ready and carries no internal
// status details. It returns a non-nil status rather than (nil, nil) so
// callers can read IsReady without a nil check.
func (svc *PhoenixService) GetNodeStatus(ctx context.Context) (nodeStatus *lnclient.NodeStatus, err error) {
	return &lnclient.NodeStatus{
		IsReady: true,
	}, nil
}

func (svc *PhoenixService) GetStorageDir() (string, error) {
Expand Down
Loading

0 comments on commit ff9ef56

Please sign in to comment.