-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathocc_manager.hpp
503 lines (430 loc) · 16.3 KB
/
occ_manager.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
#pragma once
#include "occ_pass_through.hpp"
#include "occ_status.hpp"
#ifdef PLDM
#include "pldm.hpp"
#ifdef PHAL_SUPPORT
#include <libphal.H>
#endif
#endif
#include "powercap.hpp"
#include "utils.hpp"
#ifdef POWER10
#include "powermode.hpp"
#endif
#include <sdbusplus/bus.hpp>
#include <sdeventplus/event.hpp>
#include <sdeventplus/utility/timer.hpp>

#include <cstdint>
#include <cstring>
#include <functional>
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <vector>
namespace sdbusRule = sdbusplus::bus::match::rules;
namespace open_power
{
namespace occ
{
#ifdef READ_OCC_SENSORS
/** @brief FRU type identifiers used when reading OCC sensors.
 *
 *  Deliberately an unscoped enum: the enumerators are used directly as the
 *  uint32_t keys of the DIMM sensor-name maps in Manager. The numbering is
 *  not contiguous (4 and 5 are not defined here).
 */
enum occFruType
{
    processorCore = 0,   // processor core
    internalMemCtlr = 1, // internal memory controller
    dimm = 2,            // DIMM
    memCtrlAndDimm = 3,  // memory controller and DIMM combined
    VRMVdd = 6,          // VRM Vdd
    PMIC = 7,            // PMIC
    memCtlrExSensor = 8, // memory controller external sensor
    processorIoRing = 9  // processor I/O ring
};
#endif
/** @brief Default time, in seconds, between OCC poll commands
 *         (5 when built with POWER10, otherwise 1) */
#ifdef POWER10
constexpr unsigned int defaultPollingInterval = 5;
#else
constexpr unsigned int defaultPollingInterval = 1;
#endif
// D-Bus object path, interface and property of the ambient temperature
// sensor; Manager subscribes to PropertiesChanged on this path/interface.
constexpr const char* AMBIENT_PATH =
    "/xyz/openbmc_project/sensors/temperature/Ambient_Virtual_Temp";
constexpr const char* AMBIENT_INTERFACE = "xyz.openbmc_project.Sensor.Value";
constexpr const char* AMBIENT_PROP = "Value";

// D-Bus object path, interface and property of the altitude sensor
// (presumably what Manager::readAltitude() queries -- confirm in the .cpp).
constexpr const char* ALTITUDE_PATH =
    "/xyz/openbmc_project/sensors/altitude/Altitude";
constexpr const char* ALTITUDE_INTERFACE = "xyz.openbmc_project.Sensor.Value";
constexpr const char* ALTITUDE_PROP = "Value";

// Extended-sensor labels, written as the hex encoding of their ASCII names:
// "5057524d" spells "PWRM" and "50575250" spells "PWRP".
constexpr const char* EXTN_LABEL_PWRM_MEMORY_POWER = "5057524d";
constexpr const char* EXTN_LABEL_PWRP_PROCESSOR_POWER = "50575250";
/** @class Manager
* @brief Builds and manages OCC objects
*/
struct Manager
{
public:
Manager() = delete;
Manager(const Manager&) = delete;
Manager& operator=(const Manager&) = delete;
Manager(Manager&&) = delete;
Manager& operator=(Manager&&) = delete;
~Manager() = default;
/** @brief Adds OCC pass-through and status objects on the bus
* when corresponding CPU inventory is created.
*
* @param[in] event - Unique ptr reference to sd_event
*/
explicit Manager(EventPtr& event) :
event(event), pollInterval(defaultPollingInterval),
sdpEvent(sdeventplus::Event::get_default()),
_pollTimer(
std::make_unique<
sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
sdpEvent, std::bind(&Manager::pollerTimerExpired, this))),
ambientPropChanged(
utils::getBus(),
sdbusRule::member("PropertiesChanged") +
sdbusRule::path(AMBIENT_PATH) +
sdbusRule::argN(0, AMBIENT_INTERFACE) +
sdbusRule::interface("org.freedesktop.DBus.Properties"),
std::bind(&Manager::ambientCallback, this, std::placeholders::_1))
#ifdef POWER10
,
discoverTimer(
std::make_unique<
sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
sdpEvent, std::bind(&Manager::findAndCreateObjects, this))),
waitForAllOccsTimer(
std::make_unique<
sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
sdpEvent, std::bind(&Manager::occsNotAllRunning, this)))
#ifdef PLDM
,
throttlePldmTraceTimer(
std::make_unique<
sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
sdpEvent, std::bind(&Manager::throttlePldmTraceExpired, this)))
#endif
#endif // POWER10
{
#ifdef I2C_OCC
// I2C OCC status objects are initialized directly
initStatusObjects();
#else
findAndCreateObjects();
#endif
readAltitude();
}
void createPldmHandle();
/** @brief Return the number of bound OCCs */
inline auto getNumOCCs() const
{
return activeCount;
}
#ifdef PLDM
/** @brief Called by a Device to report that the SBE timed out
* and appropriate action should be taken
*
* @param[in] instance - the OCC instance id
*/
void sbeTimeout(unsigned int instance);
#endif
/** @brief Return the latest ambient and altitude readings
*
* @param[out] ambientValid - true if ambientTemp is valid
* @param[out] ambient - ambient temperature in degrees C
* @param[out] altitude - altitude in meters
*/
void getAmbientData(bool& ambientValid, uint8_t& ambientTemp,
uint16_t& altitude) const;
/** @brief Notify pcap object to update bounds */
void updatePcapBounds() const;
/**
* @brief Set all sensor values of this OCC to NaN.
* @param[in] id - Id of the OCC.
* */
void setSensorValueToNaN(uint32_t id) const;
/** @brief Set all sensor values of this OCC to NaN and non functional.
*
* @param[in] id - Id of the OCC.
*/
void setSensorValueToNonFunctional(uint32_t id) const;
private:
/** @brief Creates the OCC D-Bus objects.
*/
void findAndCreateObjects();
/** @brief Callback that responds to cpu creation in the inventory -
* by creating the needed objects.
*
* @param[in] msg - bus message
*
* @returns 0 to indicate success
*/
int cpuCreated(sdbusplus::message_t& msg);
/** @brief Create child OCC objects.
*
* @param[in] occ - the occ name, such as occ0.
*/
void createObjects(const std::string& occ);
/** @brief Callback handler invoked by Status object when the OccActive
* property is changed. This is needed to make sure that the
* error detection is started only after all the OCCs are bound.
* Similarly, when one of the OCC gets its OccActive property
* un-set, then the OCC error detection needs to be stopped on
* all the OCCs
*
* @param[in] status - OccActive status
*/
void statusCallBack(instanceID instance, bool status);
/** @brief Set flag that a PM Complex reset is needed (to be initiated
* later) */
void resetOccRequest(instanceID instance);
/** @brief Initiate the request to reset the PM Complex (PLDM -> HBRT) */
void initiateOccRequest(instanceID instance);
/** @brief Sends a Heartbeat command to host control command handler */
void sendHeartBeat();
/** @brief reference to sd_event wrapped in unique_ptr */
EventPtr& event;
/** @brief OCC pass-through objects */
std::vector<std::unique_ptr<PassThrough>> passThroughObjects;
/** @brief OCC Status objects */
std::vector<std::unique_ptr<Status>> statusObjects;
/** @brief Power cap monitor and occ notification object */
std::unique_ptr<open_power::occ::powercap::PowerCap> pcap;
#ifdef POWER10
/** @brief Power mode monitor and notification object */
std::unique_ptr<open_power::occ::powermode::PowerMode> pmode;
#endif
/** @brief sbdbusplus match objects */
std::vector<sdbusplus::bus::match_t> cpuMatches;
/** @brief Number of OCCs that are bound */
uint8_t activeCount = 0;
/** @brief Number of seconds between poll commands */
uint8_t pollInterval;
/** @brief Ambient temperature of the system in degrees C */
uint8_t ambient = 0xFF; // default: not available
/** @brief Altitude of the system in meters */
uint16_t altitude = 0xFFFF; // default: not available
/** @brief Poll timer event */
sdeventplus::Event sdpEvent;
/** @brief Flags to indicate if waiting for all of the OCC active sensors to
* come online */
bool waitingForAllOccActiveSensors = false;
/** @brief Set containing intance numbers of any OCCs that became active
* while waiting for status objects to be created */
std::set<uint8_t> queuedActiveState;
/**
* @brief The timer to be used once the OCC goes active. When it expires,
* a POLL command will be sent to the OCC and then timer restarted.
*/
std::unique_ptr<
sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>
_pollTimer;
/** @brief Subscribe to ambient temperature changed events */
sdbusplus::bus::match_t ambientPropChanged;
/** @brief Flag to indicate that a PM complex reset needs to happen */
bool resetRequired = false;
/** @brief Instance number of the OCC/processor that triggered the reset */
uint8_t resetInstance = 255;
/** @brief Set when a PM complex reset has been issued (to prevent multiple
* requests) */
bool resetInProgress = false;
#ifdef I2C_OCC
/** @brief Init Status objects for I2C OCC devices
*
* It iterates in /sys/bus/i2c/devices, finds all occ hwmon devices
* and creates status objects.
*/
void initStatusObjects();
#endif
#ifdef PLDM
/** @brief Callback handler invoked by the PLDM event handler when state of
* the OCC is toggled by the host. The caller passes the instance
* of the OCC and state of the OCC.
*
* @param[in] instance - instance of the OCC
* @param[in] status - true when the OCC goes active and false when the OCC
* goes inactive
*
* @return true if setting the state of OCC is successful and false if it
* fails.
*/
bool updateOCCActive(instanceID instance, bool status);
/** @brief Callback handler invoked by the PLDM event handler when mode of
* the OCC SAFE MODE is inacted or cleared.
*/
void updateOccSafeMode(bool safeState);
/** @brief Callback handler invoked by PLDM sensor change when
* the HRESET succeeds or fails.
*
* @param[in] instance - the SBE instance id
* @param[in] success - true if the HRESET succeeded, otherwise false
*/
void sbeHRESETResult(instanceID instance, bool success);
#ifdef PHAL_SUPPORT
/** @brief Helper function to check whether an SBE dump should be collected
* now.
*
* @param[in] instance - the SBE instance id
*
* @return true if an SBE dump should be collected and false if not
*/
bool sbeCanDump(unsigned int instance);
/** @brief Helper function to set the SBE state through PDBG/PHAL
*
* @param[in] instance - instance of the SBE
* @param[in] state - the state to which the SBE should be set
*
*/
void setSBEState(unsigned int instance, enum sbe_state state);
/** @brief Helper function to get the SBE instance PDBG processor target
*
* @param[in] instance - the SBE instance id
*
* @return a pointer to the PDBG target
*/
struct pdbg_target* getPdbgTarget(unsigned int instance);
/** @brief Whether pdbg_targets_init has been called */
bool pdbgInitialized = false;
#endif
std::unique_ptr<pldm::Interface> pldmHandle = nullptr;
#endif
#ifdef POWER10
/**
* @brief Timer used when discovering OCCs in /dev.
*/
std::unique_ptr<
sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>
discoverTimer;
/**
* @brief Used when discovering /dev/occ objects to know if
* any were added since the last check.
*/
std::vector<int> prevOCCSearch;
/**
* @brief Timer used when waiting for OCCs to go active.
*/
std::unique_ptr<
sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>
waitForAllOccsTimer;
#ifdef PLDM
/**
* @brief Timer used to throttle PLDM traces when there are problems
determining the OCC status via pldm. Used to prevent excessive
journal traces.
*/
std::unique_ptr<
sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>
throttlePldmTraceTimer;
/**
* @brief onPldmTimeoutCreatePel flag will be used to indicate if
* a PEL should get created when the throttlePldmTraceTimer expires.
* The first time the throttlePldmTraceTimer expires, the traces
* will be throttled and then the timer gets restarted. The
* next time the timer expires, a PEL will get created.
*/
bool onPldmTimeoutCreatePel = false;
/** @brief Check if all of the OCC Active sensors are available and if not
* restart the discoverTimer
*/
void throttlePldmTraceExpired();
/** @brief Create a PEL when the code is not able to obtain the OCC PDRs
* via PLDM. This is called when the throttlePldmTraceTimer expires.
*/
void createPldmSensorPEL();
#endif
/** @brief Called when code times out waiting for all OCCs to be running or
* after the app is restarted (Status does not callback into
* Manager).
*/
void occsNotAllRunning();
/** @brief Check if all of the OCC Active sensors are available and if not
* restart the discoverTimer
*/
void checkAllActiveSensors();
#endif // POWER10
/**
* @brief Called when poll timer expires and forces a POLL command to the
* OCC. The poll timer will then be restarted.
* */
void pollerTimerExpired();
/**
* @brief Finds the OCC devices in /dev
*
* @return The IDs of the OCCs - 0, 1, etc.
*/
std::vector<int> findOCCsInDev();
#ifdef READ_OCC_SENSORS
/**
* @brief Gets the occ sensor values.
* @param[in] occ - pointer to OCCs Status object
* */
void getSensorValues(std::unique_ptr<Status>& occ);
/**
* @brief Trigger OCC driver to read the temperature sensors.
* @param[in] path - path of the OCC sensors.
* @param[in] id - Id of the OCC.
* */
void readTempSensors(const fs::path& path, uint32_t id);
/**
* @brief Trigger OCC driver to read the extended sensors.
* @param[in] path - path of the OCC sensors.
* @param[in] id - Id of the OCC.
* */
void readExtnSensors(const fs::path& path, uint32_t id);
/**
* @brief Trigger OCC driver to read the power sensors.
* @param[in] path - path of the OCC sensors.
* @param[in] id - Id of the OCC.
* */
void readPowerSensors(const fs::path& path, uint32_t id);
/** @brief Store the existing OCC sensors on D-BUS */
std::map<std::string, uint32_t> existingSensors;
/** @brief Get FunctionID from the `powerX_label` file.
* @param[in] value - the value of the `powerX_label` file.
* @returns FunctionID of the power sensors.
*/
std::optional<std::string> getPowerLabelFunctionID(
const std::string& value);
/** @brief The power sensor names map */
const std::map<std::string, std::string> powerSensorName = {
{"system", "total_power"}, {"1", "p0_mem_power"},
{"2", "p1_mem_power"}, {"3", "p2_mem_power"},
{"4", "p3_mem_power"}, {"5", "p0_power"},
{"6", "p1_power"}, {"7", "p2_power"},
{"8", "p3_power"}, {"9", "p0_cache_power"},
{"10", "p1_cache_power"}, {"11", "p2_cache_power"},
{"12", "p3_cache_power"}, {"13", "io_a_power"},
{"14", "io_b_power"}, {"15", "io_c_power"},
{"16", "fans_a_power"}, {"17", "fans_b_power"},
{"18", "storage_a_power"}, {"19", "storage_b_power"},
{"23", "mem_cache_power"}, {"25", "p0_mem_0_power"},
{"26", "p0_mem_1_power"}, {"27", "p0_mem_2_power"},
{"35", "pcie_dcm0_power"}, {"36", "pcie_dcm1_power"},
{"37", "pcie_dcm2_power"}, {"38", "pcie_dcm3_power"},
{"39", "io_dcm0_power"}, {"40", "io_dcm1_power"},
{"41", "io_dcm2_power"}, {"42", "io_dcm3_power"},
{"43", "avdd_total_power"}};
/** @brief The dimm temperature sensor names map */
const std::map<uint32_t, std::string> dimmTempSensorName = {
{internalMemCtlr, "_intmb_temp"},
{dimm, "_dram_temp"},
{memCtrlAndDimm, "_dram_extmb_temp"},
{PMIC, "_pmic_temp"},
{memCtlrExSensor, "_extmb_temp"}};
/** @brief The dimm DVFS temperature sensor names map */
const std::map<uint32_t, std::string> dimmDVFSSensorName = {
{internalMemCtlr, "dimm_intmb_dvfs_temp"},
{dimm, "dimm_dram_dvfs_temp"},
{memCtrlAndDimm, "dimm_dram_extmb_dvfs_temp"},
{PMIC, "dimm_pmic_dvfs_temp"},
{memCtlrExSensor, "dimm_extmb_dvfs_temp"}};
#endif
/** @brief Read the altitude from DBus */
void readAltitude();
/** @brief Callback function when ambient temperature changes
*
* @param[in] msg - Data associated with subscribed signal
*/
void ambientCallback(sdbusplus::message_t& msg);
/** @brief Confirm that a single OCC master was found and start presence
* monitoring
*/
void validateOccMaster();
};
} // namespace occ
} // namespace open_power