Skip to content

Commit

Permalink
changes to single feature tree building that should not affect outputs
Browse files Browse the repository at this point in the history
  • Loading branch information
paulbkoch committed Jan 5, 2025
1 parent 4480350 commit f9b42c2
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 94 deletions.
25 changes: 16 additions & 9 deletions shared/libebm/BoosterCore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -823,20 +823,26 @@ ErrorEbm BoosterCore::Create(void* const rng,
}

const size_t cBytesPerMainBin = GetBinSize<FloatMain, UIntMain>(true, true, bHessian, cScores);

if(IsAddError(cBytesPerMainBin, sizeof(void*))) {
LOG_0(Trace_Warning, "WARNING BoosterCore::Create IsAddError(cBytesPerMainBin, sizeof(void*))");
return Error_OutOfMemory;
}
const size_t cBytesMainBinPlusPointer = cBytesPerMainBin + sizeof(void*);
if(IsMultiplyError(cBytesMainBinPlusPointer, cMainBinsMax)) {
if(IsMultiplyError(cBytesPerMainBin, cMainBinsMax)) {
LOG_0(Trace_Warning, "WARNING BoosterCore::Create IsMultiplyError(cBytesPerMainBin, cMainBinsMax)");
return Error_OutOfMemory;
}
// we also allocate enough space to create an additional array of pointers
pBoosterCore->m_cBytesMainBins = cBytesMainBinPlusPointer * cMainBinsMax;
size_t cBytesMainBins = cBytesPerMainBin * cMainBinsMax;

if(0 != cSingleDimensionBinsMax) {
if(IsAddError(cBytesPerMainBin, sizeof(void*))) {
LOG_0(Trace_Warning, "WARNING BoosterCore::Create IsAddError(cBytesPerMainBin, sizeof(void*))");
return Error_OutOfMemory;
}
const size_t cBytesMainBinPlusPointer = cBytesPerMainBin + sizeof(void*);
if(IsMultiplyError(cBytesMainBinPlusPointer, cSingleDimensionBinsMax)) {
LOG_0(Trace_Warning,
"WARNING BoosterCore::Create IsMultiplyError(cBytesMainBinPlusPointer, cSingleDimensionBinsMax)");
return Error_OutOfMemory;
}
// we also allocate enough space to create an additional array of pointers
cBytesMainBins = EbmMax(cBytesMainBins, cBytesMainBinPlusPointer * cSingleDimensionBinsMax);

if(IsOverflowTreeNodeSize(bHessian, cScores) || IsOverflowSplitPositionSize(bHessian, cScores)) {
LOG_0(Trace_Warning, "WARNING BoosterCore::Create bin tracking size overflow");
return Error_OutOfMemory;
Expand Down Expand Up @@ -881,6 +887,7 @@ ErrorEbm BoosterCore::Create(void* const rng,
EBM_ASSERT(0 == pBoosterCore->m_cBytesSplitPositions);
EBM_ASSERT(0 == pBoosterCore->m_cBytesTreeNodes);
}
pBoosterCore->m_cBytesMainBins = cBytesMainBins;
}
if(0 != cTerms) {
error = InitializeTensors(cTerms, pBoosterCore->m_apTerms, cScores, &pBoosterCore->m_apCurrentTermTensors);
Expand Down
6 changes: 3 additions & 3 deletions shared/libebm/CalcInteractionStrength.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,16 +178,16 @@ EBM_API_BODY ErrorEbm EBM_CALLING_CONVENTION CalcInteractionStrength(Interaction
LOG_0(Trace_Warning, "WARNING CalcInteractionStrength maxCardinality can't be less than 0. Turning off.");
}

size_t cSamplesLeafMin = size_t{0}; // this is the min value
if(IntEbm{0} <= minSamplesLeaf) {
size_t cSamplesLeafMin = size_t{1}; // this is the min value
if(IntEbm{1} <= minSamplesLeaf) {
cSamplesLeafMin = static_cast<size_t>(minSamplesLeaf);
if(IsConvertError<size_t>(minSamplesLeaf)) {
// we can never exceed a size_t number of samples, so let's just set it to the maximum if we were going to
// overflow because it will generate the same results as if we used the true number
cSamplesLeafMin = std::numeric_limits<size_t>::max();
}
} else {
LOG_0(Trace_Warning, "WARNING CalcInteractionStrength minSamplesLeaf can't be less than 0. Adjusting to 0.");
LOG_0(Trace_Warning, "WARNING CalcInteractionStrength minSamplesLeaf can't be less than 1. Adjusting to 1.");
}

FloatCalc hessianMin = static_cast<FloatCalc>(minHessian);
Expand Down
12 changes: 6 additions & 6 deletions shared/libebm/GenerateTermUpdate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -781,16 +781,16 @@ EBM_API_BODY ErrorEbm EBM_CALLING_CONVENTION GenerateTermUpdate(void* rng,
LOG_0(Trace_Warning, "WARNING GenerateTermUpdate learningRate is negative");
}

size_t cSamplesLeafMin = size_t{0}; // this is the min value
if(IntEbm{0} <= minSamplesLeaf) {
size_t cSamplesLeafMin = size_t{1}; // this is the min value
if(IntEbm{1} <= minSamplesLeaf) {
cSamplesLeafMin = static_cast<size_t>(minSamplesLeaf);
if(IsConvertError<size_t>(minSamplesLeaf)) {
// we can never exceed a size_t number of samples, so let's just set it to the maximum if we were going to
// overflow because it will generate the same results as if we used the true number
cSamplesLeafMin = std::numeric_limits<size_t>::max();
}
} else {
LOG_0(Trace_Warning, "WARNING GenerateTermUpdate minSamplesLeaf can't be less than 0. Adjusting to 0.");
LOG_0(Trace_Warning, "WARNING GenerateTermUpdate minSamplesLeaf can't be less than 1. Adjusting to 1.");
}

FloatCalc hessianMin = static_cast<FloatCalc>(minHessian);
Expand Down Expand Up @@ -820,16 +820,16 @@ EBM_API_BODY ErrorEbm EBM_CALLING_CONVENTION GenerateTermUpdate(void* rng,
deltaStepMax = std::numeric_limits<FloatCalc>::infinity();
}

size_t cCategorySamplesMin = size_t{0}; // this is the min value
if(IntEbm{0} <= minCategorySamples) {
size_t cCategorySamplesMin = size_t{1}; // this is the min value
if(IntEbm{1} <= minCategorySamples) {
cCategorySamplesMin = static_cast<size_t>(minCategorySamples);
if(IsConvertError<size_t>(minCategorySamples)) {
// we can never exceed a size_t number of samples, so let's just set it to the maximum if we were going to
// overflow because it will generate the same results as if we used the true number
cCategorySamplesMin = std::numeric_limits<size_t>::max();
}
} else {
LOG_0(Trace_Warning, "WARNING GenerateTermUpdate minSamplesLeaf can't be less than 0. Adjusting to 0.");
LOG_0(Trace_Warning, "WARNING GenerateTermUpdate minSamplesLeaf can't be less than 1. Adjusting to 1.");
}

FloatCalc categoricalSmoothingCalc = static_cast<FloatCalc>(categoricalSmoothing);
Expand Down
Loading

0 comments on commit f9b42c2

Please sign in to comment.