Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Starting on the matrix #337

Merged
merged 4 commits into from
Nov 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/HTML/DataFrame.html
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,7 @@ <H2 ID="2"><font color="blue">API Reference with code samples <font size="+4">&#
</tr>

<tr class="item" onmouseover="this.style.backgroundColor='#ffff66';" onmouseout="this.style.backgroundColor='#d4e3e5';">
<td title="These are other functionalities of DataFrame" style="text-align:center;background-color:LightGrey;color:DarkBlue">Gears &nbsp;&nbsp; <font size="+3">&#x2699;</font></td>
<td title="These are other functionalities of DataFrame" style="text-align:center;background-color:LightGrey;color:DarkBlue">Gears &amp; Stuff &nbsp;&nbsp; <font size="+3">&#x2699;</font></td>
</tr>

<tr class="item" onmouseover="this.style.backgroundColor='#ffff66';" onmouseout="this.style.backgroundColor='#d4e3e5';">
Expand Down
2 changes: 1 addition & 1 deletion docs/HTML/self_contained.html
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
It also has some disadvantages:
<UL>
<LI>There might be functionalities that are hard/time-consuming to implement that are already there</LI>
<LI>If you find a battle-test library, the debugging is already done for you</LI>
<LI>If you find a battle-tested library, the debugging is already done for you</LI>
<LI>There might be industry-wide standards/trends that you want to follow by using a reputed library</LI>
</UL>
<BR>
Expand Down
62 changes: 31 additions & 31 deletions include/DataFrame/DataFrameStatsVisitors.h
Original file line number Diff line number Diff line change
Expand Up @@ -6382,16 +6382,12 @@ struct LinearFitVisitor {
const H &x_begin, const H &x_end,
const H &y_begin, const H &y_end) {

const size_type col_s = std::distance(x_begin, x_end);
const size_type col_s =
std::min(std::distance(x_begin, x_end),
std::distance(y_begin, y_end));
const auto thread_level = (col_s < ThreadPool::MUL_THR_THHOLD)
? 0L : ThreadGranularity::get_thread_level();

#ifdef HMDF_SANITY_EXCEPTIONS
if (col_s != size_type(std::distance(y_begin, y_end)))
throw DataFrameError("LinearFitVisitor: two columns must be "
"of equal sizes");
#endif // HMDF_SANITY_EXCEPTIONS

value_type sum_x { 0 }; // Sum of all observed x
value_type sum_y { 0 }; // Sum of all observed y
value_type sum_x2 { 0 }; // Sum of all observed x squared
Expand Down Expand Up @@ -7543,30 +7539,32 @@ is_normal(const V &column, double epsl, bool check_for_standard) {
svisit.post();

const value_type mean = static_cast<value_type>(svisit.get_mean());
const value_type std = static_cast<value_type>(svisit.get_std());
const value_type high_band_1 = static_cast<value_type>(mean + std);
const value_type low_band_1 = static_cast<value_type>(mean - std);
const value_type stdev = static_cast<value_type>(svisit.get_std());
const value_type high_band_1 = static_cast<value_type>(mean + stdev);
const value_type low_band_1 = static_cast<value_type>(mean - stdev);
double count_1 = 0.0;
const value_type high_band_2 =
static_cast<value_type>(mean + std * 2.0);
const value_type low_band_2 = static_cast<value_type>(mean - std * 2.0);
static_cast<value_type>(mean + stdev * 2.0);
const value_type low_band_2 =
static_cast<value_type>(mean - stdev * 2.0);
double count_2 = 0.0;
const value_type high_band_3 =
static_cast<value_type>(mean + std * 3.0);
const value_type low_band_3 = static_cast<value_type>(mean - std * 3.0);
static_cast<value_type>(mean + stdev * 3.0);
const value_type low_band_3 =
static_cast<value_type>(mean - stdev * 3.0);
double count_3 = 0.0;

for (auto citer : column) [[likely]] {
if (citer >= low_band_1 && citer < high_band_1) {
for (const auto &val : column) [[likely]] {
if (val >= low_band_1 && val < high_band_1) {
count_3 += 1;
count_2 += 1;
count_1 += 1;
}
else if (citer >= low_band_2 && citer < high_band_2) {
else if (val >= low_band_2 && val < high_band_2) {
count_3 += 1;
count_2 += 1;
}
else if (citer >= low_band_3 && citer < high_band_3) {
else if (val >= low_band_3 && val < high_band_3) {
count_3 += 1;
}
}
Expand All @@ -7578,7 +7576,7 @@ is_normal(const V &column, double epsl, bool check_for_standard) {
std::fabs((count_3 / col_s) - 0.997) <= epsl) {
if (check_for_standard)
return (std::fabs(mean - 0) <= epsl &&
std::fabs(std - 1.0) <= epsl);
std::fabs(stdev - 1.0) <= epsl);
return (true);
}
return (false);
Expand All @@ -7597,28 +7595,30 @@ is_lognormal(const V &column, double epsl) {
StatsVisitor<value_type, int> log_visit;

svisit.pre();
for (auto citer : column) [[likely]] {
svisit(dummy_idx, static_cast<value_type>(std::log(citer)));
log_visit(dummy_idx, citer);
for (auto val : column) [[likely]] {
svisit(dummy_idx, static_cast<value_type>(std::log(val)));
log_visit(dummy_idx, val);
}
svisit.post();

const value_type mean = static_cast<value_type>(svisit.get_mean());
const value_type std = static_cast<value_type>(svisit.get_std());
const value_type high_band_1 = static_cast<value_type>(mean + std);
const value_type low_band_1 = static_cast<value_type>(mean - std);
const value_type stdev = static_cast<value_type>(svisit.get_std());
const value_type high_band_1 = static_cast<value_type>(mean + stdev);
const value_type low_band_1 = static_cast<value_type>(mean - stdev);
double count_1 = 0.0;
const value_type high_band_2 =
static_cast<value_type>(mean + std * 2.0);
const value_type low_band_2 = static_cast<value_type>(mean - std * 2.0);
static_cast<value_type>(mean + stdev * 2.0);
const value_type low_band_2 =
static_cast<value_type>(mean - stdev * 2.0);
double count_2 = 0.0;
const value_type high_band_3 =
static_cast<value_type>(mean + std * 3.0);
const value_type low_band_3 = static_cast<value_type>(mean - std * 3.0);
static_cast<value_type>(mean + stdev * 3.0);
const value_type low_band_3 =
static_cast<value_type>(mean - stdev * 3.0);
double count_3 = 0.0;

for (auto citer : column) [[likely]] {
const auto log_val = std::log(citer);
for (const auto &val : column) [[likely]] {
const auto log_val = std::log(val);

if (log_val >= low_band_1 && log_val < high_band_1) {
count_3 += 1;
Expand Down
Loading
Loading