Skip to content

Commit

Permalink
Merge pull request #337 from hosseinmoein/Hossein/Matrix
Browse files Browse the repository at this point in the history
Starting on the matrix
  • Loading branch information
hosseinmoein authored Nov 13, 2024
2 parents b7b58e9 + 266e347 commit 6bf1500
Show file tree
Hide file tree
Showing 8 changed files with 1,659 additions and 37 deletions.
2 changes: 1 addition & 1 deletion docs/HTML/DataFrame.html
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,7 @@ <H2 ID="2"><font color="blue">API Reference with code samples <font size="+4">&#
</tr>

<tr class="item" onmouseover="this.style.backgroundColor='#ffff66';" onmouseout="this.style.backgroundColor='#d4e3e5';">
<td title="These are other functionalities of DataFrame" style="text-align:center;background-color:LightGrey;color:DarkBlue">Gears &nbsp;&nbsp; <font size="+3">&#x2699;</font></td>
<td title="These are other functionalities of DataFrame" style="text-align:center;background-color:LightGrey;color:DarkBlue">Gears &amp; Stuff &nbsp;&nbsp; <font size="+3">&#x2699;</font></td>
</tr>

<tr class="item" onmouseover="this.style.backgroundColor='#ffff66';" onmouseout="this.style.backgroundColor='#d4e3e5';">
Expand Down
2 changes: 1 addition & 1 deletion docs/HTML/self_contained.html
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
It also has some disadvantages:
<UL>
<LI>There might be functionalities that are hard/time-consuming to implement that are already there</LI>
<LI>If you find a battle-test library, the debugging is already done for you</LI>
<LI>If you find a battle-tested library, the debugging is already done for you</LI>
<LI>There might be industry-wide standards/trends that you want to follow by using a reputed library</LI>
</UL>
<BR>
Expand Down
62 changes: 31 additions & 31 deletions include/DataFrame/DataFrameStatsVisitors.h
Original file line number Diff line number Diff line change
Expand Up @@ -6382,16 +6382,12 @@ struct LinearFitVisitor {
const H &x_begin, const H &x_end,
const H &y_begin, const H &y_end) {

const size_type col_s = std::distance(x_begin, x_end);
const size_type col_s =
std::min(std::distance(x_begin, x_end),
std::distance(y_begin, y_end));
const auto thread_level = (col_s < ThreadPool::MUL_THR_THHOLD)
? 0L : ThreadGranularity::get_thread_level();

#ifdef HMDF_SANITY_EXCEPTIONS
if (col_s != size_type(std::distance(y_begin, y_end)))
throw DataFrameError("LinearFitVisitor: two columns must be "
"of equal sizes");
#endif // HMDF_SANITY_EXCEPTIONS

value_type sum_x { 0 }; // Sum of all observed x
value_type sum_y { 0 }; // Sum of all observed y
value_type sum_x2 { 0 }; // Sum of all observed x squared
Expand Down Expand Up @@ -7543,30 +7539,32 @@ is_normal(const V &column, double epsl, bool check_for_standard) {
svisit.post();

const value_type mean = static_cast<value_type>(svisit.get_mean());
const value_type std = static_cast<value_type>(svisit.get_std());
const value_type high_band_1 = static_cast<value_type>(mean + std);
const value_type low_band_1 = static_cast<value_type>(mean - std);
const value_type stdev = static_cast<value_type>(svisit.get_std());
const value_type high_band_1 = static_cast<value_type>(mean + stdev);
const value_type low_band_1 = static_cast<value_type>(mean - stdev);
double count_1 = 0.0;
const value_type high_band_2 =
static_cast<value_type>(mean + std * 2.0);
const value_type low_band_2 = static_cast<value_type>(mean - std * 2.0);
static_cast<value_type>(mean + stdev * 2.0);
const value_type low_band_2 =
static_cast<value_type>(mean - stdev * 2.0);
double count_2 = 0.0;
const value_type high_band_3 =
static_cast<value_type>(mean + std * 3.0);
const value_type low_band_3 = static_cast<value_type>(mean - std * 3.0);
static_cast<value_type>(mean + stdev * 3.0);
const value_type low_band_3 =
static_cast<value_type>(mean - stdev * 3.0);
double count_3 = 0.0;

for (auto citer : column) [[likely]] {
if (citer >= low_band_1 && citer < high_band_1) {
for (const auto &val : column) [[likely]] {
if (val >= low_band_1 && val < high_band_1) {
count_3 += 1;
count_2 += 1;
count_1 += 1;
}
else if (citer >= low_band_2 && citer < high_band_2) {
else if (val >= low_band_2 && val < high_band_2) {
count_3 += 1;
count_2 += 1;
}
else if (citer >= low_band_3 && citer < high_band_3) {
else if (val >= low_band_3 && val < high_band_3) {
count_3 += 1;
}
}
Expand All @@ -7578,7 +7576,7 @@ is_normal(const V &column, double epsl, bool check_for_standard) {
std::fabs((count_3 / col_s) - 0.997) <= epsl) {
if (check_for_standard)
return (std::fabs(mean - 0) <= epsl &&
std::fabs(std - 1.0) <= epsl);
std::fabs(stdev - 1.0) <= epsl);
return (true);
}
return (false);
Expand All @@ -7597,28 +7595,30 @@ is_lognormal(const V &column, double epsl) {
StatsVisitor<value_type, int> log_visit;

svisit.pre();
for (auto citer : column) [[likely]] {
svisit(dummy_idx, static_cast<value_type>(std::log(citer)));
log_visit(dummy_idx, citer);
for (auto val : column) [[likely]] {
svisit(dummy_idx, static_cast<value_type>(std::log(val)));
log_visit(dummy_idx, val);
}
svisit.post();

const value_type mean = static_cast<value_type>(svisit.get_mean());
const value_type std = static_cast<value_type>(svisit.get_std());
const value_type high_band_1 = static_cast<value_type>(mean + std);
const value_type low_band_1 = static_cast<value_type>(mean - std);
const value_type stdev = static_cast<value_type>(svisit.get_std());
const value_type high_band_1 = static_cast<value_type>(mean + stdev);
const value_type low_band_1 = static_cast<value_type>(mean - stdev);
double count_1 = 0.0;
const value_type high_band_2 =
static_cast<value_type>(mean + std * 2.0);
const value_type low_band_2 = static_cast<value_type>(mean - std * 2.0);
static_cast<value_type>(mean + stdev * 2.0);
const value_type low_band_2 =
static_cast<value_type>(mean - stdev * 2.0);
double count_2 = 0.0;
const value_type high_band_3 =
static_cast<value_type>(mean + std * 3.0);
const value_type low_band_3 = static_cast<value_type>(mean - std * 3.0);
static_cast<value_type>(mean + stdev * 3.0);
const value_type low_band_3 =
static_cast<value_type>(mean - stdev * 3.0);
double count_3 = 0.0;

for (auto citer : column) [[likely]] {
const auto log_val = std::log(citer);
for (const auto &val : column) [[likely]] {
const auto log_val = std::log(val);

if (log_val >= low_band_1 && log_val < high_band_1) {
count_3 += 1;
Expand Down
Loading

0 comments on commit 6bf1500

Please sign in to comment.