From 78741990f3706ac0d159bff1c32fb3bbc3a6870f Mon Sep 17 00:00:00 2001
From: Sermet Pekin
Date: Sun, 8 Dec 2024 23:00:32 +0300
Subject: [PATCH] ch1: fix shared_ptr<Value> mismatch in cross-entropy; add DataFrame train/eval

---
 Makefile                  |   1 +
 easy_df.cpp               |  42 +++++++++
 include/console_utils.hpp |  13 +--
 include/data_utils.hpp    |  13 +++
 include/dataframe.hpp     |  94 ++++++++++++++++++--
 include/datasetType.hpp   |  38 ++++++++
 include/loss.hpp          |  31 ++++---
 include/micrograd.hpp     |   1 +
 include/train_eval.hpp    | 181 ++++++++++++++++++++++++++++++++++++++
 include/types.hpp         |   1 +
 include/value.hpp         |  18 +++-
 main.cpp                  |  15 ++++
 tests/test_cross_ent.cpp  |  41 +++++++++
 13 files changed, 466 insertions(+), 23 deletions(-)
 create mode 100644 easy_df.cpp
 create mode 100644 include/train_eval.hpp
 create mode 100644 tests/test_cross_ent.cpp

diff --git a/Makefile b/Makefile
index d3fa511..bf010b3 100644
--- a/Makefile
+++ b/Makefile
@@ -40,6 +40,7 @@ clean:
 	rm -f $(OBJS) $(TARGET) $(TEST_TARGET)
 
 test: clean test_only
+pytest: clean test_only
 
 .PHONY: clean run test

diff --git a/easy_df.cpp b/easy_df.cpp
new file mode 100644
index 0000000..27bd692
--- /dev/null
+++ b/easy_df.cpp
@@ -0,0 +1,42 @@
+#include "micrograd.hpp"
+using namespace microgradCpp;
+
+int main()
+{
+    // DatasetType dataset = get_iris();
+
+    DataFrame df;
+    df.from_csv("./data/iris.csv");
+
+    df.encode_column("variety");
+
+    df.print();
+
+    // shuffle(dataset);
+    double TRAIN_SIZE{0.8};
+
+    // Create the MLP model
+    // Input: 4 features, hidden layers: [10, 10], output: 3 classes
+    MLP model(4, {10, 10, 3});
+
+    // Hyperparameters
+    double learning_rate = 0.01;
+    int epochs = 2;
+
+    // Train and evaluate the model
+    train_eval(df, TRAIN_SIZE, model, learning_rate, epochs);
+
+    return 0;
+}
+
+/*
+Notes
+-----------
+
+g++ -std=c++17 -Iinclude -O2 -o main easy_df.cpp
+
+// or
+make run
+
+*/
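Note: encode_column("variety") is assumed to label-encode the class column in place. The sketch below shows that assumed behavior as a free-standing hypothetical helper (label_encode is not part of this patch, and the order of code assignment is an assumption):

#include <string>
#include <unordered_map>
#include <vector>

// Hypothetical sketch: map each distinct label to the next unused double code,
// in order of first appearance (e.g. Setosa -> 0, Versicolor -> 1, Virginica -> 2).
std::vector<double> label_encode(const std::vector<std::string> &labels)
{
    std::unordered_map<std::string, double> codes;
    std::vector<double> out;
    out.reserve(labels.size());
    for (const auto &s : labels)
    {
        // emplace keeps the existing code if the label was seen before
        auto [it, inserted] = codes.emplace(s, static_cast<double>(codes.size()));
        out.push_back(it->second);
    }
    return out;
}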
šŸ”„šŸ”„šŸ”„\n" << std::endl; - - throw std::out_of_range(reason) ; - } + throw std::out_of_range(reason); + } + inline void stop(const std::string &reason = "...") + { + epic_out_of_range(reason); + } // Function to format shapes for display inline std::string format_shape(size_t rows, size_t cols) diff --git a/include/data_utils.hpp b/include/data_utils.hpp index 1f21fae..b639c82 100644 --- a/include/data_utils.hpp +++ b/include/data_utils.hpp @@ -14,9 +14,22 @@ #include #include "mlp.hpp" #include "console_utils.hpp" +#include "types.hpp" +using namespace microgradCpp; using vv_string = std::vector>; using vv_double = std::vector>; + +static inline v_shared_Value one_hot_encode(int class_index, int num_classes) +{ + + v_shared_Value target(num_classes, std::make_shared(0.0)); + + target[class_index] = std::make_shared(1.0); + + return target; +} + inline void log_model_info(const std::vector &layer_sizes, size_t input_features, size_t output_targets, diff --git a/include/dataframe.hpp b/include/dataframe.hpp index a62e420..2815e01 100644 --- a/include/dataframe.hpp +++ b/include/dataframe.hpp @@ -54,6 +54,7 @@ #include "header.hpp" #include "range.hpp" #include "console_utils.hpp" +#include "types.hpp" namespace microgradCpp @@ -78,6 +79,12 @@ namespace microgradCpp static inline bool DEFAULT_INPLACE = true; + int size() const + { + + return get_all_row_indices().size(); + } + DataFrame operator()(const std::initializer_list &row_indices, const std::vector &col_names) { return this->slice(std::vector(row_indices.begin(), row_indices.end()), col_names, DEFAULT_INPLACE); @@ -96,6 +103,82 @@ namespace microgradCpp return this->slice(numbers, col_names, DEFAULT_INPLACE); } + DataFrame rows(const Range &range) + { + + auto numbers = range.to_vector(); + + return this->slice(numbers, column_order, DEFAULT_INPLACE); + } + + v_string v(const Range &column_range) + { + v_string items; + for (size_t i = 0; i < column_order.size(); ++i) + { + if (column_range.includes(i)) + { + items.push_back(column_order[i]); + } + } + return items; + } + + vv_double to_vv_double() const + { + vv_double result; + + if (columns.empty()) + return result; + + // Determine the number of rows based on the first column + size_t num_rows = columns.begin()->second.size(); + + // Iterate through each row + for (size_t i = 0; i < num_rows; ++i) + { + std::vector row; + for (const auto &col_name : column_order) + { + const auto &col = columns.at(col_name); + if (i < col.size()) + { + const auto &cell = col[i]; + if (std::holds_alternative(cell)) + { + row.push_back(std::get(cell)); + } + else + { + row.push_back(0.0); + } + } + } + result.push_back(row); + } + + return result; + } + + // vv_string v(const Range &colum_range){ + + // vv_string items ; + // for(int i =0 ; i< column_order.size() ; i++ ){ + // if( colum_range.includes( i )) + // items.push_back( column_order[ i ]) ; + + // } + // return items ; + + // } + DataFrame subset(const Range &range, const Range &colum_range) + { + + auto numbers = range.to_vector(); + + return this->slice(numbers, column_order, DEFAULT_INPLACE); + } + DataFrame operator()(const Range &range) { @@ -115,6 +198,11 @@ namespace microgradCpp return this->slice(get_all_row_indices(), column_order, DEFAULT_INPLACE); } + // DataFrame operator()(const Range &range) + // { + // return this->slice(range.to_vector(), column_order, DEFAULT_INPLACE); + // } + DataFrame operator()(const std::vector &row_indices) { return this->slice(row_indices, column_order, DEFAULT_INPLACE); @@ -131,12 +219,6 @@ 
diff --git a/include/dataframe.hpp b/include/dataframe.hpp
index a62e420..2815e01 100644
--- a/include/dataframe.hpp
+++ b/include/dataframe.hpp
@@ -54,6 +54,7 @@
 #include "header.hpp"
 #include "range.hpp"
 #include "console_utils.hpp"
+#include "types.hpp"
 
 namespace microgradCpp
 
@@ -78,6 +79,12 @@ namespace microgradCpp
 
         static inline bool DEFAULT_INPLACE = true;
 
+        int size() const
+        {
+            return static_cast<int>(get_all_row_indices().size());
+        }
+
         DataFrame operator()(const std::initializer_list<size_t> &row_indices, const std::vector<std::string> &col_names)
         {
             return this->slice(std::vector<size_t>(row_indices.begin(), row_indices.end()), col_names, DEFAULT_INPLACE);
         }
@@ -96,6 +103,82 @@ namespace microgradCpp
             return this->slice(numbers, col_names, DEFAULT_INPLACE);
         }
 
+        DataFrame rows(const Range &range)
+        {
+            auto numbers = range.to_vector();
+            return this->slice(numbers, column_order, DEFAULT_INPLACE);
+        }
+
+        v_string v(const Range &column_range)
+        {
+            v_string items;
+            for (size_t i = 0; i < column_order.size(); ++i)
+            {
+                if (column_range.includes(i))
+                {
+                    items.push_back(column_order[i]);
+                }
+            }
+            return items;
+        }
+
+        vv_double to_vv_double() const
+        {
+            vv_double result;
+
+            if (columns.empty())
+                return result;
+
+            // Determine the number of rows based on the first column
+            size_t num_rows = columns.begin()->second.size();
+
+            // Iterate through each row
+            for (size_t i = 0; i < num_rows; ++i)
+            {
+                std::vector<double> row;
+                for (const auto &col_name : column_order)
+                {
+                    const auto &col = columns.at(col_name);
+                    if (i < col.size())
+                    {
+                        const auto &cell = col[i];
+                        if (std::holds_alternative<double>(cell))
+                        {
+                            row.push_back(std::get<double>(cell));
+                        }
+                        else
+                        {
+                            row.push_back(0.0); // non-numeric cells default to 0.0
+                        }
+                    }
+                }
+                result.push_back(row);
+            }
+
+            return result;
+        }
+
+        DataFrame subset(const Range &range, const Range &column_range)
+        {
+            auto numbers = range.to_vector();
+            // Slice the requested columns, not the whole column_order
+            return this->slice(numbers, v(column_range), DEFAULT_INPLACE);
+        }
+
         DataFrame operator()(const Range &range)
         {
             return this->slice(range.to_vector(), column_order, DEFAULT_INPLACE);
         }
 
         DataFrame operator()(const std::vector<size_t> &row_indices)
         {
             return this->slice(row_indices, column_order, DEFAULT_INPLACE);
         }
@@ -131,12 +219,6 @@ namespace microgradCpp
             return this->slice(get_all_row_indices(), col_names, inplace);
         }
-
-
-
-
-
-
         DataFrame slice(const std::vector<size_t> &row_indices, const std::vector<std::string> &col_names, bool inplace = DEFAULT_INPLACE)
         {

diff --git a/include/datasetType.hpp b/include/datasetType.hpp
index 71a8d35..8d2e6a7 100644
--- a/include/datasetType.hpp
+++ b/include/datasetType.hpp
@@ -6,6 +6,7 @@
 #include <vector>
 #include "value.hpp"
 #include "types.hpp"
+#include "dataframe.hpp"
 
 using namespace microgradCpp;
 
 inline DatasetType convert_to_dataset(const vv_double &data, int target_column = -1)
@@ -43,6 +44,43 @@ inline DatasetType convert_to_dataset(const vv_double &data, int target_column =
     return dataset;
 }
 
+inline DatasetType convert_to_dataset(const DataFrame &df, int target_column = -1)
+{
+    DatasetType dataset;
+    vv_double data = df.to_vv_double();
+
+    for (const auto &row : data)
+    {
+        if (row.empty())
+        {
+            continue; // Skip empty rows
+        }
+
+        // Determine the target column index (-1 means the last column)
+        int target_idx = (target_column == -1) ? static_cast<int>(row.size()) - 1 : target_column;
+
+        // Split the row into inputs and targets
+        std::vector<std::shared_ptr<Value>> inputs;
+        std::vector<std::shared_ptr<Value>> targets;
+        for (size_t i = 0; i < row.size(); ++i)
+        {
+            if (static_cast<int>(i) == target_idx)
+            {
+                targets.push_back(std::make_shared<Value>(row[i]));
+            }
+            else
+            {
+                inputs.push_back(std::make_shared<Value>(row[i]));
+            }
+        }
+
+        // Add the pair to the dataset
+        dataset.emplace_back(inputs, targets);
+    }
+
+    return dataset;
+}
+
 #include <iostream>
 #include <memory>
 #include <string>

diff --git a/include/loss.hpp b/include/loss.hpp
index 69e1fb3..5087d55 100644
--- a/include/loss.hpp
+++ b/include/loss.hpp
@@ -24,31 +24,40 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 */
 
-#include "value.hpp"
 #include <vector>
 #include <memory>
 #include <cmath>
 #include <iostream>
+#include "value.hpp"
+#include "console_utils.hpp"
+#include "data_utils.hpp"
+using namespace microgradCpp;
 
-class Loss {
+class Loss
+{
 public:
     static std::shared_ptr<Value> cross_entropy(
-        const std::vector<std::shared_ptr<Value>>& predictions,
-        const std::vector<std::shared_ptr<Value>>& targets
-    ) {
+        const std::vector<std::shared_ptr<Value>> &predictions,
+        const std::vector<std::shared_ptr<Value>> &targets)
+    {
         // Assumes:
         // 1. predictions are already probabilities (from softmax in MLP forward)
         // 2. targets are one-hot encoded: exactly one element is 1, others are 0
         // cross entropy = -sum_i t_i * log(p_i)
 
+        if (predictions.size() != targets.size() || predictions.empty())
+        {
+            std::cout << predictions.size() << " predictions <== ==> targets " << targets.size() << std::endl;
+            stop("cross_entropy: predictions and targets must have the same non-zero size");
+        }
+
         auto loss = std::make_shared<Value>(0.0);
-        for (size_t i = 0; i < predictions.size(); ++i) {
+
+        for (size_t i = 0; i < predictions.size(); ++i)
+        {
             // log(p_i)
             auto logp = predictions[i]->log();
             // accumulate t_i * log(p_i)
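Note: the formula in the comment, evaluated on the same numbers the test at the bottom of this patch uses. Plain doubles, no autograd, just to verify the arithmetic:

#include <cmath>
#include <cstdio>

int main()
{
    // cross entropy = -sum_i t_i * log(p_i)
    double p[3] = {0.7, 0.2, 0.1}; // softmax probabilities
    double t[3] = {0.0, 1.0, 0.0}; // one-hot target: class 1
    double ce = 0.0;
    for (int i = 0; i < 3; ++i)
        ce -= t[i] * std::log(p[i]);
    std::printf("%.4f\n", ce); // prints 1.6094, i.e. -log(0.2)
    return 0;
}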
#include "dataframe.hpp"
 #include "dataframe_utils.hpp"
 #include "sp_testing_utils.hpp"
+#include "train_eval.hpp"
 #include "value.hpp"
 #include "iris.hpp"

diff --git a/include/train_eval.hpp b/include/train_eval.hpp
new file mode 100644
index 0000000..37100cf
--- /dev/null
+++ b/include/train_eval.hpp
@@ -0,0 +1,181 @@
+#pragma once
+
+#include <iostream>
+#include <chrono>
+#include <algorithm>
+#include "dataframe.hpp"
+#include "mlp.hpp"
+#include "sgd.hpp"
+#include "loss.hpp"
+#include "types.hpp"
+#include "datasetType.hpp"
+
+using namespace microgradCpp;
+
+// Split the DataFrame into train and test sets.
+// Rows keep their file order; no shuffling is done here.
+inline void train_test_split(
+    const DataFrame &df,
+    double TRAIN_SIZE,
+    ColRows &train_inputs,
+    ColRows &train_targets,
+    ColRows &test_inputs,
+    ColRows &test_targets)
+{
+    DatasetType dataset = convert_to_dataset(df);
+    size_t train_size = static_cast<size_t>(dataset.size() * TRAIN_SIZE);
+
+    for (size_t i = 0; i < train_size; ++i)
+    {
+        train_inputs.push_back(dataset[i].first);
+        train_targets.push_back(dataset[i].second);
+    }
+    for (size_t i = train_size; i < dataset.size(); ++i)
+    {
+        test_inputs.push_back(dataset[i].first);
+        test_targets.push_back(dataset[i].second);
+    }
+}
+
+inline void print(const ColRows &colrows)
+{
+    for (size_t i = 0; i < colrows.size(); ++i)
+    {
+        std::cout << "Row " << i << ": ";
+        for (const auto &cell : colrows[i])
+        {
+            if (cell)
+            {
+                std::cout << cell->data << " ";
+            }
+            else
+            {
+                std::cout << "NaN ";
+            }
+        }
+        std::cout << "\n";
+    }
+}
+
+/*======================== train_eval DataFrame =================================================*/
+inline void train_eval(DataFrame &df, double train_size, MLP &model, double lr = 0.01, int epochs = 100)
+{
+    ColRows train_inputs, train_targets;
+    ColRows test_inputs, test_targets;
+
+    train_test_split(df, train_size, train_inputs, train_targets, test_inputs, test_targets);
+
+    // Create SGD optimizer
+    SGD optimizer(lr);
+
+    auto start = std::chrono::high_resolution_clock::now();
+
+    // Training loop
+    for (int epoch = 0; epoch < epochs; ++epoch)
+    {
+        double total_loss = 0.0;
+
+        size_t NUM_Training = train_inputs.size();
+        for (size_t i = 0; i < NUM_Training; ++i)
+        {
+            // Forward pass (training=true)
+            auto predictions = model.forward(train_inputs[i], true);
+
+            // Targets arrive from convert_to_dataset as a single class index;
+            // expand to one-hot so sizes match the prediction vector.
+            int num_classes = static_cast<int>(predictions.size());
+            int label = static_cast<int>(train_targets[i][0]->data);
+            v_shared_Value target = one_hot_encode(label, num_classes);
+
+            // Compute Cross-Entropy Loss
+            auto loss = Loss::cross_entropy(predictions, target);
+            total_loss += loss->data;
+
+            // Backpropagation
+            optimizer.zero_grad(model.parameters());
+            loss->backward();
+
+            // Update weights
+            optimizer.step(model.parameters());
+        }
+
+        std::cout << "Epoch " << epoch + 1 << "/" << epochs
+                  << ", Loss: " << total_loss / train_inputs.size() << std::endl;
+
+        // Evaluate test accuracy every 10 epochs and on the last epoch
+        if (epoch % 10 == 0 || epoch == epochs - 1)
+        {
+            int correct = 0;
+            size_t NUM_Test = test_inputs.size();
+
+            for (size_t i = 0; i < NUM_Test; ++i)
+            {
+                auto predictions = model.forward(test_inputs[i], false);
+
+                // Find the predicted class by comparing probabilities, not pointers
+                auto max_it = std::max_element(
+                    predictions.begin(), predictions.end(),
+                    [](const std::shared_ptr<Value> &a, const std::shared_ptr<Value> &b)
+                    { return a->data < b->data; });
+                int predicted_class = static_cast<int>(std::distance(predictions.begin(), max_it));
+
+                // Check if the prediction matches the target class index
+                int label = static_cast<int>(test_targets[i][0]->data);
+                if (predicted_class == label)
+                {
+                    correct++;
+                }
+            }
+
+            double accuracy = static_cast<double>(correct) / test_inputs.size();
+            std::cout << "Epoch " << epoch + 1 << ": Test Accuracy = " << accuracy * 100.0 << "%" << std::endl;
+
+            if (epoch == epochs - 1)
+            {
+                auto end = std::chrono::high_resolution_clock::now();
+                std::chrono::duration<double> duration = end - start;
+                std::cout << "Duration: " << duration.count() << " seconds" << std::endl;
+            }
+        }
+    }
+}
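Note: train_test_split above takes the first TRAIN_SIZE fraction of rows in file order, so for a class-sorted CSV such as iris.csv the held-out 20% would be a single class (easy_df.cpp hints at this with its commented-out shuffle(dataset) call). A sketch of one way to fix that, assuming DatasetType is the pair-of-vectors alias from types.hpp (shuffle_dataset itself is not part of this patch):

#include <algorithm>
#include <random>

// Sketch: shuffle the converted dataset in place before slicing off the train split.
inline void shuffle_dataset(DatasetType &dataset)
{
    static std::mt19937 rng(42); // fixed seed keeps splits reproducible across runs
    std::shuffle(dataset.begin(), dataset.end(), rng);
}

It would be called right after convert_to_dataset(df) inside train_test_split.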
diff --git a/include/types.hpp b/include/types.hpp
index 3419fd5..49774bf 100644
--- a/include/types.hpp
+++ b/include/types.hpp
@@ -44,6 +44,7 @@ namespace microgradCpp
 
     using ColumnData = std::variant<std::vector<double>, std::vector<std::string>>;
     using vv_shared_Value = std::vector<std::vector<std::shared_ptr<Value>>>;
+    using v_shared_Value = std::vector<std::shared_ptr<Value>>;
 
     using DatasetType = std::vector<std::pair<std::vector<std::shared_ptr<Value>>, std::vector<std::shared_ptr<Value>>>>;
     using ColRows = std::vector<std::vector<std::shared_ptr<Value>>>;

diff --git a/include/value.hpp b/include/value.hpp
index dfa6643..fee5f35 100644
--- a/include/value.hpp
+++ b/include/value.hpp
@@ -48,7 +48,7 @@ class Value : public std::enable_shared_from_this<Value>
     mutable bool topo_cached = false;
 
     // Constructor
-    Value(double data, const std::string &label = "", bool cache_topology = true )
+    Value(double data, const std::string &label = "", bool cache_topology = true)
         : data(data),
           grad(0.0),
           label(label),
@@ -69,6 +69,17 @@ class Value : public std::enable_shared_from_this<Value>
             parents.push_back(parent);
         }
     }
+
+    // Member function for Value == double (exact comparison of data)
+    bool operator==(double val) const
+    {
+        return data == val;
+    }
+
+    // Friend function for double == Value
+    friend bool operator==(double val, const Value &v)
+    {
+        return val == v.data;
+    }
 
     // Build topological order for backpropagation (with caching)
     std::vector<std::shared_ptr<Value>> build_topological_order()
@@ -83,6 +94,11 @@ class Value : public std::enable_shared_from_this<Value>
 
         std::function<void(Value *)> visit = [&](Value *v)
         {
+            if (!v)
+            {
+                throw std::runtime_error("Null pointer encountered in build_topological_order");
+            }
+
             if (v && visited.find(v) == visited.end())
             {
                 visited.insert(v);
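Note: the new operators compare data exactly, which is safe for one-hot targets built from the literals 0.0 and 1.0 but not for values produced by floating-point arithmetic. A minimal usage sketch:

Value v(1.0);
bool a = (v == 1.0); // true: member operator, compares v.data
bool b = (1.0 == v); // true: friend operator, same comparison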
diff --git a/main.cpp b/main.cpp
index 17a81cb..bac1336 100644
--- a/main.cpp
+++ b/main.cpp
@@ -158,4 +158,19 @@
 g++ -std=c++17 -Iinclude -O2 -o main main.cpp
 
 make run
 
+g++ -std=c++20 -Iinclude -O2 -g -o main easy_df.cpp
+
+To debug, build without optimization and run under lldb:
+g++ -std=c++20 -Iinclude -g -O0 -o main easy_df.cpp
+lldb ./main
+r
+bt
+
+AddressSanitizer reports invalid memory accesses with detailed stack traces:
+g++ -std=c++20 -fsanitize=address -Iinclude -g -O0 -o main easy_df.cpp
+./main
+
 */

diff --git a/tests/test_cross_ent.cpp b/tests/test_cross_ent.cpp
new file mode 100644
index 0000000..04430ff
--- /dev/null
+++ b/tests/test_cross_ent.cpp
@@ -0,0 +1,41 @@
+#include <gtest/gtest.h>
+#include <cmath>
+#include <memory>
+#include "value.hpp"
+#include "loss.hpp"
+#include "micrograd.hpp"
+#include "types.hpp"
+using namespace microgradCpp;
+
+// Helper function to create a Value pointer
+std::shared_ptr<Value> make_value(double data)
+{
+    return std::make_shared<Value>(data);
+}
+
+// Test for Loss::cross_entropy
+TEST(LossTest, CrossEntropyLoss)
+{
+    // Example predictions: probabilities for 3 classes
+    std::vector<std::shared_ptr<Value>> predictions = {
+        make_value(0.7), // Class 0
+        make_value(0.2), // Class 1
+        make_value(0.1)  // Class 2
+    };
+
+    // Target: class 1 (one-hot encoded)
+    std::vector<std::shared_ptr<Value>> targets = {
+        make_value(0.0), // Class 0
+        make_value(1.0), // Class 1 (correct class)
+        make_value(0.0)  // Class 2
+    };
+
+    // Compute the cross-entropy loss
+    auto loss = Loss::cross_entropy(predictions, targets);
+
+    // Expected cross-entropy loss: -log(p_1) = -log(0.2) ā‰ˆ 1.6094
+    double expected_loss = -std::log(0.2);
+
+    // Check if the computed loss is approximately equal to the expected loss
+    ASSERT_NEAR(loss->data, expected_loss, 1e-4);
+}
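Note: a compile-and-run sketch for the new test. The GoogleTest linkage flags are the conventional ones; adjust paths and library locations to your setup:

g++ -std=c++17 -Iinclude tests/test_cross_ent.cpp -lgtest -lgtest_main -pthread -o test_cross_ent
./test_cross_ent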