diff --git a/csv_divider/csv_divider.ipynb b/csv_divider/csv_divider.ipynb index 3935efd..2cbb677 100644 --- a/csv_divider/csv_divider.ipynb +++ b/csv_divider/csv_divider.ipynb @@ -33,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -132,7 +132,7 @@ "max_file_size_mb = 75\n", "\n", "\n", - "split_csv(input_path, output_dir, max_file_size_mb, delimiter=',')" + "split_csv(input_path, output_dir, max_file_size_mb, delimiter=',', output_file_name='chunk')" ] }, { @@ -420,6 +420,65 @@ "# Concatenated data head\n", "concatenated_data.head()" ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Real life example" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "~/python_sc/Learning/Python_learning/NLTK/data/amazon.csv\n" + ] + } + ], + "source": [ + "import os\n", + "\n", + "# Get the current working directory\n", + "current_dir = os.getcwd()\n", + "\n", + "input_path = \"~/python_sc/Learning/Python_learning/NLTK/data/amazon.csv\"\n", + "\n", + "\n", + "# Path to the amazon.csv file\n", + "file_path = os.path.join(input_path, \"amazon.csv\")\n", + "print(file_path)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saved 1 / 2\n", + "Saved 2 / 2\n" + ] + } + ], + "source": [ + "# Specify the args and kwargs: input_path, output_dir, max file size in megabytes, delimiter, and output file name\n", + "input_path = \"/Users/florian/python_sc/Learning/Python_learning/NLTK/data/amazon.csv\"\n", + "output_dir = \"/Users/florian/python_sc/Learning/Python_learning/NLTK/data\"\n", + "max_file_size_mb = 75\n", + "\n", + "\n", + "split_csv(input_path, output_dir, max_file_size_mb, delimiter=',', output_file_name=\"amazon\")" + ] } ], "metadata": {