Commit
Merge pull request #18 from ferdlestier/enhance-logging
Improve error handling and logging in parser
ferdlestier authored Apr 26, 2024
2 parents 5704680 + bb1eae7 commit 4d4cb5c
Showing 2 changed files with 126 additions and 124 deletions.
10 changes: 10 additions & 0 deletions README.md
@@ -1,3 +1,13 @@
# dataparsing

The idea of this script is to showcase how to parse a CSV file with data from my portfolio, using only pandas.

## Error Handling and Logging Practices

To ensure the robustness and reliability of the `parser.py` script, error handling and logging have been significantly improved. These enhancements aim to catch potential errors during data parsing or file operations and provide a structured approach to monitoring and debugging the script's execution.
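
In outline, the pattern is a module-level `logging` configuration plus a single `try`/`except` around the whole workflow. The following is a condensed sketch; the real script wraps many more analysis steps inside the same `try` block:

```python
import logging

import pandas as pd

# Write timestamped entries to parser.log at DEBUG level and above
logging.basicConfig(filename='parser.log', level=logging.DEBUG,
                    format='%(asctime)s:%(levelname)s:%(message)s')

try:
    # Any failure while loading or transforming the data lands in the except block
    port = pd.read_csv('minhaReq2.20220126.csv', index_col=False)
    logging.info("CSV file loaded successfully")
except Exception as e:
    logging.error("Error occurred: " + str(e))
```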

### Viewing and Interpreting Logs

The `parser.py` script now generates logs that can be found in the `parser.log` file. These logs include timestamps, log levels, and messages that describe the script's execution flow and any errors encountered. To view the logs, simply open the `parser.log` file with a text editor or a log viewer tool.
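
For example, the file might contain entries like the following (timestamps are illustrative; the ERROR line shows what a failed run would record):

```
2024-04-26 10:15:02,123:INFO:CSV file loaded successfully
2024-04-26 10:15:02,489:INFO:Top 20 daily growth calculated
2024-04-26 10:15:03,014:ERROR:Error occurred: [Errno 2] No such file or directory: 'minhaReq2.20220126.csv'
```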

By analyzing the logs, users can gain insights into the script's behavior, identify any issues that occurred during execution, and understand the context in which these issues happened. This information is invaluable for debugging purposes and for improving the script's performance and reliability.
240 changes: 116 additions & 124 deletions parser.py
@@ -2,131 +2,123 @@
# For that we'll only use pandas

import pandas as pd
import logging # Import logging module

# Configure logging
logging.basicConfig(filename='parser.log', level=logging.DEBUG, format='%(asctime)s:%(levelname)s:%(message)s')

try:
    # From the dataset I'll run different portfolio analyses by filtering only certain columns out of the dataset at a time
    colsprice = ['ticker',
                 'chgPct6M',
                 'chgPctMovAvg100D',
                 'pxLast',
                 'bestTargetPrice',
                 'eqyRawBeta6M',
                 'volatility360D',
                 'bestPeRatio',
                 'industryGroup']

    cols = ['ticker',
            'name',
            'chgPct1D',
            'pxLast',
            'bestTargetPrice',
            'eqyRawBeta6M',
            'bdvdProjDivAmt',
            'esgLinkedBonus',
            'industryGroup']

    fin_str = ['ticker',
               'waccNetOperProfit',
               'degreeFinancialLeverage',
               'degreeOperatingLeverage',
               'cfNetInc',
               'cfFreeCashFlow',
               'industryGroup']

    cap_ret = ['ticker',
               'salesRevTurn',
               'operMargin',
               'bdvdNextProjAct',
               'bdvdProjDivAmt',
               'retrnOnCommnEqtyAdjstd',
               'returnOnInvCapital',
               'waccTotalInvCapital',
               'bestPeRatio',
               'industryGroup']

    fundamentals = ['ticker',
                    'bestPeRatio',
                    'ebitdaToRevenue',
                    'currentEvToT12mEbitda',
                    'netIncome',
                    'cfNetInc',
                    'cfFreeCashFlow',
                    'freeCashFlowEquity',
                    'freeCashFlowMargin',
                    'freeCashFlowPerSh',
                    'industryGroup']

    # Import the .csv file as a dataframe
    port = pd.read_csv('minhaReq2.20220126.csv', index_col=False)
    logging.info("CSV file loaded successfully")

    # From the imported dataframe we'll only use the cols columns and sort them by 'chgPct1D' in descending order
    dailymovers = port[cols].sort_values(by='chgPct1D', ascending=False).fillna('-').set_index('ticker')
    # Displaying only the first 20 records with .head()
    dailymovers.head(20)
    logging.info("Top 20 daily growth calculated")

    # Displaying only the last 20 records with .tail()
    dailymovers.tail(20)
    logging.info("Top 20 daily loss calculated")

    # Now following the same steps but sorting by the column 'chgPct6M', which represents the price change over the last 6 months
    pricetracker = port[colsprice].sort_values(by='chgPct6M', ascending=False).set_index('ticker').head(20).fillna('-')
    pricetracker
    logging.info("Medium term price change (growth) calculated")

    pricetrackerloss = port[colsprice].sort_values(by='chgPct6M', ascending=False).set_index('ticker').tail(20).fillna('-')
    pricetrackerloss
    logging.info("Medium term price change (loss) calculated")

    # Creating a new column to calculate the difference between Analyst Recommendations and last price
    P_t = port[colsprice].set_index('ticker').fillna('0')
    P_t['bestTargetPrice'] = P_t['bestTargetPrice'].astype(float)

    p_e = P_t[P_t['bestTargetPrice'] != 0.0]
    p_e['pE'] = p_e['bestTargetPrice'] / p_e['pxLast'] - 1
    p_e.sort_values(by='pE', ascending=False)
    logging.info("Growth potential based on Analyst Recommendations calculated")

    # Tracking Financial Exposure
    exposure = port[fin_str].sort_values(by='degreeFinancialLeverage', ascending=True).set_index('ticker').fillna('-')
    exposure.head(20)
    logging.info("Financial exposure tracked")

    # Tracking Return on Invested Capital
    cap_return = port[cap_ret].sort_values(by='returnOnInvCapital', ascending=False).set_index('ticker').fillna('-')
    cap_return.head(20)
    logging.info("Return on Invested Capital tracked")

    # Tracking Fundamental Data
    fund_data = port[fundamentals].sort_values(by='ebitdaToRevenue', ascending=False).set_index('ticker').fillna(0)
    fund_data.head(20)
    logging.info("Fundamental data tracked")

    # Comparing Fundamental Data Across Industry Groups
    grupos = ['Diversified Finan Serv', 'Software', 'REITS',
              'Commercial Services', 'Real Estate', 'Electric', 'Semiconductors',
              'Computers', 'Internet', 'Oil&Gas Services', 'Healthcare-Products',
              'Private Equity', 'Cosmetics/Personal Care', 'Oil&Gas', 'Retail',
              'Home Furnishings', 'Auto Manufacturers', 'Pharmaceuticals',
              'Apparel', 'Biotechnology', 'Banks', 'Insurance']

    for grupo in grupos:
        industria = fund_data[fund_data['industryGroup'] == str(grupo)].sort_values(by='ebitdaToRevenue', ascending=False)
        industria
        logging.info(f"Fundamental data compared across industry group: {grupo}")

except Exception as e:
    logging.error("Error occurred: " + str(e))
