From 84de977c4cef532463507c999558536f9ef44b87 Mon Sep 17 00:00:00 2001
From: Ehsan Mashhadi
Date: Sun, 20 Aug 2023 18:42:16 -0600
Subject: [PATCH] Update readme file

---
 .DS_Store                    | Bin 6148 -> 6148 bytes
 README.md                    | 67 +++++++++++++++++------------------
 data_gathering/.DS_Store     | Bin 10244 -> 10244 bytes
 experiments/.DS_Store        | Bin 6148 -> 8196 bytes
 experiments/models/.DS_Store | Bin 6148 -> 6148 bytes
 5 files changed, 33 insertions(+), 34 deletions(-)

diff --git a/.DS_Store b/.DS_Store
index ef5aa0d0c2551e5d48e8ed82886b09b32338d68b..dbfe3857200c0cd150b9503066c1339d05f0ac43 100644
GIT binary patch
delta 86
zcmZoMXffDe$i}#1vIR?_x%)xfOwNt*&l3X=lIJH06E+mHvj+t

delta 80
zcmZoMXffDe$i}#BvIR?_s#tZkxrvT~g<-9ZLbaulfsTTuxk+s;Cx@uAzI9N1c1~_y
je#hkVEb@$qktF*

diff --git a/README.md b/README.md
index a3a839d..3b48d8d 100644
--- a/README.md
+++ b/README.md
@@ -10,17 +10,22 @@ This artifact contains all data (including data gathering step), code, and scrip
 This folder contains all scripts and code required (specific to this paper) to re-run the training and testing of our models (including classic models, CodeBERT, ConcatInline, and ConcatCLS).
 The structure of this folder is:
 ```
-+-- data (contains data and preprocessing step script)
-| +-- preprocess.sh
-+-- dataset (contains dataset split after preprocessing)
++-- data (contains the paper's full dataset and the preprocessing script)
+| +-- preprocess.sh (splits the dataset and scales values)
++-- dataset (contains a small preprocessed subset of the dataset for the Getting Started section)
 +-- models
-| +-- code_metrics (contains code for classic models)
+| +-- code_metrics (contains code for training and testing our classic models)
 | +-- train_test.sh (training and testing the models)
 | +-- code_representation
 | +-- codebert
-| +-- train.sh (training the models)
-| +-- inference.sh (testing the models)
-| +-- evaluation (evaluation metrics)
+| +-- CodeBertModel.py (code for the CodeBERT model)
+| +-- ConcatInline.py (code for the ConcatInline model)
+| +-- ConcatCLS.py (code for the ConcatCLS model)
+| +-- train.sh (script for training the models)
+| +-- inference.sh (script for testing the models)
+| +-- evaluation
+| +-- evaluation.py (evaluation metrics)
++-- utils (constants file)
 ```

 #### data
@@ -55,19 +60,11 @@ For Getting Started:
 - CPU/RAM: There is no strict minimum on these.
 - Python: Python 3 is required.

-For *Reproducibility* only:
-- Java: Java 18 is required (**only for running data gathering step**).
-- Git: (**only for running data gathering step**).
-- SVN (**only for running data gathering step**).
-- [Defects4J](https://github.com/rjust/defects4j) (**only for running data gathering step**).
-- [Bugs.jar](https://github.com/bugs-dot-jar/bugs-dot-jar) (**only for running data gathering step**).
-
 ## Getting Started:
 This section only sets up the artifact and validates its general functionality using a small example dataset (the complete dataset for the classic models, but only the first 50 rows for the CodeBERT models).

 1. Clone the repository
- - `git@github.com:EhsanMashhadi/ISSRE2023-BugSeverityPrediction.git`
-
+ - `git@github.com:EhsanMashhadi/ISSRE2023-BugSeverityPrediction.git`
 2. Install dependencies (using the `requirements.txt` file) or manually:
 - `pip install pandas==1.4.2`
@@ -105,14 +102,6 @@ This section is only set up the artifact and validate its general functionality
 - `git@github.com:EhsanMashhadi/ISSRE2023-BugSeverityPrediction.git`
 2. Install dependencies (you may need to change the torch version to run on your GPU/CPU)
-**Note: If you only want to re-run the experiments of this paper you can skip `Data Gathering` section below (recommended)**
-
-- **Data gathering**: All following should be installed completely and correctly to reproduce the dataset gathering step (this setup may take long time)
- - Install Git (brew, apt, ... based on your OS)
- - Install SVN (brew, apt, ... based on your OS)
- - Install [Defects4J](https://github.com/rjust/defects4j) (Follow all the steps in the provided installation guide)
- - Install [Bugs.jar](https://github.com/bugs-dot-jar/bugs-dot-jar) (You must install this in the `data_gathering` directory)
-
 - **Experiments**:
 - It is better to install these dependencies in a virtual env (you can also use requirements.txt)
 - `pip install pandas==1.4.2`
@@ -153,7 +142,26 @@ This section is only set up the artifact and validate its general functionality
 4. `bash inference.sh` for evaluating the model with the `test` split
 5. Results are generated in the `log` folder
-### How to re-run the data gathering step?
+### How to run with different config/hyperparameters?
+ - You can change/add different hyperparameters/configs in the `train.sh` and `inference.sh` files.
+
+### Have trouble running on GPU?
+1. Check `CUDA` and `PyTorch` compatibility.
+2. Assign the correct values for `CUDA_VISIBLE_DEVICES`, `gpu_rank`, and `world_size` based on your GPU count in all scripts.
+3. Run on CPU by removing the `gpu_rank` and `world_size` options in all scripts.
+4. Refer to the [CodeBERT Repo](https://github.com/microsoft/CodeBERT) for common issues.
+
+
+### How to re-run the data gathering step (outside the paper's scope)?
+
+The tools below must be installed and configured correctly; otherwise, this step will not work. It may take a long time and can be skipped (recommended).
+
+ - Java: Java 18 is required (**only for running data gathering step**).
+ - Git: (brew, apt, ... based on your OS)
+ - SVN: (brew, apt, ... based on your OS)
+ - [Defects4J](https://github.com/rjust/defects4j) (Follow all the steps in the provided installation guide).
+ - [Bugs.jar](https://github.com/bugs-dot-jar/bugs-dot-jar) (You must install this in the `data_gathering` directory).
+
 1. `cd ISSRE2023-BugSeverityPrediction/data_gathering/issue_scraper`
 2. `python main.py`
@@ -162,12 +170,3 @@ For below steps it can easier to use `gradlew`or simply open by IntelliJ IDEA to
 4. `run MethodExtractorMain.java`
 5. `cd ISSRE2023-BugSeverityPrediction/data_gathering/MetricsExtractor/src/main/java/software/ehsan/severityprediction/metric_extractor`
 6. `run MetricCalculatorMain.java`
-
-### How to run with different config/hyperparameters?
- - You can change/add different hyperparameters/configs in `train.sh` and `inference.sh` files.
-
-### Have trouble running on GPU?
-1. Check the `CUDA` and `PyTorch` compatibility
-2. Assign the correct values for `CUDA_VISIBLE_DEVICES`, `gpu_rank`, and `world_size` based on your GPU numbers in all scripts.
-3. Run on CPU by removing the `gpu_rank`, and `world_size` options in all scripts.
-4. Refer to the [CodeBERT Repo](https://github.com/microsoft/CodeBERT) for finding common issue.
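The GPU troubleshooting list in the updated README can be made concrete with a short Python fragment. This is a sketch only: `CUDA_VISIBLE_DEVICES` is the standard CUDA environment variable, but the single-GPU values shown for `gpu_rank` and `world_size` are assumptions about the scripts' conventions, not taken from the artifact.

```python
import os

# Make only the first GPU visible to this process and its children;
# set it to an empty string ("") to hide all GPUs and force CPU execution.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# gpu_rank / world_size are the repo's distributed-training knobs; with a
# single visible GPU they would typically be 0 and 1 (assumed values).
gpu_rank, world_size = 0, 1
```

Setting the environment variable before launching `train.sh` / `inference.sh` (rather than inside them) keeps the scripts unchanged across machines.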
\ No newline at end of file
diff --git a/data_gathering/.DS_Store b/data_gathering/.DS_Store
index 7c1375af72fd02a8336ec7f76c20219db22f05be..c1f19e454ee90415033d23fc8b32e8ca7c573c67 100644
GIT binary patch
delta 29
jcmZn(XbIRLBFM-v`JI5=W@AAGMj$g$N@a6}P>(18gd+%7

delta 29
jcmZn(XbIRLBFM-%`JI5=W@AAGMj$g$N@a6}P>(18ghB{e

diff --git a/experiments/.DS_Store b/experiments/.DS_Store
index 871e22f886b1fc4435122454130038c596309dd1..4d624a313d4a040d59b5112a4370ee4ae494bee6 100644
GIT binary patch
delta 302
zcmZoMXmOBWU|?W$DortDU;r^WfEYvza8E20o2aMA$g?qEH}hr%jz7$c**Q2SHn1@A
zOy*(f6*e{2Q7|&B)lsN6G&Zo*QLr>OnY^CWc=COgJtC&qB`YUOvo_$8G`3@7$Ysc9
zNMT52$YCf>DNfEw%FoXMI)wp6qAa*5FDE}Q9VpHS#4HS@3?&Si3^{nso@~Mg(WtY
u?qgic&cPwb4736W1h|2OE6BEuh2NPc^UHXG>}FtsxC7)ghRyLjbC?1Azau;V

diff --git a/experiments/models/.DS_Store b/experiments/models/.DS_Store
index 2b0348eeaed36506d5794f9499213b10b107ae2b..2233542eb75e6bce5e983301fb977ab59184dc1b 100644
GIT binary patch
delta 14
VcmZoMXffDez{tq3*^u$F7yuxb1Q`GT

delta 14
VcmZoMXffDez{tqB*^u$F7yuxh1R4MU
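A note for readers of the patched README: `preprocess.sh` is described as splitting the dataset and scaling values. A minimal, hypothetical Python sketch of that idea follows; the function name, column names, 80/20 split ratio, and choice of min-max scaling are illustrative assumptions, not the artifact's actual code.

```python
import pandas as pd

def split_and_scale(df, feature_cols, train_frac=0.8, seed=42):
    """Shuffle, split into train/test, then min-max scale the feature columns.

    Scaling statistics come from the train split only, so no information
    about the test split leaks into preprocessing.
    """
    df = df.sample(frac=1.0, random_state=seed).reset_index(drop=True)
    n_train = int(len(df) * train_frac)
    train = df.iloc[:n_train].copy()
    test = df.iloc[n_train:].copy()
    mins = train[feature_cols].min()
    rng = (train[feature_cols].max() - mins).replace(0, 1)  # avoid division by zero
    train[feature_cols] = (train[feature_cols] - mins) / rng
    test[feature_cols] = (test[feature_cols] - mins) / rng
    return train, test
```

Because the test split is scaled with train statistics, its values may fall slightly outside [0, 1]; that is expected and preferable to leaking test-set ranges into the scaler.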