From f3f18db4cb9eff060823e9ac16c5a0ec12ded874 Mon Sep 17 00:00:00 2001
From: miguelpduarte <miguelpduarte98@gmail.com>
Date: Sun, 17 Oct 2021 11:51:38 +0100
Subject: [PATCH 01/20] Initial commit for new scraper. Scrapy project creation

---
 scrape_to_csv/Dockerfile                      |  11 ++
 scrape_to_csv/requirements.txt                |  32 ++++++
 scrape_to_csv/scrape_to_csv/__init__.py       |   0
 scrape_to_csv/scrape_to_csv/items.py          |  47 ++++++++
 scrape_to_csv/scrape_to_csv/middlewares.py    | 103 ++++++++++++++++++
 scrape_to_csv/scrape_to_csv/pipelines.py      |  13 +++
 scrape_to_csv/scrape_to_csv/settings.py       |  88 +++++++++++++++
 .../scrape_to_csv/spiders/__init__.py         |   4 +
 scrape_to_csv/scrapy.cfg                      |  11 ++
 9 files changed, 309 insertions(+)
 create mode 100644 scrape_to_csv/Dockerfile
 create mode 100644 scrape_to_csv/requirements.txt
 create mode 100644 scrape_to_csv/scrape_to_csv/__init__.py
 create mode 100644 scrape_to_csv/scrape_to_csv/items.py
 create mode 100644 scrape_to_csv/scrape_to_csv/middlewares.py
 create mode 100644 scrape_to_csv/scrape_to_csv/pipelines.py
 create mode 100644 scrape_to_csv/scrape_to_csv/settings.py
 create mode 100644 scrape_to_csv/scrape_to_csv/spiders/__init__.py
 create mode 100644 scrape_to_csv/scrapy.cfg

diff --git a/scrape_to_csv/Dockerfile b/scrape_to_csv/Dockerfile
new file mode 100644
index 0000000..cfab7cf
--- /dev/null
+++ b/scrape_to_csv/Dockerfile
@@ -0,0 +1,11 @@
+FROM python:3.9.7
+
+WORKDIR /scrapper
+
+COPY requirements.txt .
+
+RUN pip install -r requirements.txt
+
+# Just copy the code over, instead of using volumes like before
+COPY scrapy.cfg .
+COPY scrape_to_csv/ scrape_to_csv/
diff --git a/scrape_to_csv/requirements.txt b/scrape_to_csv/requirements.txt
new file mode 100644
index 0000000..54f799f
--- /dev/null
+++ b/scrape_to_csv/requirements.txt
@@ -0,0 +1,32 @@
+attrs==21.2.0
+Automat==20.2.0
+cffi==1.14.6
+constantly==15.1.0
+cryptography==35.0.0
+cssselect==1.1.0
+h2==3.2.0
+hpack==3.0.0
+hyperframe==5.2.0
+hyperlink==21.0.0
+idna==3.2
+incremental==21.3.0
+itemadapter==0.4.0
+itemloaders==1.0.4
+jmespath==0.10.0
+lxml==4.6.3
+parsel==1.6.0
+priority==1.3.0
+Protego==0.1.16
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+pycparser==2.20
+PyDispatcher==2.0.5
+pyOpenSSL==21.0.0
+queuelib==1.6.2
+Scrapy==2.5.1
+service-identity==21.1.0
+six==1.16.0
+Twisted==21.7.0
+typing-extensions==3.10.0.2
+w3lib==1.22.0
+zope.interface==5.4.0
diff --git a/scrape_to_csv/scrape_to_csv/__init__.py b/scrape_to_csv/scrape_to_csv/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/scrape_to_csv/scrape_to_csv/items.py b/scrape_to_csv/scrape_to_csv/items.py
new file mode 100644
index 0000000..12170d6
--- /dev/null
+++ b/scrape_to_csv/scrape_to_csv/items.py
@@ -0,0 +1,47 @@
+# Define here the models for your scraped items
+#
+# See documentation in:
+# https://docs.scrapy.org/en/latest/topics/items.html
+from dataclasses import dataclass, field
+from typing import Optional
+
+# Based on the previous items.py, adaptation may be necessary
+
+@dataclass
+class Faculty:
+    acronym: str
+    name: str
+
+@dataclass
+class Course:
+    # Not using "id" and "type" as field names because they would shadow Python builtins
+    course_id: int
+    name: str
+    course_type: str
+    acronym: str
+    url: str # Not sure that this is useful
+    plan_url: str
+    faculty: str
+    year: int
+
+@dataclass
+class CourseUnit:
+    course_unit_id: int
+    name: str
+    acronym: str
+    course_id: int
+    url: str
+    year: int
+    schedule_url: str
+
+@dataclass
+class Schedule:
+    course_unit_id: int
+    lesson_type: str # T, TP, PL, etc.
+    day: int # 0 = monday, 1 = tuesday, .., 5 = saturday (no sunday)
+    duration: float # In hours. 0.5 hours is half an hour
+    start_time: int # TODO: Confirm data type
+    teacher_acronym: str # JAS, GTD, etc.
+    location: str # room name/number
+    class_name: str # 1MIEIC01
+    composed_class_name: Optional[str] = field(default=None) # None or COMP_372 # TODO: See if this can be joined with the previous if it makes sense to do so
diff --git a/scrape_to_csv/scrape_to_csv/middlewares.py b/scrape_to_csv/scrape_to_csv/middlewares.py
new file mode 100644
index 0000000..d9e8df2
--- /dev/null
+++ b/scrape_to_csv/scrape_to_csv/middlewares.py
@@ -0,0 +1,103 @@
+# Define here the models for your spider middleware
+#
+# See documentation in:
+# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+
+from scrapy import signals
+
+# useful for handling different item types with a single interface
+from itemadapter import is_item, ItemAdapter
+
+
+class ScrapeToCsvSpiderMiddleware:
+    # Not all methods need to be defined. If a method is not defined,
+    # scrapy acts as if the spider middleware does not modify the
+    # passed objects.
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        # This method is used by Scrapy to create your spiders.
+        s = cls()
+        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
+        return s
+
+    def process_spider_input(self, response, spider):
+        # Called for each response that goes through the spider
+        # middleware and into the spider.
+
+        # Should return None or raise an exception.
+        return None
+
+    def process_spider_output(self, response, result, spider):
+        # Called with the results returned from the Spider, after
+        # it has processed the response.
+
+        # Must return an iterable of Request, or item objects.
+        for i in result:
+            yield i
+
+    def process_spider_exception(self, response, exception, spider):
+        # Called when a spider or process_spider_input() method
+        # (from other spider middleware) raises an exception.
+
+        # Should return either None or an iterable of Request or item objects.
+        pass
+
+    def process_start_requests(self, start_requests, spider):
+        # Called with the start requests of the spider, and works
+        # similarly to the process_spider_output() method, except
+        # that it doesn’t have a response associated.
+
+        # Must return only requests (not items).
+        for r in start_requests:
+            yield r
+
+    def spider_opened(self, spider):
+        spider.logger.info('Spider opened: %s' % spider.name)
+
+
+class ScrapeToCsvDownloaderMiddleware:
+    # Not all methods need to be defined. If a method is not defined,
+    # scrapy acts as if the downloader middleware does not modify the
+    # passed objects.
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        # This method is used by Scrapy to create your spiders.
+        s = cls()
+        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
+        return s
+
+    def process_request(self, request, spider):
+        # Called for each request that goes through the downloader
+        # middleware.
+
+        # Must either:
+        # - return None: continue processing this request
+        # - or return a Response object
+        # - or return a Request object
+        # - or raise IgnoreRequest: process_exception() methods of
+        #   installed downloader middleware will be called
+        return None
+
+    def process_response(self, request, response, spider):
+        # Called with the response returned from the downloader.
+
+        # Must either:
+        # - return a Response object
+        # - return a Request object
+        # - or raise IgnoreRequest
+        return response
+
+    def process_exception(self, request, exception, spider):
+        # Called when a download handler or a process_request()
+        # (from other downloader middleware) raises an exception.
+
+        # Must either:
+        # - return None: continue processing this exception
+        # - return a Response object: stops process_exception() chain
+        # - return a Request object: stops process_exception() chain
+        pass
+
+    def spider_opened(self, spider):
+        spider.logger.info('Spider opened: %s' % spider.name)
diff --git a/scrape_to_csv/scrape_to_csv/pipelines.py b/scrape_to_csv/scrape_to_csv/pipelines.py
new file mode 100644
index 0000000..a64989e
--- /dev/null
+++ b/scrape_to_csv/scrape_to_csv/pipelines.py
@@ -0,0 +1,13 @@
+# Define your item pipelines here
+#
+# Don't forget to add your pipeline to the ITEM_PIPELINES setting
+# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
+
+
+# useful for handling different item types with a single interface
+from itemadapter import ItemAdapter
+
+
+class ScrapeToCsvPipeline:
+    def process_item(self, item, spider):
+        return item
diff --git a/scrape_to_csv/scrape_to_csv/settings.py b/scrape_to_csv/scrape_to_csv/settings.py
new file mode 100644
index 0000000..58e8d48
--- /dev/null
+++ b/scrape_to_csv/scrape_to_csv/settings.py
@@ -0,0 +1,88 @@
+# Scrapy settings for scrape_to_csv project
+#
+# For simplicity, this file contains only settings considered important or
+# commonly used. You can find more settings consulting the documentation:
+#
+#     https://docs.scrapy.org/en/latest/topics/settings.html
+#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
+#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+
+BOT_NAME = 'scrape_to_csv'
+
+SPIDER_MODULES = ['scrape_to_csv.spiders']
+NEWSPIDER_MODULE = 'scrape_to_csv.spiders'
+
+
+# Crawl responsibly by identifying yourself (and your website) on the user-agent
+#USER_AGENT = 'scrape_to_csv (+http://www.yourdomain.com)'
+
+# Obey robots.txt rules
+ROBOTSTXT_OBEY = True
+
+# Configure maximum concurrent requests performed by Scrapy (default: 16)
+#CONCURRENT_REQUESTS = 32
+
+# Configure a delay for requests for the same website (default: 0)
+# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
+# See also autothrottle settings and docs
+#DOWNLOAD_DELAY = 3
+# The download delay setting will honor only one of:
+#CONCURRENT_REQUESTS_PER_DOMAIN = 16
+#CONCURRENT_REQUESTS_PER_IP = 16
+
+# Disable cookies (enabled by default)
+#COOKIES_ENABLED = False
+
+# Disable Telnet Console (enabled by default)
+#TELNETCONSOLE_ENABLED = False
+
+# Override the default request headers:
+#DEFAULT_REQUEST_HEADERS = {
+#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+#   'Accept-Language': 'en',
+#}
+
+# Enable or disable spider middlewares
+# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+#SPIDER_MIDDLEWARES = {
+#    'scrape_to_csv.middlewares.ScrapeToCsvSpiderMiddleware': 543,
+#}
+
+# Enable or disable downloader middlewares
+# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
+#DOWNLOADER_MIDDLEWARES = {
+#    'scrape_to_csv.middlewares.ScrapeToCsvDownloaderMiddleware': 543,
+#}
+
+# Enable or disable extensions
+# See https://docs.scrapy.org/en/latest/topics/extensions.html
+#EXTENSIONS = {
+#    'scrapy.extensions.telnet.TelnetConsole': None,
+#}
+
+# Configure item pipelines
+# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
+#ITEM_PIPELINES = {
+#    'scrape_to_csv.pipelines.ScrapeToCsvPipeline': 300,
+#}
+
+# Enable and configure the AutoThrottle extension (disabled by default)
+# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
+#AUTOTHROTTLE_ENABLED = True
+# The initial download delay
+#AUTOTHROTTLE_START_DELAY = 5
+# The maximum download delay to be set in case of high latencies
+#AUTOTHROTTLE_MAX_DELAY = 60
+# The average number of requests Scrapy should be sending in parallel to
+# each remote server
+#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
+# Enable showing throttling stats for every response received:
+#AUTOTHROTTLE_DEBUG = False
+
+# Enable and configure HTTP caching (disabled by default)
+# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
+#HTTPCACHE_ENABLED = True
+#HTTPCACHE_EXPIRATION_SECS = 0
+#HTTPCACHE_DIR = 'httpcache'
+#HTTPCACHE_IGNORE_HTTP_CODES = []
+#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
diff --git a/scrape_to_csv/scrape_to_csv/spiders/__init__.py b/scrape_to_csv/scrape_to_csv/spiders/__init__.py
new file mode 100644
index 0000000..ebd689a
--- /dev/null
+++ b/scrape_to_csv/scrape_to_csv/spiders/__init__.py
@@ -0,0 +1,4 @@
+# This package will contain the spiders of your Scrapy project
+#
+# Please refer to the documentation for information on how to create and manage
+# your spiders.
diff --git a/scrape_to_csv/scrapy.cfg b/scrape_to_csv/scrapy.cfg
new file mode 100644
index 0000000..d08279f
--- /dev/null
+++ b/scrape_to_csv/scrapy.cfg
@@ -0,0 +1,11 @@
+# Automatically created by: scrapy startproject
+#
+# For more information about the [deploy] section see:
+# https://scrapyd.readthedocs.io/en/latest/deploy.html
+
+[settings]
+default = scrape_to_csv.settings
+
+[deploy]
+#url = http://localhost:6800/
+project = scrape_to_csv

From 7b9a05e8665e2d08a243a292cca3186add504682 Mon Sep 17 00:00:00 2001
From: miguelpduarte <miguelpduarte98@gmail.com>
Date: Sun, 17 Oct 2021 11:55:05 +0100
Subject: [PATCH 02/20] Add initial version of course and faculties spider

Course spider will have to read from the faculties CSV eventually, but
just focusing on selectors and structure for now.
---
 scrape_to_csv/.gitignore                      |  1 +
 .../scrape_to_csv/spiders/courses_spider.py   | 84 +++++++++++++++++++
 .../scrape_to_csv/spiders/faculties_spider.py | 20 +++++
 3 files changed, 105 insertions(+)
 create mode 100644 scrape_to_csv/.gitignore
 create mode 100644 scrape_to_csv/scrape_to_csv/spiders/courses_spider.py
 create mode 100644 scrape_to_csv/scrape_to_csv/spiders/faculties_spider.py

diff --git a/scrape_to_csv/.gitignore b/scrape_to_csv/.gitignore
new file mode 100644
index 0000000..53752db
--- /dev/null
+++ b/scrape_to_csv/.gitignore
@@ -0,0 +1 @@
+output
diff --git a/scrape_to_csv/scrape_to_csv/spiders/courses_spider.py b/scrape_to_csv/scrape_to_csv/spiders/courses_spider.py
new file mode 100644
index 0000000..1097f01
--- /dev/null
+++ b/scrape_to_csv/scrape_to_csv/spiders/courses_spider.py
@@ -0,0 +1,84 @@
+import scrapy
+from operator import itemgetter
+from urllib.parse import urlparse, parse_qs
+
+class CoursesSpider(scrapy.Spider):
+    name = "courses"
+    allowed_domains = ["sigarra.up.pt"]
+
+    raw_courses_url = "https://sigarra.up.pt/feup/pt/cur_geral.cur_tipo_curso_view?pv_tipo_sigla={course_type}&pv_ano_lectivo={school_year}"
+
+    def start_requests(self):
+        # TODO: Get from arguments, env variables, etc, somewhere
+        year = 2021
+
+        # TODO: Get from the previous results
+        faculties = ['feup', 'fcup']
+        
+        COURSE_TYPES = ['L', 'M', 'MI', 'D']
+
+        for faculty in faculties:
+            for course_type in COURSE_TYPES:
+                url = self.raw_courses_url.format(course_type=course_type, school_year=year)
+                self.logger.debug(f"Calculated url: {url}")
+                meta_data = {"faculty_acronym": faculty, "course_type": course_type, "year": year}
+
+                yield scrapy.Request(url=url, callback=self.parse_course_list, meta=meta_data)
+
+    def parse_course_list(self, response):
+        """
+        Parses the initial course list page.
+        Then, issues follow-up requests to the course pages themselves to get more information.
+        `response.follow_all` is used since the `href`s are relative links, and the relevant elements are <a>s
+        """
+
+        # Get the <ul> whose preceding sibling is an <h2> with the text "Lista de Cursos"
+        ul = response.xpath("//ul[preceding-sibling::h2[text()='Lista de Cursos']]")
+        # Then, get the children elements (<li>s) and their respective first <a>
+        # These have the course information (following <a>s just mention if the course is a collaboration between faculties, etc. - not relevant for this)
+        courses = ul.xpath("./li/a[1]")
+
+        # `courses` is a set of <a>s so we can use the shorter `response.follow_all` instead of `response.follow`
+        # also forward the response.meta data
+        yield from response.follow_all(courses, meta=response.meta, callback=self.parse_course)
+
+            
+    def parse_course(self, response):
+        """
+        Parses a specific course page.
+        The url should look something like: `https://sigarra.up.pt/feup/pt/cur_geral.cur_view?pv_ano_lectivo={school_year}&pv_origem=CUR&pv_tipo_cur_sigla={course_type}&pv_curso_id={course_id}`
+        Direct navigation can't be used since the course_id is not known beforehand.
+        """
+
+        #TODO: Check if the "test if this page points to another one" is necessary...
+        courseHtml = response.css("body")
+        if courseHtml.xpath("//*[@id='conteudoinner']/div[1]/a").get() is not None:
+            parsed_url = urlparse(response.url)
+            queryparams = parse_qs(parsed_url.query)
+            course_id = queryparams['pv_curso_id'][0]
+            self.logger.warn("Found a possible pointer to another page for course with id={} at {}".format(course_id, response.meta["faculty_acronym"]))
+        # end this check
+
+        # Get the data forwarded using response.meta
+        faculty_acronym, course_type, school_year = itemgetter("faculty_acronym", "course_type", "year")(response.meta)
+
+        # Get course_id from the URL
+        # See https://stackoverflow.com/questions/5074803
+        parsed_url = urlparse(response.url)
+        queryparams = parse_qs(parsed_url.query)
+        course_id = queryparams['pv_curso_id'][0]
+
+        # In the "Planos de Estudos" section, get the first link in the div box
+        relative_plan_url = response.xpath("//h3[contains(., 'Planos de Estudos')]/following-sibling::div[1]//a[1]/@href").get()
+
+        yield {
+                "id": course_id,
+                "name": response.xpath("//h1[2]/text()").get(), # Second h1 in page
+                "type": course_type,
+                "acronym": response.xpath("//td[preceding-sibling::td[contains(., 'Sigla:')]]/text()").get(),
+                "url": response.url, # not sure how this is useful
+                "plan_url": response.urljoin(relative_plan_url),
+
+                "faculty": faculty_acronym,
+                "year": school_year,
+        }
diff --git a/scrape_to_csv/scrape_to_csv/spiders/faculties_spider.py b/scrape_to_csv/scrape_to_csv/spiders/faculties_spider.py
new file mode 100644
index 0000000..b737039
--- /dev/null
+++ b/scrape_to_csv/scrape_to_csv/spiders/faculties_spider.py
@@ -0,0 +1,20 @@
+import scrapy
+
+class FacultiesSpider(scrapy.Spider):
+    name = "faculties"
+    allowed_domains = ["sigarra.up.pt"]
+
+    start_urls = [
+        "https://sigarra.up.pt/up/pt/web_base.gera_pagina?p_pagina=escolas"
+    ]
+
+    def parse(self, response):
+        for faculty in response.css("ul > li.menu-nivel-3 > a"):
+            acronym = faculty.attrib['href'][2:]
+            name = faculty.css("::text").get()
+            # self.logger.debug("{} - {}".format(acronym, name))
+
+            yield {
+                "acronym": acronym,
+                "name": name,
+            }

From a6d0f7e1d003bfa8832477a087d19b6227927997 Mon Sep 17 00:00:00 2001
From: miguelpduarte <miguelpduarte98@gmail.com>
Date: Tue, 19 Oct 2021 23:56:13 +0100
Subject: [PATCH 03/20] Add README to new scraper module

---
 scrape_to_csv/README.md | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 scrape_to_csv/README.md

diff --git a/scrape_to_csv/README.md b/scrape_to_csv/README.md
new file mode 100644
index 0000000..3d24ca8
--- /dev/null
+++ b/scrape_to_csv/README.md
@@ -0,0 +1,37 @@
+# Scraper to CSV
+
+This scraper is built using [Scrapy](https://scrapy.org/) ([docs](https://docs.scrapy.org/en/latest/intro/overview.html)).
+
+Its purpose is to scrape relevant entities into CSV files, which may then be used to populate a database.
+
+The extracted entities are:
+* Faculties (all of UPorto's faculties)
+* Courses (the courses that are active in a certain year for all of the found faculties)
+* CourseUnits (aka _"cadeiras"_)
+* Schedule "items" for each CourseUnit
+
+## Setup and installation
+
+The scraper is Dockerized so either the provided Dockerfile or the `docker-compose.yml` at the root of the project can be used.
+
+If a local installation is preferred, just `pip install -r requirements.txt`.  
+**Note:** Usage of a python virtual environment is _highly_ recommended for a local setup.
+
+## Usage
+
+There is a spider that crawls pages and extracts each of the above entities.
+
+To run a spider, just run `scrapy crawl <spider_name>`. For example, `scrapy crawl faculties` will crawl SIGARRA and extract all of the faculties.
+
+**(The following part may change in the future)**
+
+So that the scraping results are stored in a CSV file, run the spiders with the `-O` option.  
+For example, `scrapy crawl faculties -O output/faculties.csv` will run the faculties scraper and output the scraped data into the `faculties.csv` file in the `output` directory (relative to the project root).
+
+**(May change)** The scrapers assume that the files of the previous scrapings are placed in the `output/` directory. For example, to run the Course spider, `output/faculties.csv` should exist and be populated with the faculties via the faculties scraper.
+
+## Notes of specific behaviours or issues
+
+* Courses that are hosted by several faculties ("co-op" courses) have "duplicate" lines in the courses CSV. This may be necessary since the `plan_url` is different. However, these should probably be "deduplicated" by the module that populates the DB from the CSVs, joining the several "instances" of the same course in different faculties.
+    * The desired behaviour should be something like: Being able to find the course from either of the associated faculties; and also being able to see all of the associated course units, regardless of these being present in either "instance" of the course. This way the information will be as connected as possible :)
+    * Note that this may also generate some duplication in the following steps, which may require additional care in implementing the module that populates the database from the CSV file.

From 74e190cec3d7e8e79f45cb1d397db7dab40ec9d4 Mon Sep 17 00:00:00 2001
From: miguelpduarte <miguelpduarte98@gmail.com>
Date: Sat, 23 Oct 2021 16:26:47 +0100
Subject: [PATCH 04/20] Update course spider

Now fetches faculties from previous results.
---
 .../scrape_to_csv/spiders/courses_spider.py   | 28 ++++++++++++++-----
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/scrape_to_csv/scrape_to_csv/spiders/courses_spider.py b/scrape_to_csv/scrape_to_csv/spiders/courses_spider.py
index 1097f01..15ec8d0 100644
--- a/scrape_to_csv/scrape_to_csv/spiders/courses_spider.py
+++ b/scrape_to_csv/scrape_to_csv/spiders/courses_spider.py
@@ -1,26 +1,33 @@
 import scrapy
+import csv
 from operator import itemgetter
 from urllib.parse import urlparse, parse_qs
 
+FACULTIES_CSV_FILE_PATH = "output2/faculties.csv"
+
 class CoursesSpider(scrapy.Spider):
     name = "courses"
     allowed_domains = ["sigarra.up.pt"]
 
-    raw_courses_url = "https://sigarra.up.pt/feup/pt/cur_geral.cur_tipo_curso_view?pv_tipo_sigla={course_type}&pv_ano_lectivo={school_year}"
+    raw_courses_url = "https://sigarra.up.pt/{faculty}/pt/cur_geral.cur_tipo_curso_view?pv_tipo_sigla={course_type}&pv_ano_lectivo={school_year}"
+
 
     def start_requests(self):
         # TODO: Get from arguments, env variables, etc, somewhere
         year = 2021
 
-        # TODO: Get from the previous results
-        faculties = ['feup', 'fcup']
-        
+        faculties = []
+
+        with open(FACULTIES_CSV_FILE_PATH, "r") as f:
+            csv_data = csv.DictReader(f)
+            faculties = [faculty["acronym"] for faculty in csv_data]
+
         COURSE_TYPES = ['L', 'M', 'MI', 'D']
 
         for faculty in faculties:
             for course_type in COURSE_TYPES:
-                url = self.raw_courses_url.format(course_type=course_type, school_year=year)
-                self.logger.debug(f"Calculated url: {url}")
+                url = self.raw_courses_url.format(faculty=faculty, course_type=course_type, school_year=year)
+                # self.logger.debug(f"Calculated url: {url}")
                 meta_data = {"faculty_acronym": faculty, "course_type": course_type, "year": year}
 
                 yield scrapy.Request(url=url, callback=self.parse_course_list, meta=meta_data)
@@ -50,13 +57,20 @@ def parse_course(self, response):
         Direct navigation can't be used since the course_id is not known beforehand.
         """
 
+        # Checks if the course page is pointing elsewhere (see details below)
+        # Previous selector was response.css("body").xpath("//*[@id='conteudoinner']/div[1]/a").get()
+        # This was a true positive for https://sigarra.up.pt/fcup/pt/cur_geral.cur_view?pv_ano_lectivo=2021&pv_origem=CUR&pv_tipo_cur_sigla=D&pv_curso_id=21901
+        # But a false positive for https://sigarra.up.pt/faup/pt/cur_geral.cur_view?pv_ano_lectivo=2021&pv_origem=CUR&pv_tipo_cur_sigla=D&pv_curso_id=45
+        # TODO: Decide if we just ignore the specific page (the one that points to another) since the data would be duplicate; if we scrape the page since the data is hidden but still there, or if we try to follow the link
+        # TODO: Regardless of the above, we have to find a selector that works for one but not the other. Just checking "first div child of div#conteudoinner that has a <a>" will work for both...
         #TODO: Check if the "test if this page points to another one" is necessary...
+
         courseHtml = response.css("body")
         if courseHtml.xpath("//*[@id='conteudoinner']/div[1]/a").get() is not None:
             parsed_url = urlparse(response.url)
             queryparams = parse_qs(parsed_url.query)
             course_id = queryparams['pv_curso_id'][0]
-            self.logger.warn("Found a possible pointer to another page for course with id={} at {}".format(course_id, response.meta["faculty_acronym"]))
+            self.logger.warn("Found a possible pointer to another page for course with id={} at {} (url is {})".format(course_id, response.meta["faculty_acronym"], response.url))
         # end this check
 
         # Get the data forwarded using response.meta

From 9df3ad55f469e304d79a054be40a9eb39a813e0d Mon Sep 17 00:00:00 2001
From: miguelpduarte <miguelpduarte98@gmail.com>
Date: Sat, 23 Oct 2021 17:17:41 +0100
Subject: [PATCH 05/20] Add item definitions for scraped items. Remove
 autogenerated pipeline snippet

---
 scrape_to_csv/scrape_to_csv/items.py     | 2 ++
 scrape_to_csv/scrape_to_csv/pipelines.py | 9 ---------
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/scrape_to_csv/scrape_to_csv/items.py b/scrape_to_csv/scrape_to_csv/items.py
index 12170d6..ee5f26e 100644
--- a/scrape_to_csv/scrape_to_csv/items.py
+++ b/scrape_to_csv/scrape_to_csv/items.py
@@ -21,8 +21,10 @@ class Course:
     acronym: str
     url: str # Not sure that this is useful
     plan_url: str
+    plan_id: int # Helpful for making less requests (just need to consider the course_id -> plan_id mapping)
     faculty: str
     year: int
+    plan_id: int
 
 @dataclass
 class CourseUnit:
diff --git a/scrape_to_csv/scrape_to_csv/pipelines.py b/scrape_to_csv/scrape_to_csv/pipelines.py
index a64989e..a6c698f 100644
--- a/scrape_to_csv/scrape_to_csv/pipelines.py
+++ b/scrape_to_csv/scrape_to_csv/pipelines.py
@@ -2,12 +2,3 @@
 #
 # Don't forget to add your pipeline to the ITEM_PIPELINES setting
 # See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
-
-
-# useful for handling different item types with a single interface
-from itemadapter import ItemAdapter
-
-
-class ScrapeToCsvPipeline:
-    def process_item(self, item, spider):
-        return item

From ce7b9543a9c13396703681667649bb31b4371217 Mon Sep 17 00:00:00 2001
From: miguelpduarte <miguelpduarte98@gmail.com>
Date: Sat, 23 Oct 2021 17:17:56 +0100
Subject: [PATCH 06/20] Update new scraper README

---
 scrape_to_csv/README.md | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/scrape_to_csv/README.md b/scrape_to_csv/README.md
index 3d24ca8..739e9fb 100644
--- a/scrape_to_csv/README.md
+++ b/scrape_to_csv/README.md
@@ -23,15 +23,30 @@ There is a spider that crawls pages and extracts each of the above entities.
 
 To run a spider, just run `scrapy crawl <spider_name>`. For example, `scrapy crawl faculties` will crawl SIGARRA and extract all of the faculties.
 
-**(The following part may change in the future)**
+The default logging level is `WARNING`. To change this, use the `--loglevel` or `-L` option with the desired level (e.g. `DEBUG`, `INFO`).
 
-So that the scraping results are stored in a CSV file, run the spiders with the `-O` option.  
-For example, `scrapy crawl faculties -O output/faculties.csv` will run the faculties scraper and output the scraped data into the `faculties.csv` file in the `output` directory (relative to the project root).
+A feed export is configured in the scrapy project configs so that each spider outputs its data to `output/spidername.csv`.
 
-**(May change)** The scrapers assume that the files of the previous scrapings are placed in the `output/` directory. For example, to run the Course spider, `output/faculties.csv` should exist and be populated with the faculties via the faculties scraper.
+The spiders assume that the output files of previous scraper stages are present in this directory structure. For example, to be able to run the Course spider, `output/faculties.csv` should exist and be populated with faculties (the result of simply running the faculties spider).
+
+As such, to get the full scraping output, one simply needs to run:
+```
+scrapy crawl faculties
+scrapy crawl courses
+scrapy crawl courseunits ?
+scrapy crawl schedules
+```
+
+TODO: Possibly implement something with https://doc.scrapy.org/en/stable/topics/practices.html#run-scrapy-from-a-script -> this would simplify the crawling process, since only one script would need to be run and everything else would "just work" TM
 
 ## Notes of specific behaviours or issues
 
 * Courses that are hosted by several faculties ("co-op" courses) have "duplicate" lines in the courses CSV. This may be necessary since the `plan_url` is different. However, these should probably be "deduplicated" by the module that populates the DB from the CSVs, joining the several "instances" of the same course in different faculties.
     * The desired behaviour should be something like: Being able to find the course from either of the associated faculties; and also being able to see all of the associated course units, regardless of these being present in either "instance" of the course. This way the information will be as connected as possible :)
     * Note that this may also generate some duplication in the following steps, which may require additional care in implementing the module that populates the database from the CSV file.
+
+* A course's plan only requires the year (`pv_ano_lectivo`) and the study plan ID (`pv_plano_id`). It also does not seem to matter which faculty is fetched (i.e. replacing `feup` with `fcup` in the URL to get the course plan does not change the resulting data).
+    * This may be helpful in de-duplicating data, since we need only to map a course id (`pv_curso_id`) to a course plan id (`pv_plano_id`) to get the full list of course units.
+    * Hopefully this consideration will help in reducing the number of requests that are made to get all of the course units, and thus the duration necessary for scraping.
+    * However, this may be temporary behaviour which may be changed in the future, since the same does not happen with courses... If a course is lectured at FEUP, changing the url to FAUP breaks the page with an error.
+    * Still, for now it seems like a better option to simply consider the plan id as a set, to reduce duplication.

From a2bbdd188777b51a4518537ab2612bca765a11a8 Mon Sep 17 00:00:00 2001
From: miguelpduarte <miguelpduarte98@gmail.com>
Date: Sat, 23 Oct 2021 17:18:26 +0100
Subject: [PATCH 07/20] Use settings to define a CSV feed to export scraped
 data

---
 scrape_to_csv/scrape_to_csv/settings.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/scrape_to_csv/scrape_to_csv/settings.py b/scrape_to_csv/scrape_to_csv/settings.py
index 58e8d48..16c36bf 100644
--- a/scrape_to_csv/scrape_to_csv/settings.py
+++ b/scrape_to_csv/scrape_to_csv/settings.py
@@ -7,14 +7,28 @@
 #     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
 #     https://docs.scrapy.org/en/latest/topics/spider-middleware.html
 
+LOG_LEVEL = 'WARNING'
 BOT_NAME = 'scrape_to_csv'
 
 SPIDER_MODULES = ['scrape_to_csv.spiders']
 NEWSPIDER_MODULE = 'scrape_to_csv.spiders'
 
+# Output the CSV to a different file for each spider
+# See: https://stackoverflow.com/a/43290988/5437511
+# These were used for older versions of scrapy
+# FEED_FORMAT = 'csv'
+# FEED_URI = 'output2/%(name)s.csv'
+# The newer equivalent:
+FEEDS = {
+    'output/%(name)s.csv': {
+        'format': 'csv',
+        'overwrite': False, # Not sure about this one
+    },
+}
+
 
 # Crawl responsibly by identifying yourself (and your website) on the user-agent
-#USER_AGENT = 'scrape_to_csv (+http://www.yourdomain.com)'
+USER_AGENT = 'NIAEFEUP TTS Scraper (https://ni.fe.up.pt/tts)'
 
 # Obey robots.txt rules
 ROBOTSTXT_OBEY = True

From 5b1e56ca28b3c14a2b869af29dd45aaff5f5ab7e Mon Sep 17 00:00:00 2001
From: miguelpduarte <miguelpduarte98@gmail.com>
Date: Sat, 23 Oct 2021 17:19:28 +0100
Subject: [PATCH 08/20] Update courses and faculties spiders to use defined
 Items. Update courses spider to resolve some of the found issues

Additionally, we are now storing the plan_id since the course_units URL
can be constructed from that+school year (see README.md for details).
---
 .../scrape_to_csv/spiders/courses_spider.py   | 42 +++++++++++--------
 .../scrape_to_csv/spiders/faculties_spider.py | 10 +++--
 2 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/scrape_to_csv/scrape_to_csv/spiders/courses_spider.py b/scrape_to_csv/scrape_to_csv/spiders/courses_spider.py
index 15ec8d0..a34db38 100644
--- a/scrape_to_csv/scrape_to_csv/spiders/courses_spider.py
+++ b/scrape_to_csv/scrape_to_csv/spiders/courses_spider.py
@@ -3,7 +3,9 @@
 from operator import itemgetter
 from urllib.parse import urlparse, parse_qs
 
-FACULTIES_CSV_FILE_PATH = "output2/faculties.csv"
+from ..items import Course
+
+FACULTIES_CSV_FILE_PATH = "output/faculties.csv"
 
 class CoursesSpider(scrapy.Spider):
     name = "courses"
@@ -61,16 +63,15 @@ def parse_course(self, response):
         # Previous selector was response.css("body").xpath("//*[@id='conteudoinner']/div[1]/a").get()
         # This was a true positive for https://sigarra.up.pt/fcup/pt/cur_geral.cur_view?pv_ano_lectivo=2021&pv_origem=CUR&pv_tipo_cur_sigla=D&pv_curso_id=21901
         # But a false positive for https://sigarra.up.pt/faup/pt/cur_geral.cur_view?pv_ano_lectivo=2021&pv_origem=CUR&pv_tipo_cur_sigla=D&pv_curso_id=45
-        # TODO: Decide if we just ignore the specific page (the one that points to another) since the data would be duplicate; if we scrape the page since the data is hidden but still there, or if we try to follow the link
-        # TODO: Regardless of the above, we have to find a selector that works for one but not the other. Just checking "first div child of div#conteudoinner that has a <a>" will work for both...
-        #TODO: Check if the "test if this page points to another one" is necessary...
-
+        # We just ignore the result of this pointer check, since a true positive only means duplicate data. (The page is also found for the other faculty, and the data is scraped all the same.)
+        # Not parsing this would probably remove some data for when these selectors are false positives, so we must scrape it and then "de-duplicate" it later.
         courseHtml = response.css("body")
         if courseHtml.xpath("//*[@id='conteudoinner']/div[1]/a").get() is not None:
             parsed_url = urlparse(response.url)
             queryparams = parse_qs(parsed_url.query)
             course_id = queryparams['pv_curso_id'][0]
-            self.logger.warn("Found a possible pointer to another page for course with id={} at {} (url is {})".format(course_id, response.meta["faculty_acronym"], response.url))
+            # This can most likely be ignored, but can be checked just in case (see above comments)
+            self.logger.info("Found a possible pointer to another page for course with id={} at {} (url is {})".format(course_id, response.meta["faculty_acronym"], response.url))
         # end this check
 
         # Get the data forwarded using response.meta
@@ -84,15 +85,20 @@ def parse_course(self, response):
 
         # In the "Planos de Estudos" section, get the first link in the div box
         relative_plan_url = response.xpath("//h3[contains(., 'Planos de Estudos')]/following-sibling::div[1]//a[1]/@href").get()
-
-        yield {
-                "id": course_id,
-                "name": response.xpath("//h1[2]/text()").get(), # Second h1 in page
-                "type": course_type,
-                "acronym": response.xpath("//td[preceding-sibling::td[contains(., 'Sigla:')]]/text()").get(),
-                "url": response.url, # not sure how this is useful
-                "plan_url": response.urljoin(relative_plan_url),
-
-                "faculty": faculty_acronym,
-                "year": school_year,
-        }
+        # Parsing this is helpful for de-duplicating requests later
+        parsed_rel_plan_url = urlparse(relative_plan_url)
+        plan_url_queryparams = parse_qs(parsed_rel_plan_url.query)
+        course_plan_id = plan_url_queryparams['pv_plano_id'][0]
+
+        yield Course(
+            course_id=course_id,
+            name=response.xpath("//h1[2]/text()").get(), # Second h1 in page
+            course_type=course_type,
+            acronym=response.xpath("//td[preceding-sibling::td[contains(., 'Sigla:')]]/text()").get(),
+            url=response.url, # not sure how this is useful
+            plan_url=response.urljoin(relative_plan_url),
+            plan_id=course_plan_id,
+
+            faculty=faculty_acronym,
+            year=school_year,
+        )
diff --git a/scrape_to_csv/scrape_to_csv/spiders/faculties_spider.py b/scrape_to_csv/scrape_to_csv/spiders/faculties_spider.py
index b737039..4939aa6 100644
--- a/scrape_to_csv/scrape_to_csv/spiders/faculties_spider.py
+++ b/scrape_to_csv/scrape_to_csv/spiders/faculties_spider.py
@@ -1,5 +1,7 @@
 import scrapy
 
+from ..items import Faculty
+
 class FacultiesSpider(scrapy.Spider):
     name = "faculties"
     allowed_domains = ["sigarra.up.pt"]
@@ -14,7 +16,7 @@ def parse(self, response):
             name = faculty.css("::text").get()
             # self.logger.debug("{} - {}".format(acronym, name))
 
-            yield {
-                "acronym": acronym,
-                "name": name,
-            }
+            yield Faculty(
+                name=name,
+                acronym=acronym,
+            )

From 379e35933b44f60b6c695658aeca42da1d91742d Mon Sep 17 00:00:00 2001
From: Juliane Marubayashi <juliane.marubayashi@gmail.com>
Date: Sat, 13 Nov 2021 12:45:20 +0000
Subject: [PATCH 09/20] Add: boilerplate to parse csv.

---
 .gitignore                          | 6 ++++++
 csv_to_sql/makefile                 | 0
 csv_to_sql/parser/__init__.py       | 0
 csv_to_sql/parser/utils/__init__.py | 0
 csv_to_sql/requirements.txt         | 0
 5 files changed, 6 insertions(+)
 create mode 100644 csv_to_sql/makefile
 create mode 100644 csv_to_sql/parser/__init__.py
 create mode 100644 csv_to_sql/parser/utils/__init__.py
 create mode 100644 csv_to_sql/requirements.txt

diff --git a/.gitignore b/.gitignore
index fcbaa84..38ee0fb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,7 @@
+# Python environment
+env_scrapper/**
+csv_to_sql/sqlparser/**
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
@@ -114,3 +118,5 @@ node_modules/
 # Intellij
 .idea
 
+
+
diff --git a/csv_to_sql/makefile b/csv_to_sql/makefile
new file mode 100644
index 0000000..e69de29
diff --git a/csv_to_sql/parser/__init__.py b/csv_to_sql/parser/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/csv_to_sql/parser/utils/__init__.py b/csv_to_sql/parser/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/csv_to_sql/requirements.txt b/csv_to_sql/requirements.txt
new file mode 100644
index 0000000..e69de29

From 7d8d058d17fc15d06188d6ec4ce0b9131360c784 Mon Sep 17 00:00:00 2001
From: Juliane Marubayashi <juliane.marubayashi@gmail.com>
Date: Sat, 13 Nov 2021 12:46:23 +0000
Subject: [PATCH 10/20] Add: creating faculties table.

The faculties table is being created, but no information is being added to it yet.
---
 csv_to_sql/README.md             |  2 ++
 csv_to_sql/parser/faculties.py   | 19 +++++++++++++++++++
 csv_to_sql/parser/utils/mysql.py | 10 ++++++++++
 csv_to_sql/parser/utils/paths.py | 10 ++++++++++
 4 files changed, 41 insertions(+)
 create mode 100644 csv_to_sql/README.md
 create mode 100644 csv_to_sql/parser/faculties.py
 create mode 100644 csv_to_sql/parser/utils/mysql.py
 create mode 100644 csv_to_sql/parser/utils/paths.py

diff --git a/csv_to_sql/README.md b/csv_to_sql/README.md
new file mode 100644
index 0000000..5087b9d
--- /dev/null
+++ b/csv_to_sql/README.md
@@ -0,0 +1,2 @@
+# Csv parser
+
diff --git a/csv_to_sql/parser/faculties.py b/csv_to_sql/parser/faculties.py
new file mode 100644
index 0000000..625bf87
--- /dev/null
+++ b/csv_to_sql/parser/faculties.py
@@ -0,0 +1,19 @@
+import csv
+import os
+from parser.utils import mysql 
+from utils import mysql, paths
+
+TABLE_NAME = "faculties"
+
+# Reading files
+f = open(paths.get_input_filepath(TABLE_NAME) , "r")
+f_sql = open(paths.get_output_filepath(TABLE_NAME), "w")
+f_reader = csv.reader(f)
+
+# Type 
+head = next(f_reader, None)
+types = ["VARCHAR(20)", "VARCHAR(100)"]
+f_sql.write(mysql.get_create_table(TABLE_NAME, head, types))
+
+
+
diff --git a/csv_to_sql/parser/utils/mysql.py b/csv_to_sql/parser/utils/mysql.py
new file mode 100644
index 0000000..e3e4c54
--- /dev/null
+++ b/csv_to_sql/parser/utils/mysql.py
@@ -0,0 +1,10 @@
+
+
+def get_create_table(table_name: str, head: list[str], types: list[str]) -> str:
+    num_columns = len(types)
+    create_string = f"CREATE TABLE {table_name} ("
+
+    for i in range(num_columns):
+        create_string += f"{head[i]} {types[i]},"
+
+    return create_string[:-1] + ");"
\ No newline at end of file
diff --git a/csv_to_sql/parser/utils/paths.py b/csv_to_sql/parser/utils/paths.py
new file mode 100644
index 0000000..86b0af8
--- /dev/null
+++ b/csv_to_sql/parser/utils/paths.py
@@ -0,0 +1,10 @@
+import os 
+CURRENT_PATH = os.path.dirname(os.path.abspath(__file__))
+
+def get_input_filepath(table_name: str):
+    return f"{CURRENT_PATH}/../data/raw/{table_name}.csv"
+
+def get_output_filepath(table_name: str):
+    return f"{CURRENT_PATH}/../data/sql/{table_name}.sql"
+
+

From b60eab606318dae1ddfbfd9d33a50f4db54b9def Mon Sep 17 00:00:00 2001
From: Juliane Marubayashi <juliane.marubayashi@gmail.com>
Date: Sat, 13 Nov 2021 13:52:43 +0000
Subject: [PATCH 11/20] Add: parsing faculties and created makefile.

---
 csv_to_sql/makefile              | 25 +++++++++++++++++++++++++
 csv_to_sql/parser/faculties.py   |  5 +++--
 csv_to_sql/parser/utils/mysql.py | 22 ++++++++++++++++++----
 csv_to_sql/parser/utils/paths.py |  4 ++--
 4 files changed, 48 insertions(+), 8 deletions(-)

diff --git a/csv_to_sql/makefile b/csv_to_sql/makefile
index e69de29..234d9b4 100644
--- a/csv_to_sql/makefile
+++ b/csv_to_sql/makefile
@@ -0,0 +1,25 @@
+PYTHON = python 
+
+.PHONY: all clean
+
+target_path = data/sql/
+exec_path = parser/
+targets_entities = faculties
+targets_filepath = $(addsuffix .sql, $(addprefix $(target_path), $(targets_entities)))
+
+
+all: parse 
+
+parse: $(targets_filepath)
+
+$(targets_filepath):
+	@echo [ CREATING ] $(target_path)
+	@mkdir $(target_path)
+	@echo [ PARSING ] $@...
+	@$(PYTHON) $(patsubst $(target_path)%.sql, $(exec_path)%.py, $@)
+
+
+clean:
+	@echo [ CLEANING ] ./data/sql/
+	@rm -f -r $(target_path)
+
diff --git a/csv_to_sql/parser/faculties.py b/csv_to_sql/parser/faculties.py
index 625bf87..e154fe6 100644
--- a/csv_to_sql/parser/faculties.py
+++ b/csv_to_sql/parser/faculties.py
@@ -1,6 +1,5 @@
 import csv
 import os
-from parser.utils import mysql 
 from utils import mysql, paths
 
 TABLE_NAME = "faculties"
@@ -15,5 +14,7 @@
 types = ["VARCHAR(20)", "VARCHAR(100)"]
 f_sql.write(mysql.get_create_table(TABLE_NAME, head, types))
 
-
+for row in f_reader: 
+    f_sql.write("\n")
+    f_sql.write(mysql.get_insert_values(TABLE_NAME, head, row)) 
 
diff --git a/csv_to_sql/parser/utils/mysql.py b/csv_to_sql/parser/utils/mysql.py
index e3e4c54..956917e 100644
--- a/csv_to_sql/parser/utils/mysql.py
+++ b/csv_to_sql/parser/utils/mysql.py
@@ -1,10 +1,24 @@
 
 
 def get_create_table(table_name: str, head: list[str], types: list[str]) -> str:
-    num_columns = len(types)
     create_string = f"CREATE TABLE {table_name} ("
 
-    for i in range(num_columns):
-        create_string += f"{head[i]} {types[i]},"
+    for head, type in zip(head, types):
+        create_string += f"\"{head}\" {type},"
 
-    return create_string[:-1] + ");"
\ No newline at end of file
+    return create_string[:-1] + ");"
+
+
+def get_insert_values(table_name: str, columns: list[str], values: list[str]) -> str: 
+    insert_string = f"INSERT INTO {table_name} (" 
+
+    # add columns names
+    for col in columns:
+        insert_string += f"\"{col}\"," 
+    insert_string = insert_string[:-1] + ")"
+
+    insert_string += " VALUES ("
+    for val in values: 
+        insert_string += f"\"{val}\"" + ","
+    return insert_string[:-1] + ");"
+        
\ No newline at end of file
diff --git a/csv_to_sql/parser/utils/paths.py b/csv_to_sql/parser/utils/paths.py
index 86b0af8..cf00c4a 100644
--- a/csv_to_sql/parser/utils/paths.py
+++ b/csv_to_sql/parser/utils/paths.py
@@ -2,9 +2,9 @@
 CURRENT_PATH = os.path.dirname(os.path.abspath(__file__))
 
 def get_input_filepath(table_name: str):
-    return f"{CURRENT_PATH}/../data/raw/{table_name}.csv"
+    return f"{CURRENT_PATH}/../../data/raw/{table_name}.csv"
 
 def get_output_filepath(table_name: str):
-    return f"{CURRENT_PATH}/../data/sql/{table_name}.sql"
+    return f"{CURRENT_PATH}/../../data/sql/{table_name}.sql"
 
 

From 44cdbb13e5cb86e22c3a6c6b59fb0529f888919f Mon Sep 17 00:00:00 2001
From: Juliane Marubayashi <juliane.marubayashi@gmail.com>
Date: Wed, 1 Dec 2021 00:58:08 +0000
Subject: [PATCH 12/20] Modify: creating mysql tables for courses and faculty

---
 mysql/db_creation.sql | 204 ++++++------------------------------------
 1 file changed, 26 insertions(+), 178 deletions(-)

diff --git a/mysql/db_creation.sql b/mysql/db_creation.sql
index 9b205d9..a4bd8c8 100644
--- a/mysql/db_creation.sql
+++ b/mysql/db_creation.sql
@@ -1,184 +1,32 @@
--- phpMyAdmin SQL Dump
--- version 4.7.7
--- https://www.phpmyadmin.net/
---
--- Host: db
--- Generation Time: Feb 04, 2018 at 01:27 PM
--- Server version: 5.7.20
--- PHP Version: 7.1.9
-
 SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO";
 SET AUTOCOMMIT = 0;
 START TRANSACTION;
 SET time_zone = "+00:00";
 
-
-/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
-/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
-/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
-/*!40101 SET NAMES utf8mb4 */;
-
---
--- Database: `tts`
---
-
--- --------------------------------------------------------
-
---
--- Table structure for table `course`
---
-
-CREATE TABLE `course` (
-  `id` int(11) NOT NULL,
-  `course_id` int(11) NOT NULL,
-  `faculty_id` int(11) NOT NULL,
-  `name` varchar(200) NOT NULL,
-  `acronym` varchar(10) NOT NULL,
-  `course_type` varchar(2) NOT NULL,
-  `year` int(11) NOT NULL,
-  `url` varchar(2000) NOT NULL,
-  `plan_url` varchar(2000) NOT NULL,
-  `last_updated` datetime NOT NULL
-) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-
--- --------------------------------------------------------
-
---
--- Table structure for table `course_unit`
---
-
-CREATE TABLE `course_unit` (
-  `id` int(11) NOT NULL,
-  `course_unit_id` int(11) NOT NULL,
-  `course_id` int(11) NOT NULL,
-  `name` varchar(200) NOT NULL,
-  `acronym` varchar(16) NOT NULL,
-  `url` varchar(2000) NOT NULL,
-  `course_year` tinyint(4) NOT NULL,
-  `semester` tinyint(4) NOT NULL,
-  `year` smallint(6) NOT NULL,
-  `schedule_url` varchar(2000) DEFAULT NULL,
-  `last_updated` datetime NOT NULL
-) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-
--- --------------------------------------------------------
-
---
--- Table structure for table `faculty`
---
-
 CREATE TABLE `faculty` (
-  `id` int(11) NOT NULL,
-  `acronym` varchar(10) DEFAULT NULL,
-  `name` text,
-  `last_updated` datetime NOT NULL
-) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-
--- --------------------------------------------------------
-
---
--- Table structure for table `schedule`
---
-
-CREATE TABLE `schedule` (
-  `id` int(11) NOT NULL,
-  `day` tinyint(3) UNSIGNED NOT NULL,
-  `duration` decimal(3,1) UNSIGNED NOT NULL,
-  `start_time` decimal(3,1) UNSIGNED NOT NULL,
-  `location` varchar(16) NOT NULL,
-  `lesson_type` varchar(3) NOT NULL,
-  `teacher_acronym` varchar(16) NOT NULL,
-  `course_unit_id` int(11) NOT NULL,
-  `last_updated` datetime NOT NULL,
-  `class_name` varchar(16) NOT NULL,
-  `composed_class_name` varchar(16) DEFAULT NULL
-) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-
---
--- Indexes for dumped tables
---
-
---
--- Indexes for table `course`
---
-ALTER TABLE `course`
-  ADD PRIMARY KEY (`id`),
-  ADD UNIQUE KEY `course_id` (`course_id`,`faculty_id`,`year`),
-  ADD KEY `faculty_id` (`faculty_id`);
-
---
--- Indexes for table `course_unit`
---
-ALTER TABLE `course_unit`
-  ADD PRIMARY KEY (`id`),
-  ADD UNIQUE KEY `uniqueness` (`course_unit_id`,`course_id`,`year`,`semester`) USING BTREE,
-  ADD KEY `course_id` (`course_id`);
-
---
--- Indexes for table `faculty`
---
-ALTER TABLE `faculty`
-  ADD PRIMARY KEY (`id`),
-  ADD UNIQUE KEY `acronym` (`acronym`);
-
---
--- Indexes for table `schedule`
---
-ALTER TABLE `schedule`
-  ADD PRIMARY KEY (`id`),
-  ADD KEY `course_unit_id` (`course_unit_id`) USING BTREE;
-
---
--- AUTO_INCREMENT for dumped tables
---
-
---
--- AUTO_INCREMENT for table `course`
---
-ALTER TABLE `course`
-  MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;
-
---
--- AUTO_INCREMENT for table `course_unit`
---
-ALTER TABLE `course_unit`
-  MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;
-
---
--- AUTO_INCREMENT for table `faculty`
---
-ALTER TABLE `faculty`
-  MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;
-
---
--- AUTO_INCREMENT for table `schedule`
---
-ALTER TABLE `schedule`
-  MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;
-
---
--- Constraints for dumped tables
---
-
---
--- Constraints for table `course`
---
-ALTER TABLE `course`
-  ADD CONSTRAINT `course_ibfk_1` FOREIGN KEY (`faculty_id`) REFERENCES `faculty` (`id`) ON DELETE CASCADE ON UPDATE CASCADE;
-
---
--- Constraints for table `course_unit`
---
-ALTER TABLE `course_unit`
-  ADD CONSTRAINT `course_unit_ibfk_1` FOREIGN KEY (`course_id`) REFERENCES `course` (`id`) ON DELETE CASCADE ON UPDATE CASCADE;
-
---
--- Constraints for table `schedule`
---
-ALTER TABLE `schedule`
-  ADD CONSTRAINT `schedule_ibfk_1` FOREIGN KEY (`course_unit_id`) REFERENCES `course_unit` (`id`) ON DELETE CASCADE ON UPDATE CASCADE;
-COMMIT;
-
-/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
-/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
-/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
+    `id` int(11) PRIMARY KEY, 
+    `acronym` varchar(10) NOT NULL UNIQUE KEY,
+    `name` varchar(200) NOT NULL
+);
+
+
+CREATE TABLE `courses` (
+    `id` int (11) PRIMARY KEY, 
+    `acronym` varchar(10) NOT NULL UNIQUE KEY, 
+    `name` varchar(200) NOT NULL,
+    `course_type` varchar(2) NOT NULL, 
+    `plan_id` int (11) NOT NULL,
+    `plan_url` varchar(2000) NOT NULL,
+    `url` varchar(2000) NOT NULL,
+    `year` int(11) NOT NULL 
+); 
+
+
+CREATE TABLE `courses_faculty` (
+    `faculty_id` int(11) NOT NULL, 
+    `course_id` int(11) NOT NULL,
+    CONSTRAINT fk_course FOREIGN KEY (`course_id`) REFERENCES `courses` (`id`) ON DELETE CASCADE ON UPDATE CASCADE, 
+    CONSTRAINT fk_faculty FOREIGN KEY (`faculty_id`) REFERENCES `faculty`(`id`) ON DELETE CASCADE ON UPDATE CASCADE 
+); 
+
+COMMIT;
\ No newline at end of file

From 0b56f212f1c15e2fa8a481baf6556894ed6db845 Mon Sep 17 00:00:00 2001
From: Juliane Marubayashi <juliane.marubayashi@gmail.com>
Date: Thu, 2 Dec 2021 15:10:38 +0000
Subject: [PATCH 13/20] Add: faculties generating insert

---
 .gitignore                     |  2 +-
 csv_to_sql/parser/faculties.py | 16 ++++++----------
 2 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/.gitignore b/.gitignore
index 38ee0fb..a3aeed5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,6 @@
 # Python environment
 env_scrapper/**
-csv_to_sql/sqlparser/**
+csv_to_sql/sql_parser/**
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
diff --git a/csv_to_sql/parser/faculties.py b/csv_to_sql/parser/faculties.py
index e154fe6..a5eaed7 100644
--- a/csv_to_sql/parser/faculties.py
+++ b/csv_to_sql/parser/faculties.py
@@ -1,20 +1,16 @@
 import csv
-import os
-from utils import mysql, paths
+from utils import paths, utils
 
 TABLE_NAME = "faculties"
 
 # Reading files
-f = open(paths.get_input_filepath(TABLE_NAME) , "r")
+f = open(paths.get_input_filepath(TABLE_NAME) , "r") 
 f_sql = open(paths.get_output_filepath(TABLE_NAME), "w")
 f_reader = csv.reader(f)
 
-# Type 
-head = next(f_reader, None)
-types = ["VARCHAR(20)", "VARCHAR(100)"]
-f_sql.write(mysql.get_create_table(TABLE_NAME, head, types))
 
-for row in f_reader: 
-    f_sql.write("\n")
-    f_sql.write(mysql.get_insert_values(TABLE_NAME, head, row)) 
+col_names = ','.join(list(map(utils.add_brackets_cols, ['id'] + next(f_reader))))    # Names for each column   
 
+for faculty_id, row in enumerate(f_reader):  
+    values = ','.join([str(faculty_id)] + list(map(utils.add_brackets_vals, row)))
+    f_sql.write(f"INSERT INTO `faculty`({col_names}) VALUES ({values});\n") 

From 367049bcb3bfec5937d66b6388c2f4247c121690 Mon Sep 17 00:00:00 2001
From: Juliane Marubayashi <juliane.marubayashi@gmail.com>
Date: Thu, 2 Dec 2021 17:04:07 +0000
Subject: [PATCH 14/20] Add: configparser and main class

The alterations made in this commit make the code uniform by using class inheritance and configparser.
---
 csv_to_sql/configparser.ini            |  9 ++++++
 csv_to_sql/makefile                    | 25 ----------------
 csv_to_sql/parser/faculties.py         | 16 -----------
 csv_to_sql/parser/utils/__init__.py    |  0
 csv_to_sql/parser/utils/mysql.py       | 24 ----------------
 csv_to_sql/parser/utils/paths.py       | 10 -------
 csv_to_sql/{parser => src}/__init__.py |  0
 csv_to_sql/src/__main__.py             |  9 ++++++
 csv_to_sql/src/course.py               | 31 ++++++++++++++++++++
 csv_to_sql/src/faculty.py              | 15 ++++++++++
 csv_to_sql/src/parser.py               | 40 ++++++++++++++++++++++++++
 11 files changed, 104 insertions(+), 75 deletions(-)
 create mode 100644 csv_to_sql/configparser.ini
 delete mode 100644 csv_to_sql/makefile
 delete mode 100644 csv_to_sql/parser/faculties.py
 delete mode 100644 csv_to_sql/parser/utils/__init__.py
 delete mode 100644 csv_to_sql/parser/utils/mysql.py
 delete mode 100644 csv_to_sql/parser/utils/paths.py
 rename csv_to_sql/{parser => src}/__init__.py (100%)
 create mode 100644 csv_to_sql/src/__main__.py
 create mode 100644 csv_to_sql/src/course.py
 create mode 100644 csv_to_sql/src/faculty.py
 create mode 100644 csv_to_sql/src/parser.py

diff --git a/csv_to_sql/configparser.ini b/csv_to_sql/configparser.ini
new file mode 100644
index 0000000..07cd868
--- /dev/null
+++ b/csv_to_sql/configparser.ini
@@ -0,0 +1,9 @@
+[course]
+csv = courses
+faculties_col = faculties
+
+[faculty]
+csv = faculties
+
+[course_faculty]
+
diff --git a/csv_to_sql/makefile b/csv_to_sql/makefile
deleted file mode 100644
index 234d9b4..0000000
--- a/csv_to_sql/makefile
+++ /dev/null
@@ -1,25 +0,0 @@
-PYTHON = python 
-
-.PHONY: all clean
-
-target_path = data/sql/
-exec_path = parser/
-targets_entities = faculties
-targets_filepath = $(addsuffix .sql, $(addprefix $(target_path), $(targets_entities)))
-
-
-all: parse 
-
-parse: $(targets_filepath)
-
-$(targets_filepath):
-	@echo [ CREATING ] $(target_path)
-	@mkdir $(target_path)
-	@echo [ PARSING ] $@...
-	@$(PYTHON) $(patsubst $(target_path)%.sql, $(exec_path)%.py, $@)
-
-
-clean:
-	@echo [ CLEANING ] ./data/sql/
-	@rm -f -r $(target_path)
-
diff --git a/csv_to_sql/parser/faculties.py b/csv_to_sql/parser/faculties.py
deleted file mode 100644
index a5eaed7..0000000
--- a/csv_to_sql/parser/faculties.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import csv
-from utils import paths, utils
-
-TABLE_NAME = "faculties"
-
-# Reading files
-f = open(paths.get_input_filepath(TABLE_NAME) , "r") 
-f_sql = open(paths.get_output_filepath(TABLE_NAME), "w")
-f_reader = csv.reader(f)
-
-
-col_names = ','.join(list(map(utils.add_brackets_cols, ['id'] + next(f_reader))))    # Names for each column   
-
-for faculty_id, row in enumerate(f_reader):  
-    values = ','.join([str(faculty_id)] + list(map(utils.add_brackets_vals, row)))
-    f_sql.write(f"INSERT INTO `faculty`({col_names}) VALUES ({values});\n") 
diff --git a/csv_to_sql/parser/utils/__init__.py b/csv_to_sql/parser/utils/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/csv_to_sql/parser/utils/mysql.py b/csv_to_sql/parser/utils/mysql.py
deleted file mode 100644
index 956917e..0000000
--- a/csv_to_sql/parser/utils/mysql.py
+++ /dev/null
@@ -1,24 +0,0 @@
-
-
-def get_create_table(table_name: str, head: list[str], types: list[str]) -> str:
-    create_string = f"CREATE TABLE {table_name} ("
-
-    for head, type in zip(head, types):
-        create_string += f"\"{head}\" {type},"
-
-    return create_string[:-1] + ");"
-
-
-def get_insert_values(table_name: str, columns: list[str], values: list[str]) -> str: 
-    insert_string = f"INSERT INTO {table_name} (" 
-
-    # add columns names
-    for col in columns:
-        insert_string += f"\"{col}\"," 
-    insert_string = insert_string[:-1] + ")"
-
-    insert_string += " VALUES ("
-    for val in values: 
-        insert_string += f"\"{val}\"" + ","
-    return insert_string[:-1] + ");"
-        
\ No newline at end of file
diff --git a/csv_to_sql/parser/utils/paths.py b/csv_to_sql/parser/utils/paths.py
deleted file mode 100644
index cf00c4a..0000000
--- a/csv_to_sql/parser/utils/paths.py
+++ /dev/null
@@ -1,10 +0,0 @@
-import os 
-CURRENT_PATH = os.path.dirname(os.path.abspath(__file__))
-
-def get_input_filepath(table_name: str):
-    return f"{CURRENT_PATH}/../../data/raw/{table_name}.csv"
-
-def get_output_filepath(table_name: str):
-    return f"{CURRENT_PATH}/../../data/sql/{table_name}.sql"
-
-
diff --git a/csv_to_sql/parser/__init__.py b/csv_to_sql/src/__init__.py
similarity index 100%
rename from csv_to_sql/parser/__init__.py
rename to csv_to_sql/src/__init__.py
diff --git a/csv_to_sql/src/__main__.py b/csv_to_sql/src/__main__.py
new file mode 100644
index 0000000..2992354
--- /dev/null
+++ b/csv_to_sql/src/__main__.py
@@ -0,0 +1,9 @@
+
+from .faculty import Faculty
+from .course import Course
+import configparser as cp 
+
+config = cp.ConfigParser()
+config.read("./configparser.ini")
+faculty = Faculty(config).parse()
+course = Course(config).parse()
\ No newline at end of file
diff --git a/csv_to_sql/src/course.py b/csv_to_sql/src/course.py
new file mode 100644
index 0000000..110dc5d
--- /dev/null
+++ b/csv_to_sql/src/course.py
@@ -0,0 +1,31 @@
+from re import A
+from .parser import Parser 
+import configparser as cp
+
+class Course(Parser):
+    def __init__(self, config: cp.ConfigParser):
+        self.config = config
+        super().__init__("course", config['course']['csv'])
+    
+    def parse(self):   
+        cols_list = next(self.f_reader)   
+
+        # Get faculties index. 
+        faculties_col_name = self.config['course']['faculties_col']
+        faculties_index = cols_list.index(faculties_col_name) 
+
+        # Drop faculties col. 
+        del cols_list[faculties_index]
+        cols = self.get_cols()  
+
+        # Generate inserts 
+        for course_id, row in enumerate(self.f_reader): 
+            del row[faculties_index]    # Remove faculties position.
+            values = self.get_values(course_id, row)
+            insert = self.sql_get_insert(cols, values) 
+            self.f_sql.write(insert)
+
+
+
+
+        
diff --git a/csv_to_sql/src/faculty.py b/csv_to_sql/src/faculty.py
new file mode 100644
index 0000000..31f940f
--- /dev/null
+++ b/csv_to_sql/src/faculty.py
@@ -0,0 +1,15 @@
+from .parser import Parser 
+import configparser as cp
+
+class Faculty(Parser):
+    def __init__(self, config: cp.ConfigParser): 
+        self.config = config 
+        super().__init__("faculty", config['faculty']['csv'])
+
+    def parse(self):
+        cols_list = next(self.f_reader)
+        cols = self.get_cols(cols_list)
+        for faculty_id, row in enumerate(self.f_reader):   
+            values = self.get_values(faculty_id, row)
+            insert = self.sql_get_insert(cols, values)
+            self.f_sql.write(insert)
diff --git a/csv_to_sql/src/parser.py b/csv_to_sql/src/parser.py
new file mode 100644
index 0000000..d0523dc
--- /dev/null
+++ b/csv_to_sql/src/parser.py
@@ -0,0 +1,40 @@
+from abc import abstractclassmethod
+import os 
+import csv 
+
+class Parser: 
+    def __init__(self, table_name: str, csv_name: str):
+        self.current_path = os.path.dirname(os.path.abspath(__file__))    
+        self.table_name = table_name
+        self.csv_name = csv_name
+        # Reading csv. 
+        f = open(self.get_input_filepath() , "r")  
+        self.f_reader = csv.reader(f) 
+        # Creating sql.
+        self.f_sql = open(self.get_output_filepath(), "w")
+    
+    def add_brackets_vals(self, x: str):
+        return f"'{x}'"  
+
+    def add_brackets_cols(self, x: str):  
+        return f"`{x}`" 
+
+    def sql_get_insert(self, cols: list, values: list): 
+        return f"INSERT INTO {self.table_name} ({cols}) VALUES ({values}); \n"
+
+    def get_input_filepath(self):
+        return f"{self.current_path}/../data/raw/{self.csv_name}.csv"
+
+    def get_output_filepath(self):
+        return f"{self.current_path}/../data/sql/{self.table_name}.sql"
+
+    def get_cols(self, cols_list): 
+        return ','.join(list(map(self.add_brackets_cols, ['id'] + cols_list)))    
+
+    def get_values(self, id_, row):
+        return ','.join([str(id_)] + list(map(self.add_brackets_vals, row)))    
+
+
+    @abstractclassmethod 
+    def parser(self):
+        pass 
\ No newline at end of file

From 8ea0283af2de546b2df3f0d6f41bfed1cbcfa17d Mon Sep 17 00:00:00 2001
From: Juliane Marubayashi <juliane.marubayashi@gmail.com>
Date: Thu, 2 Dec 2021 17:50:24 +0000
Subject: [PATCH 15/20] Add: Creating association course_faculty

---
 csv_to_sql/src/__main__.py       |  7 +++++--
 csv_to_sql/src/course.py         |  3 +--
 csv_to_sql/src/course_faculty.py | 33 +++++++++++++++++++++++++++++++
 csv_to_sql/src/parser.py         | 34 +++++++++++++++++++++-----------
 4 files changed, 61 insertions(+), 16 deletions(-)
 create mode 100644 csv_to_sql/src/course_faculty.py

diff --git a/csv_to_sql/src/__main__.py b/csv_to_sql/src/__main__.py
index 2992354..e17be7b 100644
--- a/csv_to_sql/src/__main__.py
+++ b/csv_to_sql/src/__main__.py
@@ -1,9 +1,12 @@
 
 from .faculty import Faculty
-from .course import Course
+from .course import Course 
+from .course_faculty import Course_Faculty 
+
 import configparser as cp 
 
 config = cp.ConfigParser()
 config.read("./configparser.ini")
 faculty = Faculty(config).parse()
-course = Course(config).parse()
\ No newline at end of file
+course = Course(config).parse()
+course_faculty = Course_Faculty(config).parse()
\ No newline at end of file
diff --git a/csv_to_sql/src/course.py b/csv_to_sql/src/course.py
index 110dc5d..13c4a97 100644
--- a/csv_to_sql/src/course.py
+++ b/csv_to_sql/src/course.py
@@ -1,4 +1,3 @@
-from re import A
 from .parser import Parser 
 import configparser as cp
 
@@ -16,7 +15,7 @@ def parse(self):
 
         # Drop faculties col. 
         del cols_list[faculties_index]
-        cols = self.get_cols()  
+        cols = self.get_cols(cols_list)  
 
         # Generate inserts 
         for course_id, row in enumerate(self.f_reader): 
diff --git a/csv_to_sql/src/course_faculty.py b/csv_to_sql/src/course_faculty.py
new file mode 100644
index 0000000..e787c68
--- /dev/null
+++ b/csv_to_sql/src/course_faculty.py
@@ -0,0 +1,33 @@
+import configparser as cp
+from .parser import Parser 
+import pandas as pd 
+from ast import literal_eval 
+
+class Course_Faculty(Parser):
+    def __init__(self, config: cp.ConfigParser): 
+        self.config = config
+        super().__init__("course_faculty", None) 
+        self.df_faculty = pd.read_csv(self.get_input_filepath(config['faculty']['csv']), index_col=False) 
+        self.df_course = pd.read_csv(self.get_input_filepath(config['course']['csv']), index_col=False)  
+
+    def get_faculty_id(self, faculty_acronym: str): 
+        return self.df_faculty[self.df_faculty['acronym'] == faculty_acronym].index[0]
+
+
+    def parse(self):  
+        cols = self.get_cols(["course_id", "faculty_id"], with_id=False)   
+
+        # Name of the column in the course CSV that lists each course's faculties.
+        faculties_col_name = self.config['course']['faculties_col'] 
+        
+        # For each course, get the ids of the faculties it is associated with.
+        for course_id, faculties in enumerate(self.df_course[faculties_col_name]):     
+            # Acronym to id 
+            faculties_acronyms = literal_eval(faculties)
+            faculties_ids = list(map(self.get_faculty_id, faculties_acronyms))     
+            # For each faculty id, write one INSERT statement for the table.
+            for faculty_id in faculties_ids:   
+                values = self.get_values(None, [course_id, faculty_id], with_id=False)
+                insert = self.sql_get_insert(cols, values)
+                self.f_sql.write(insert)
+
diff --git a/csv_to_sql/src/parser.py b/csv_to_sql/src/parser.py
index d0523dc..0d0e543 100644
--- a/csv_to_sql/src/parser.py
+++ b/csv_to_sql/src/parser.py
@@ -3,15 +3,19 @@
 import csv 
 
 class Parser: 
-    def __init__(self, table_name: str, csv_name: str):
+    def __init__(self, table_name: str, csv_name: str = None):
         self.current_path = os.path.dirname(os.path.abspath(__file__))    
         self.table_name = table_name
-        self.csv_name = csv_name
-        # Reading csv. 
-        f = open(self.get_input_filepath() , "r")  
-        self.f_reader = csv.reader(f) 
+        self.csv_name = csv_name 
+
         # Creating sql.
-        self.f_sql = open(self.get_output_filepath(), "w")
+        self.f_sql = open(self.get_output_filepath(), "w")    
+
+        # Reading csv. 
+        if csv_name is not None: 
+            f = open(self.get_input_filepath() , "r")  
+            self.f_reader = csv.reader(f) 
+
     
     def add_brackets_vals(self, x: str):
         return f"'{x}'"  
@@ -22,17 +26,23 @@ def add_brackets_cols(self, x: str):
     def sql_get_insert(self, cols: list, values: list): 
         return f"INSERT INTO {self.table_name} ({cols}) VALUES ({values}); \n"
 
-    def get_input_filepath(self):
-        return f"{self.current_path}/../data/raw/{self.csv_name}.csv"
+    def get_input_filepath(self, csv_name=None): 
+        if csv_name is None:
+            return f"{self.current_path}/../data/raw/{self.csv_name}.csv" 
+        return f"{self.current_path}/../data/raw/{csv_name}.csv" 
 
     def get_output_filepath(self):
         return f"{self.current_path}/../data/sql/{self.table_name}.sql"
 
-    def get_cols(self, cols_list): 
-        return ','.join(list(map(self.add_brackets_cols, ['id'] + cols_list)))    
+    def get_cols(self, cols_list, with_id=True):  
+        if with_id:
+            return ','.join(list(map(self.add_brackets_cols, ['id'] + cols_list)))     
+        return ','.join(list(map(self.add_brackets_cols, cols_list)))     
 
-    def get_values(self, id_, row):
-        return ','.join([str(id_)] + list(map(self.add_brackets_vals, row)))    
+    def get_values(self, id_, row, with_id=True): 
+        if with_id:
+            return ','.join([str(id_)] + list(map(self.add_brackets_vals, row)))     
+        return ','.join(list(map(self.add_brackets_vals, row)))     
 
 
     @abstractclassmethod 

From e82a333dd768f8ad8ab070256efd4c40deff64bb Mon Sep 17 00:00:00 2001
From: Juliane Marubayashi <juliane.marubayashi@gmail.com>
Date: Thu, 2 Dec 2021 18:10:41 +0000
Subject: [PATCH 16/20] FIx: table names and some cols

---
 mysql/db_creation.sql | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/mysql/db_creation.sql b/mysql/db_creation.sql
index a4bd8c8..b70cc14 100644
--- a/mysql/db_creation.sql
+++ b/mysql/db_creation.sql
@@ -10,22 +10,23 @@ CREATE TABLE `faculty` (
 );
 
 
-CREATE TABLE `courses` (
-    `id` int (11) PRIMARY KEY, 
-    `acronym` varchar(10) NOT NULL UNIQUE KEY, 
+CREATE TABLE `course` (
+    `id` int (11) PRIMARY KEY,   
+    `course_id` int (11) NOT NULL UNIQUE,
+    `acronym` varchar(10) NOT NULL, 
     `name` varchar(200) NOT NULL,
-    `course_type` varchar(2) NOT NULL, 
-    `plan_id` int (11) NOT NULL,
-    `plan_url` varchar(2000) NOT NULL,
+    `course_type` varchar(2) NOT NULL,  
     `url` varchar(2000) NOT NULL,
-    `year` int(11) NOT NULL 
+    `plan_url` varchar(2000) NOT NULL,  
+    `plan_id` int (11) NOT NULL,
+    `year` int(12) NOT NULL 
 ); 
 
 
-CREATE TABLE `courses_faculty` (
+CREATE TABLE `course_faculty` (
     `faculty_id` int(11) NOT NULL, 
     `course_id` int(11) NOT NULL,
-    CONSTRAINT fk_course FOREIGN KEY (`course_id`) REFERENCES `courses` (`id`) ON DELETE CASCADE ON UPDATE CASCADE, 
+    CONSTRAINT fk_course FOREIGN KEY (`course_id`) REFERENCES `course` (`id`) ON DELETE CASCADE ON UPDATE CASCADE, 
     CONSTRAINT fk_faculty FOREIGN KEY (`faculty_id`) REFERENCES `faculty`(`id`) ON DELETE CASCADE ON UPDATE CASCADE 
 ); 
 

From 8efeb05cfb9f592542a435b96f5ff950d6cdf2dd Mon Sep 17 00:00:00 2001
From: Juliane Marubayashi <juliane.marubayashi@gmail.com>
Date: Thu, 2 Dec 2021 18:23:52 +0000
Subject: [PATCH 17/20] Restore: previous db_creation

---
 mysql/prev_db_creation.sql | 184 +++++++++++++++++++++++++++++++++++++
 1 file changed, 184 insertions(+)
 create mode 100644 mysql/prev_db_creation.sql

diff --git a/mysql/prev_db_creation.sql b/mysql/prev_db_creation.sql
new file mode 100644
index 0000000..b8b833d
--- /dev/null
+++ b/mysql/prev_db_creation.sql
@@ -0,0 +1,184 @@
+- phpMyAdmin SQL Dump
+-- version 4.7.7
+-- https://www.phpmyadmin.net/
+--
+-- Host: db
+-- Generation Time: Feb 04, 2018 at 01:27 PM
+-- Server version: 5.7.20
+-- PHP Version: 7.1.9
+
+SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO";
+SET AUTOCOMMIT = 0;
+START TRANSACTION;
+SET time_zone = "+00:00";
+
+
+/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
+/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
+/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
+/*!40101 SET NAMES utf8mb4 */;
+
+--
+-- Database: `tts`
+--
+
+-- --------------------------------------------------------
+
+--
+-- Table structure for table `course`
+--
+
+CREATE TABLE `course` (
+  `id` int(11) NOT NULL,
+  `course_id` int(11) NOT NULL,
+  `faculty_id` int(11) NOT NULL,
+  `name` varchar(200) NOT NULL,
+  `acronym` varchar(10) NOT NULL,
+  `course_type` varchar(2) NOT NULL,
+  `year` int(11) NOT NULL,
+  `url` varchar(2000) NOT NULL,
+  `plan_url` varchar(2000) NOT NULL,
+  `last_updated` datetime NOT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+
+-- --------------------------------------------------------
+
+--
+-- Table structure for table `course_unit`
+--
+
+CREATE TABLE `course_unit` (
+  `id` int(11) NOT NULL,
+  `course_unit_id` int(11) NOT NULL,
+  `course_id` int(11) NOT NULL,
+  `name` varchar(200) NOT NULL,
+  `acronym` varchar(16) NOT NULL,
+  `url` varchar(2000) NOT NULL,
+  `course_year` tinyint(4) NOT NULL,
+  `semester` tinyint(4) NOT NULL,
+  `year` smallint(6) NOT NULL,
+  `schedule_url` varchar(2000) DEFAULT NULL,
+  `last_updated` datetime NOT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+
+-- --------------------------------------------------------
+
+--
+-- Table structure for table `faculty`
+--
+
+CREATE TABLE `faculty` (
+  `id` int(11) NOT NULL,
+  `acronym` varchar(10) DEFAULT NULL,
+  `name` text,
+  `last_updated` datetime NOT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+
+-- --------------------------------------------------------
+
+--
+-- Table structure for table `schedule`
+--
+
+CREATE TABLE `schedule` (
+  `id` int(11) NOT NULL,
+  `day` tinyint(3) UNSIGNED NOT NULL,
+  `duration` decimal(3,1) UNSIGNED NOT NULL,
+  `start_time` decimal(3,1) UNSIGNED NOT NULL,
+  `location` varchar(16) NOT NULL,
+  `lesson_type` varchar(3) NOT NULL,
+  `teacher_acronym` varchar(16) NOT NULL,
+  `course_unit_id` int(11) NOT NULL,
+  `last_updated` datetime NOT NULL,
+  `class_name` varchar(16) NOT NULL,
+  `composed_class_name` varchar(16) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+
+--
+-- Indexes for dumped tables
+--
+
+--
+-- Indexes for table `course`
+--
+ALTER TABLE `course`
+  ADD PRIMARY KEY (`id`),
+  ADD UNIQUE KEY `course_id` (`course_id`,`faculty_id`,`year`),
+  ADD KEY `faculty_id` (`faculty_id`);
+
+--
+-- Indexes for table `course_unit`
+--
+ALTER TABLE `course_unit`
+  ADD PRIMARY KEY (`id`),
+  ADD UNIQUE KEY `uniqueness` (`course_unit_id`,`course_id`,`year`,`semester`) USING BTREE,
+  ADD KEY `course_id` (`course_id`);
+
+--
+-- Indexes for table `faculty`
+--
+ALTER TABLE `faculty`
+  ADD PRIMARY KEY (`id`),
+  ADD UNIQUE KEY `acronym` (`acronym`);
+
+--
+-- Indexes for table `schedule`
+--
+ALTER TABLE `schedule`
+  ADD PRIMARY KEY (`id`),
+  ADD KEY `course_unit_id` (`course_unit_id`) USING BTREE;
+
+--
+-- AUTO_INCREMENT for dumped tables
+--
+
+--
+-- AUTO_INCREMENT for table `course`
+--
+ALTER TABLE `course`
+  MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;
+
+--
+-- AUTO_INCREMENT for table `course_unit`
+--
+ALTER TABLE `course_unit`
+  MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;
+
+--
+-- AUTO_INCREMENT for table `faculty`
+--
+ALTER TABLE `faculty`
+  MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;
+
+--
+-- AUTO_INCREMENT for table `schedule`
+--
+ALTER TABLE `schedule`
+  MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;
+
+--
+-- Constraints for dumped tables
+--
+
+--
+-- Constraints for table `course`
+--
+ALTER TABLE `course`
+  ADD CONSTRAINT `course_ibfk_1` FOREIGN KEY (`faculty_id`) REFERENCES `faculty` (`id`) ON DELETE CASCADE ON UPDATE CASCADE;
+
+--
+-- Constraints for table `course_unit`
+--
+ALTER TABLE `course_unit`
+  ADD CONSTRAINT `course_unit_ibfk_1` FOREIGN KEY (`course_id`) REFERENCES `course` (`id`) ON DELETE CASCADE ON UPDATE CASCADE;
+
+--
+-- Constraints for table `schedule`
+--
+ALTER TABLE `schedule`
+  ADD CONSTRAINT `schedule_ibfk_1` FOREIGN KEY (`course_unit_id`) REFERENCES `course_unit` (`id`) ON DELETE CASCADE ON UPDATE CASCADE;
+COMMIT;
+
+/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
+/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
+/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
\ No newline at end of file

From b00bc6d53ecdf26e1debeda759d59679e14bec30 Mon Sep 17 00:00:00 2001
From: Juliane Marubayashi <juliane.marubayashi@gmail.com>
Date: Sun, 26 Dec 2021 22:46:56 +0000
Subject: [PATCH 18/20] Remove: deleted mysql and phpmyadmin files, since will
 be used in another rep

---
 mysql/Dockerfile           |   2 -
 mysql/db_creation.sql      |  33 -------
 mysql/prev_db_creation.sql | 184 -------------------------------------
 phpmyadmin/Dockerfile      |   1 -
 4 files changed, 220 deletions(-)
 delete mode 100644 mysql/Dockerfile
 delete mode 100644 mysql/db_creation.sql
 delete mode 100644 mysql/prev_db_creation.sql
 delete mode 100644 phpmyadmin/Dockerfile

diff --git a/mysql/Dockerfile b/mysql/Dockerfile
deleted file mode 100644
index ac4e879..0000000
--- a/mysql/Dockerfile
+++ /dev/null
@@ -1,2 +0,0 @@
-FROM mysql:5
-ADD db_creation.sql /docker-entrypoint-initdb.d
\ No newline at end of file
diff --git a/mysql/db_creation.sql b/mysql/db_creation.sql
deleted file mode 100644
index b70cc14..0000000
--- a/mysql/db_creation.sql
+++ /dev/null
@@ -1,33 +0,0 @@
-SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO";
-SET AUTOCOMMIT = 0;
-START TRANSACTION;
-SET time_zone = "+00:00";
-
-CREATE TABLE `faculty` (
-    `id` int(11) PRIMARY KEY, 
-    `acronym` varchar(10) NOT NULL UNIQUE KEY,
-    `name` varchar(200) NOT NULL
-);
-
-
-CREATE TABLE `course` (
-    `id` int (11) PRIMARY KEY,   
-    `course_id` int (11) NOT NULL UNIQUE,
-    `acronym` varchar(10) NOT NULL, 
-    `name` varchar(200) NOT NULL,
-    `course_type` varchar(2) NOT NULL,  
-    `url` varchar(2000) NOT NULL,
-    `plan_url` varchar(2000) NOT NULL,  
-    `plan_id` int (11) NOT NULL,
-    `year` int(12) NOT NULL 
-); 
-
-
-CREATE TABLE `course_faculty` (
-    `faculty_id` int(11) NOT NULL, 
-    `course_id` int(11) NOT NULL,
-    CONSTRAINT fk_course FOREIGN KEY (`course_id`) REFERENCES `course` (`id`) ON DELETE CASCADE ON UPDATE CASCADE, 
-    CONSTRAINT fk_faculty FOREIGN KEY (`faculty_id`) REFERENCES `faculty`(`id`) ON DELETE CASCADE ON UPDATE CASCADE 
-); 
-
-COMMIT;
\ No newline at end of file
diff --git a/mysql/prev_db_creation.sql b/mysql/prev_db_creation.sql
deleted file mode 100644
index b8b833d..0000000
--- a/mysql/prev_db_creation.sql
+++ /dev/null
@@ -1,184 +0,0 @@
-- phpMyAdmin SQL Dump
--- version 4.7.7
--- https://www.phpmyadmin.net/
---
--- Host: db
--- Generation Time: Feb 04, 2018 at 01:27 PM
--- Server version: 5.7.20
--- PHP Version: 7.1.9
-
-SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO";
-SET AUTOCOMMIT = 0;
-START TRANSACTION;
-SET time_zone = "+00:00";
-
-
-/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
-/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
-/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
-/*!40101 SET NAMES utf8mb4 */;
-
---
--- Database: `tts`
---
-
--- --------------------------------------------------------
-
---
--- Table structure for table `course`
---
-
-CREATE TABLE `course` (
-  `id` int(11) NOT NULL,
-  `course_id` int(11) NOT NULL,
-  `faculty_id` int(11) NOT NULL,
-  `name` varchar(200) NOT NULL,
-  `acronym` varchar(10) NOT NULL,
-  `course_type` varchar(2) NOT NULL,
-  `year` int(11) NOT NULL,
-  `url` varchar(2000) NOT NULL,
-  `plan_url` varchar(2000) NOT NULL,
-  `last_updated` datetime NOT NULL
-) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-
--- --------------------------------------------------------
-
---
--- Table structure for table `course_unit`
---
-
-CREATE TABLE `course_unit` (
-  `id` int(11) NOT NULL,
-  `course_unit_id` int(11) NOT NULL,
-  `course_id` int(11) NOT NULL,
-  `name` varchar(200) NOT NULL,
-  `acronym` varchar(16) NOT NULL,
-  `url` varchar(2000) NOT NULL,
-  `course_year` tinyint(4) NOT NULL,
-  `semester` tinyint(4) NOT NULL,
-  `year` smallint(6) NOT NULL,
-  `schedule_url` varchar(2000) DEFAULT NULL,
-  `last_updated` datetime NOT NULL
-) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-
--- --------------------------------------------------------
-
---
--- Table structure for table `faculty`
---
-
-CREATE TABLE `faculty` (
-  `id` int(11) NOT NULL,
-  `acronym` varchar(10) DEFAULT NULL,
-  `name` text,
-  `last_updated` datetime NOT NULL
-) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-
--- --------------------------------------------------------
-
---
--- Table structure for table `schedule`
---
-
-CREATE TABLE `schedule` (
-  `id` int(11) NOT NULL,
-  `day` tinyint(3) UNSIGNED NOT NULL,
-  `duration` decimal(3,1) UNSIGNED NOT NULL,
-  `start_time` decimal(3,1) UNSIGNED NOT NULL,
-  `location` varchar(16) NOT NULL,
-  `lesson_type` varchar(3) NOT NULL,
-  `teacher_acronym` varchar(16) NOT NULL,
-  `course_unit_id` int(11) NOT NULL,
-  `last_updated` datetime NOT NULL,
-  `class_name` varchar(16) NOT NULL,
-  `composed_class_name` varchar(16) DEFAULT NULL
-) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-
---
--- Indexes for dumped tables
---
-
---
--- Indexes for table `course`
---
-ALTER TABLE `course`
-  ADD PRIMARY KEY (`id`),
-  ADD UNIQUE KEY `course_id` (`course_id`,`faculty_id`,`year`),
-  ADD KEY `faculty_id` (`faculty_id`);
-
---
--- Indexes for table `course_unit`
---
-ALTER TABLE `course_unit`
-  ADD PRIMARY KEY (`id`),
-  ADD UNIQUE KEY `uniqueness` (`course_unit_id`,`course_id`,`year`,`semester`) USING BTREE,
-  ADD KEY `course_id` (`course_id`);
-
---
--- Indexes for table `faculty`
---
-ALTER TABLE `faculty`
-  ADD PRIMARY KEY (`id`),
-  ADD UNIQUE KEY `acronym` (`acronym`);
-
---
--- Indexes for table `schedule`
---
-ALTER TABLE `schedule`
-  ADD PRIMARY KEY (`id`),
-  ADD KEY `course_unit_id` (`course_unit_id`) USING BTREE;
-
---
--- AUTO_INCREMENT for dumped tables
---
-
---
--- AUTO_INCREMENT for table `course`
---
-ALTER TABLE `course`
-  MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;
-
---
--- AUTO_INCREMENT for table `course_unit`
---
-ALTER TABLE `course_unit`
-  MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;
-
---
--- AUTO_INCREMENT for table `faculty`
---
-ALTER TABLE `faculty`
-  MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;
-
---
--- AUTO_INCREMENT for table `schedule`
---
-ALTER TABLE `schedule`
-  MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;
-
---
--- Constraints for dumped tables
---
-
---
--- Constraints for table `course`
---
-ALTER TABLE `course`
-  ADD CONSTRAINT `course_ibfk_1` FOREIGN KEY (`faculty_id`) REFERENCES `faculty` (`id`) ON DELETE CASCADE ON UPDATE CASCADE;
-
---
--- Constraints for table `course_unit`
---
-ALTER TABLE `course_unit`
-  ADD CONSTRAINT `course_unit_ibfk_1` FOREIGN KEY (`course_id`) REFERENCES `course` (`id`) ON DELETE CASCADE ON UPDATE CASCADE;
-
---
--- Constraints for table `schedule`
---
-ALTER TABLE `schedule`
-  ADD CONSTRAINT `schedule_ibfk_1` FOREIGN KEY (`course_unit_id`) REFERENCES `course_unit` (`id`) ON DELETE CASCADE ON UPDATE CASCADE;
-COMMIT;
-
-/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
-/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
-/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
\ No newline at end of file
diff --git a/phpmyadmin/Dockerfile b/phpmyadmin/Dockerfile
deleted file mode 100644
index 7b8f076..0000000
--- a/phpmyadmin/Dockerfile
+++ /dev/null
@@ -1 +0,0 @@
-FROM phpmyadmin/phpmyadmin:4.7
\ No newline at end of file

From e8128691ba5fa27511588f579e3817c4dfedeaea Mon Sep 17 00:00:00 2001
From: Juliane Marubayashi <juliane.marubayashi@gmail.com>
Date: Sun, 26 Dec 2021 22:48:01 +0000
Subject: [PATCH 19/20] Update: add order which each file will be added to the
 db

---
 csv_to_sql/src/__main__.py | 16 +++++++++++++++-
 csv_to_sql/src/parser.py   |  4 ++--
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/csv_to_sql/src/__main__.py b/csv_to_sql/src/__main__.py
index e17be7b..91490e7 100644
--- a/csv_to_sql/src/__main__.py
+++ b/csv_to_sql/src/__main__.py
@@ -4,9 +4,23 @@
 from .course_faculty import Course_Faculty 
 
 import configparser as cp 
+import os 
+
+# Prefix each generated SQL file with the order in which it should be executed in the database.
+def rename_file(order: int, filename: str):
+    path = "./data/sql"
+    new_name = f"{path}/{order}_{filename}.sql"
+    old_name = f"{path}/{filename}.sql"
+    os.rename(old_name, new_name)
+
+# Order that the files should be added to the database. 
+order = ["faculty", "course", "course_faculty"]
 
 config = cp.ConfigParser()
 config.read("./configparser.ini")
 faculty = Faculty(config).parse()
 course = Course(config).parse()
-course_faculty = Course_Faculty(config).parse()
\ No newline at end of file
+course_faculty = Course_Faculty(config).parse() 
+
+for i, filename in enumerate(order): 
+    rename_file(i+1, filename)
diff --git a/csv_to_sql/src/parser.py b/csv_to_sql/src/parser.py
index 0d0e543..f7f2363 100644
--- a/csv_to_sql/src/parser.py
+++ b/csv_to_sql/src/parser.py
@@ -9,7 +9,7 @@ def __init__(self, table_name: str, csv_name: str = None):
         self.csv_name = csv_name 
 
         # Creating sql.
-        self.f_sql = open(self.get_output_filepath(), "w")    
+        self.f_sql = open(self.get_output_filepath(), "w", encoding="utf-8")    
 
         # Reading csv. 
         if csv_name is not None: 
@@ -47,4 +47,4 @@ def get_values(self, id_, row, with_id=True):
 
     @abstractclassmethod 
     def parser(self):
-        pass 
\ No newline at end of file
+        pass 

From e590960554a7833e181381e55ee931bbf4195f03 Mon Sep 17 00:00:00 2001
From: Juliane Marubayashi <juliane.marubayashi@gmail.com>
Date: Sun, 26 Dec 2021 22:48:16 +0000
Subject: [PATCH 20/20] Update: requirements

---
 csv_to_sql/requirements.txt | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/csv_to_sql/requirements.txt b/csv_to_sql/requirements.txt
index e69de29..0e8d4f1 100644
--- a/csv_to_sql/requirements.txt
+++ b/csv_to_sql/requirements.txt
@@ -0,0 +1,10 @@
+astunparse==1.6.3
+Jinja2==3.0.3
+MarkupSafe==2.0.1
+numpy==1.21.4
+pandas==1.3.4
+pdoc==8.0.1
+Pygments==2.10.0
+python-dateutil==2.8.2
+pytz==2021.3
+six==1.16.0