Reorder functionality and blacken the code
* Applied the black formatter to the code
* Removed the test folder, as it only contained a pseudo-test that ran the main part of the package
* Changed setup.py to run the top-level imported function
* Updated the README to explain usage of the package
* Updated the package dependencies, since requests was removed as an external dependency
wagnerpeer committed Jun 9, 2019
1 parent 8b824ad commit fc80701
Showing 9 changed files with 146 additions and 122 deletions.
26 changes: 25 additions & 1 deletion README.md
@@ -1,3 +1,27 @@
 # park-vorhersage
 
-This is a repository for analyzing the parking garage and parking lot data of the Osnabrücker Parkstätten Betriebsgesellschaft (OPG).
+This is a repository for analyzing the parking garage and parking lot data of
+the Osnabrücker Parkstätten Betriebsgesellschaft (OPG).
+
+
+## Usage
+
+If you want to use the package, do the following:
+
+```python
+import park_vorhersage as pv
+
+pv.init()
+pv.scrape_and_store()
+```
+
+The code above imports the package; the `init()` function creates a
+database, and the `scrape_and_store()` function provides the actual
+main action of the package.
+
+Since the central part of the package is simply scraping the data from the
+OPG website and storing it, this function is particularly easy to reach.
+If you installed the package via pip, a so-called "entry point" is
+generated. This lets you simply run the command `parkvorhersage` on the
+command line after installation, and the `scrape_and_store()` function is
+executed automatically.
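For reference, the entry point declared in setup.py maps straight to the top-level function, so the console command is equivalent to this minimal Python sketch. Note that the entry point itself does not call `init()`, so the database schema must already have been created once:

```python
# What the `parkvorhersage` console command executes, expressed in Python:
import park_vorhersage as pv

pv.scrape_and_store()  # scrape the OPG website once and store the snapshot
```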
2 changes: 0 additions & 2 deletions environment.yml
@@ -1,7 +1,5 @@
 name: codefor
 
 dependencies:
-  - BeautifulSoup4
   - pytz
-  - requests
   - sqlalchemy
43 changes: 18 additions & 25 deletions park_vorhersage/__init__.py
@@ -12,40 +12,33 @@
 from sqlalchemy import create_engine
 from sqlalchemy.orm import sessionmaker
 
 
-__all__ = ['Session']
+__all__ = ["Session"]
 
 LOGGING_CONFIGURATION = {
-    'version': 1,
-    'formatters': {
-        'default': {
-            'format': '%(asctime)s %(levelname)-8s %(name)-15s %(message)s',
-            'datefmt': '%Y-%m-%d %H:%M:%S',
+    "version": 1,
+    "formatters": {
+        "default": {
+            "format": "%(asctime)s %(levelname)-8s %(name)-15s %(message)s",
+            "datefmt": "%Y-%m-%d %H:%M:%S",
         }
     },
-    'handlers': {
-        'console': {
-            'class': 'logging.StreamHandler',
-            'level': 'INFO',
-            'formatter': 'default',
-            'stream': 'ext://sys.stdout',
+    "handlers": {
+        "console": {
+            "class": "logging.StreamHandler",
+            "level": "INFO",
+            "formatter": "default",
+            "stream": "ext://sys.stdout",
         }
     },
-    'loggers': {
-        'opg_scraper': {
-            'level': 'INFO',
-            'propagate': True,
-            'handlers': ['console'],
-        }
-    }
+    "loggers": {
+        "opg_scraper": {"level": "INFO", "propagate": True, "handlers": ["console"]}
+    },
 }
 
 
 logging.config.dictConfig(LOGGING_CONFIGURATION)
 
 
-_engine = create_engine('sqlite:///opg.db', echo=True)
+_engine = create_engine("sqlite:///opg.db", echo=True)
 
 Session = sessionmaker(bind=_engine)
 
 from .controler import *
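The dictConfig above attaches a stdout handler to the `opg_scraper` logger, and the package's modules create their loggers under that prefix, so records reach the handler through normal propagation. A minimal sketch of the mechanism, assuming the package (and hence this configuration) has been imported first:

```python
import logging

import park_vorhersage  # importing the package runs dictConfig above

# Child loggers under the "opg_scraper" prefix propagate their records
# up to the configured parent, which writes them to stdout at INFO level.
logger = logging.getLogger("opg_scraper.park_vorhersage.scraper")
logger.info("this record reaches the console handler configured above")
```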
49 changes: 7 additions & 42 deletions park_vorhersage/controler.py
@@ -7,55 +7,20 @@
 information from the official OPG website https://www.parken-osnabrueck.de/
 It can also be used to create the database from the schema definition.
 """
+__all__ = ["init", "scrape_and_store"]
 
 
 from . import Session
 
-from .storage import Base, ParkingRamp, Capacity
+from .storage import Base, Capacity, ParkingRamp, store
 from .scraper import scrape
 
 
-def create():
+def init():
     session = Session()
     Base.metadata.create_all(session.bind)
 
 
-def scrape_website():
-    return scrape()
-
-
-def _create_or_retrieve_objects(tstamp=None,
-                                free_capacity=None,
-                                total_capacity=None,
-                                **kwargs):
-    session = Session()
-
-    parking_ramp = session.query(ParkingRamp).get(kwargs['identifier'])
-
-    if parking_ramp is None:
-        parking_ramp = ParkingRamp(**kwargs)
-
-    capacity = Capacity(tstamp=tstamp,
-                        free_capacity=free_capacity,
-                        total_capacity=total_capacity)
-    return parking_ramp, capacity
-
-
-def store_data(data):
-    session = Session()
-
-    for ramp in data.values():
-        capacity = Capacity(**ramp.pop('utilization'))
-
-        parking_ramp = session.query(ParkingRamp).get(ramp['identifier'])
-
-        if parking_ramp is None:
-            parking_ramp = ParkingRamp(**ramp)
-            session.add(parking_ramp)
-
-        parking_ramp.capacities.append(capacity)
-
-    session.commit()
-
-
 def scrape_and_store():
-    data = scrape_website()
-    store_data(data)
+    data = scrape()
+    store(data)
95 changes: 67 additions & 28 deletions park_vorhersage/scraper.py
@@ -12,71 +12,110 @@
 import json
 import logging
 import re
 import time
-from urllib import robotparser
-
-from bs4 import BeautifulSoup
+from urllib import error, request, robotparser
 
 import pytz
 
-import requests
-
-AGENT_NAME = 'codeforosnabrueckbot'
+AGENT_NAME = "codeforosnabrueckbot"
 
-TIMEZONE_OSNABRUECK = pytz.timezone('Europe/Berlin')
+TIMEZONE_OSNABRUECK = pytz.timezone("Europe/Berlin")
 
-logger = logging.getLogger('opg_scraper.' + __name__)
+logger = logging.getLogger("opg_scraper." + __name__)
+
+
+def raise_for_status(response):
+    """Raises stored :class:`HTTPError`, if one occurred.
+    Taken from requests library.
+    See: https://2.python-requests.org/en/master/_modules/requests/models/#Response.raise_for_status
+    """
+
+    http_error_msg = ""
+    if isinstance(response.reason, bytes):
+        # We attempt to decode utf-8 first because some servers
+        # choose to localize their reason strings. If the string
+        # isn't utf-8, we fall back to iso-8859-1 for all other
+        # encodings. (See PR #3538)
+        try:
+            reason = response.reason.decode("utf-8")
+        except UnicodeDecodeError:
+            reason = response.reason.decode("iso-8859-1")
+    else:
+        reason = response.reason
+
+    if 400 <= response.status < 500:
+        http_error_msg = u"%s Client Error: %s for url: %s" % (
+            response.status,
+            reason,
+            response.url,
+        )
+
+    elif 500 <= response.status < 600:
+        http_error_msg = u"%s Server Error: %s for url: %s" % (
+            response.status,
+            reason,
+            response.url,
+        )
+
+    if http_error_msg:
+        raise error.HTTPError(http_error_msg, response=response)
 
 
 def raise_for_robots_txt(url, agent_name=AGENT_NAME):
     parser = robotparser.RobotFileParser(url)
     parser.read()
 
     if not parser.can_fetch(agent_name, url):
-        raise PermissionError('The robots.txt permitts the crawling of the '
-                              'site {}'.format(url))
+        raise PermissionError(
+            "The robots.txt permitts the crawling of the site {}".format(url)
+        )
 
 
 def get_details(url=None):
-    response = requests.get(url)
-    response.raise_for_status()
-    utilization = json.loads(response.content.decode(response.encoding))
+    with request.urlopen(url) as response:
+        raise_for_status(response)
+        page_source = response.read().decode()
+
+    utilization = json.loads(page_source)
 
-    utilization['access_time'] = datetime.datetime.now(tz=TIMEZONE_OSNABRUECK)
+    utilization["access_time"] = datetime.datetime.now(tz=TIMEZONE_OSNABRUECK)
 
     return utilization
 
 
 def get_general_info():
-    url = r'https://www.parken-osnabrueck.de/'
+    url = r"https://www.parken-osnabrueck.de/"
 
     raise_for_robots_txt(url)
 
-    response = requests.get(url)
-    response.raise_for_status()
-    page_source = response.content.decode(response.encoding)
+    with request.urlopen(url) as response:
+        raise_for_status(response)
+        page_source = response.read().decode()
 
-    parking_ramps = re.search(pattern='var parkingRampData = (\{.*\});',
-                              string=page_source)
+    parking_ramps = re.search(
+        pattern=r"var parkingRampData = (\{.*\});", string=page_source
+    )
 
     parking_ramps = json.loads(html.unescape(parking_ramps.group(1)))
 
-    utilization = get_details(r'https://www.parken-osnabrueck.de/index.php?type=427590&tx_tiopgparkhaeuserosnabrueck_parkingosnabruek[controller]=Parking&tx_tiopgparkhaeuserosnabrueck_parkingosnabruek[action]=ajaxCallGetUtilizationData&_=1556046149040')
+    utilization = get_details(
+        r"https://www.parken-osnabrueck.de/index.php?type=427590&tx_tiopgparkhaeuserosnabrueck_parkingosnabruek[controller]=Parking&tx_tiopgparkhaeuserosnabrueck_parkingosnabruek[action]=ajaxCallGetUtilizationData&_=1556046149040"
+    )
 
     for identifier, ramp_data in parking_ramps.items():
-        logger.info('Parking Ramp Name: {}'.format(ramp_data['name']))
+        logger.info("Parking Ramp Name: %s", ramp_data["name"])
 
-        details = utilization['ramp-' + identifier]
+        details = utilization["ramp-" + identifier]
 
-        logger.info(('{available} von {capacity} frei.').format(**details))
+        logger.info(("{available} von {capacity} frei.").format(**details))
 
-        ramp_data['utilization'] = {'free_capacity': details['available'],
-                                    'total_capacity': details['capacity'],
-                                    'access_time': utilization['access_time']}
+        ramp_data["utilization"] = {
+            "free_capacity": details["available"],
+            "total_capacity": details["capacity"],
+            "access_time": utilization["access_time"],
+        }
 
-        del ramp_data['gmapsMarker']
+        del ramp_data["gmapsMarker"]
 
     return parking_ramps
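As a usage sketch of the new urllib-based helper (hypothetical URL below; note that `request.urlopen` already raises `urllib.error.HTTPError` for most 4xx/5xx responses on its own, so `raise_for_status` acts as a defensive re-check in this flow):

```python
from urllib import request

from park_vorhersage.scraper import raise_for_status

# Hypothetical JSON endpoint, mirroring the fetch pattern in get_details().
with request.urlopen("https://example.com/data.json") as response:
    raise_for_status(response)  # no-op for successful responses
    payload = response.read().decode()
```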
33 changes: 25 additions & 8 deletions park_vorhersage/storage.py
@@ -10,12 +10,13 @@
 from sqlalchemy import Column, ForeignKey, Integer, String
 from sqlalchemy.orm import relationship
 
+from . import Session
+
 Base = declarative_base()
 
 
 class ParkingRamp(Base):
-    __tablename__ = 'parking_ramps'
+    __tablename__ = "parking_ramps"
 
     identifier = Column(Integer, primary_key=True)
     name = Column(String)
@@ -29,18 +30,34 @@ class ParkingRamp(Base):
 
 
 class Capacity(Base):
-    __tablename__ = 'capacities'
+    __tablename__ = "capacities"
 
     identifier = Column(Integer, primary_key=True)
     access_time = Column(Integer)
     free_capacity = Column(Integer)
    total_capacity = Column(Integer)
-    parking_ramp_identifier = Column(Integer,
-                                     ForeignKey('parking_ramps.identifier'))
+    parking_ramp_identifier = Column(Integer, ForeignKey("parking_ramps.identifier"))
 
-    parking_ramp = relationship('ParkingRamp', back_populates='capacities')
+    parking_ramp = relationship("ParkingRamp", back_populates="capacities")
 
 
-ParkingRamp.capacities = relationship('Capacity',
-                                      order_by=Capacity.identifier,
-                                      back_populates='parking_ramp')
+ParkingRamp.capacities = relationship(
+    "Capacity", order_by=Capacity.identifier, back_populates="parking_ramp"
+)
+
+
+def store(data):
+    session = Session()
+
+    for ramp in data.values():
+        capacity = Capacity(**ramp.pop("utilization"))
+
+        parking_ramp = session.query(ParkingRamp).get(ramp["identifier"])
+
+        if parking_ramp is None:
+            parking_ramp = ParkingRamp(**ramp)
+            session.add(parking_ramp)
+
+        parking_ramp.capacities.append(capacity)
+
+    session.commit()
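With the mapping above, stored snapshots can be read back through the exported `Session`; a minimal sketch, assuming `init()` has been run and at least one scrape has been stored:

```python
from park_vorhersage import Session
from park_vorhersage.storage import ParkingRamp

session = Session()
for ramp in session.query(ParkingRamp):
    latest = ramp.capacities[-1]  # capacities are ordered by Capacity.identifier
    print(ramp.name, latest.free_capacity, "of", latest.total_capacity, "free")
```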
2 changes: 0 additions & 2 deletions requirements.txt
@@ -1,4 +1,2 @@
-BeautifulSoup4
 pytz
-requests
 sqlalchemy
8 changes: 4 additions & 4 deletions setup.py
@@ -4,19 +4,19 @@
 from setuptools import setup, find_packages
 
 setup(name='park-vorhersage',
-      version='0.0.1',
+      version='0.0.2',
       description='This is a repository for analyzing the parking garage and parking lot data of the Osnabrücker Parkstätten Betriebsgesellschaft (OPG).',
       author_email='',
       classifiers=['Development Status :: 3 - Alpha',
                    'License :: OSI Approved :: MIT License',
                    'Programming Language :: Python :: 3'],
       keywords='parking scraper',
-      packages=find_packages(exclude=['test']),
-      install_requires=['requests', 'BeautifulSoup4', 'pytz', 'sqlalchemy'],
+      packages=find_packages(),
+      install_requires=['pytz', 'sqlalchemy'],
       python_requires='>=3.5',
       entry_points={
           'console_scripts': [
-              'parkvorhersage=park_vorhersage.controler:scrape_and_store',
+              'parkvorhersage=park_vorhersage:scrape_and_store',
          ],
      },
      )
10 changes: 0 additions & 10 deletions test/test_controler.py

This file was deleted.
