Skip to content

Commit

Permalink
add tests including downloading
Browse files Browse the repository at this point in the history
  • Loading branch information
jgriesfeller committed Dec 6, 2023
1 parent bdc043e commit 46c15eb
Show file tree
Hide file tree
Showing 2 changed files with 135 additions and 84 deletions.
192 changes: 112 additions & 80 deletions src/pyaro_readers/aeronetsunreader/AeronetSunTimeseriesReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,12 @@
Engine,
)

# import requests, zipfile, io
import requests, zipfile, io
import geocoder

from urllib.parse import urlparse


# from tqdm import tqdm

# default URL
Expand Down Expand Up @@ -72,87 +75,109 @@ def __init__(
self._header = []
_laststatstr = ""

with open(self._filename, newline="") as csvfile:
for _hidx in range(HEADER_LINE_NO - 1):
self._header.append(csvfile.readline())
# get fields from header line although csv can do that as well since we might want to adjust these names
self._fields = csvfile.readline().strip().split(",")

crd = csv.DictReader(csvfile, fieldnames=self._fields, **csvreader_kwargs)
for _ridx, row in enumerate(crd):
if row[SITE_NAME] != _laststatstr:
print(f"reading station {row[SITE_NAME]}...")
_laststatstr = row[SITE_NAME]
# new station
station = row[SITE_NAME]
lon = float(row[LON_NAME])
lat = float(row[LAT_NAME])
alt = float(row["Site_Elevation(m)"])
if fill_country_flag:
try:
country = geocoder.osm([lat, lon], method="reverse").json[
"country_code"
]
country = country.upper()
except:
country = "NN"
else:
country = "NN"
# print(country)
# units of Aeronet data are always 1
units = "1"
if not station in self._stations:
self._stations[station] = Station(
{
"station": station,
"longitude": lon,
"latitude": lat,
"altitude": alt,
"country": country,
"url": "",
"long_name": station,
}
)
# every line contains all variables, sometimes filled with NaNs though
if _ridx == 0:
for variable in DATA_VARS:
if variable in self._data:
da = self._data[variable]
if da.units != units:
raise Exception(
f"unit change from '{da.units}' to 'units'"
)
else:
da = NpStructuredData(variable, units)
self._data[variable] = da

day, month, year = row[DATE_NAME].split(":")
datestring = "-".join([year, month, day])
datestring = "T".join([datestring, row[TIME_NAME]])
start = np.datetime64(datestring)
end = start

ts_dummy_data = {}
for variable in DATA_VARS:
# check if file is a URL
if self.is_valid_url(self._filename):
from urllib.request import urlopen
from io import BytesIO
from zipfile import ZipFile

# try to open as zipfile
try:
r = requests.get(self._filename)
zip_ref = ZipFile(BytesIO(r.content))
for file in zip_ref.namelist():
with zip_ref.open(file) as response:
lines = [line.decode("utf-8") for line in response.readlines()]
# read only 1st file here
break
except:
response = urlopen(self._filename)
lines = [line.decode("utf-8") for line in response.readlines()]

else:
with open(self._filename, newline="") as csvfile:
lines = csvfile.readlines()

for _hidx in range(HEADER_LINE_NO - 1):
self._header.append(lines.pop(0))
# get fields from header line although csv can do that as well since we might want to adjust these names
self._fields = lines.pop(0).strip().split(",")

crd = csv.DictReader(lines, fieldnames=self._fields, **csvreader_kwargs)
for _ridx, row in enumerate(crd):
if row[SITE_NAME] != _laststatstr:
print(f"reading station {row[SITE_NAME]}...")
_laststatstr = row[SITE_NAME]
# new station
station = row[SITE_NAME]
lon = float(row[LON_NAME])
lat = float(row[LAT_NAME])
alt = float(row["Site_Elevation(m)"])
if fill_country_flag:
try:
value = float(row[variable])
if value == NAN_VAL:
value = np.nan
# store value in ts_dummy_data, so we don't need to perform the nan check
# for each component of calculated values again
ts_dummy_data[variable] = value
except KeyError:
# computed variable
if variable == AOD550_NAME:
value = self.compute_od_from_angstromexp(
0.55,
ts_dummy_data[AOD440_NAME],
0.44,
ts_dummy_data[ANG4487_NAME],
)
self._data[variable].append(
value, station, lat, lon, alt, start, end, Flag.VALID, np.nan
country = geocoder.osm([lat, lon], method="reverse").json[
"country_code"
]
country = country.upper()
except:
country = "NN"
else:
country = "NN"

# units of Aeronet data are always 1
units = "1"
if not station in self._stations:
self._stations[station] = Station(
{
"station": station,
"longitude": lon,
"latitude": lat,
"altitude": alt,
"country": country,
"url": "",
"long_name": station,
}
)
# every line contains all variables, sometimes filled with NaNs though
if _ridx == 0:
for variable in DATA_VARS:
if variable in self._data:
da = self._data[variable]
if da.units != units:
raise Exception(
f"unit change from '{da.units}' to 'units'"
)
else:
da = NpStructuredData(variable, units)
self._data[variable] = da

day, month, year = row[DATE_NAME].split(":")
datestring = "-".join([year, month, day])
datestring = "T".join([datestring, row[TIME_NAME]])
start = np.datetime64(datestring)
end = start

ts_dummy_data = {}
for variable in DATA_VARS:
try:
value = float(row[variable])
if value == NAN_VAL:
value = np.nan
# store value in ts_dummy_data, so we don't need to perform the nan check
# for each component of calculated values again
ts_dummy_data[variable] = value
except KeyError:
# computed variable
if variable == AOD550_NAME:
value = self.compute_od_from_angstromexp(
0.55,
ts_dummy_data[AOD440_NAME],
0.44,
ts_dummy_data[ANG4487_NAME],
)
self._data[variable].append(
value, station, lat, lon, alt, start, end, Flag.VALID, np.nan
)

def _unfiltered_data(self, varname) -> Data:
    """Return the raw (unfiltered) Data object collected for *varname*.

    Raises KeyError if the variable was not present in the input file.
    """
    variable_data = self._data[varname]
    return variable_data
Expand Down Expand Up @@ -207,6 +232,13 @@ def calc_angstroem_coeff(
"""
return -np.log(od1 / od2) / np.log(wl1 / wl2)

def is_valid_url(self, url):
    """Return True if *url* parses as an absolute URL.

    A URL is considered valid only when it carries both a scheme
    (e.g. ``https``) and a network location (host); plain file paths
    therefore return False.
    """
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. bad ports)
        return False
    return bool(parsed.scheme) and bool(parsed.netloc)


class AeronetSunTimeseriesEngine(Engine):
def open(self, filename, *args, **kwargs) -> AeronetSunTimeseriesReader:
Expand Down
27 changes: 23 additions & 4 deletions tests/test_AERONETTimeSeriesReader.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
import unittest
import os

import numpy as np
import pyaro
import pyaro.timeseries
from pyaro.timeseries.Wrappers import VariableNameChangingReader

TEST_URL = "https://pyaerocom.met.no/pyaro-suppl/testdata/aeronetsun_testdata.csv"
TEST_ZIP_URL = (
"https://pyaerocom.met.no/pyaro-suppl/testdata/aeronetsun_testdata.csv.zip"
)


class TestAERONETTimeSeriesReader(unittest.TestCase):
file = os.path.join(
Expand All @@ -14,6 +18,24 @@ class TestAERONETTimeSeriesReader(unittest.TestCase):
"aeronetsun_testdata.csv",
)

def test_dl_data_unzipped(self):
    """Download the plain CSV test file over HTTP and check totals.

    Verifies that the reader accepts a URL, and that the downloaded
    dataset yields the expected number of records and stations.
    """
    engine = pyaro.list_timeseries_engines()["aeronetsunreader"]
    with engine.open(TEST_URL, filters=[], fill_country_flag=False) as ts:
        total_records = sum(len(ts.data(var)) for var in ts.variables())
        self.assertEqual(total_records, 49965)
        self.assertEqual(len(ts.stations()), 4)

def test_dl_data_zipped(self):
    """Download the zipped test file over HTTP and check totals.

    Same expectations as the unzipped variant: the reader should
    transparently unpack the zip and read the first member file.
    """
    engine = pyaro.list_timeseries_engines()["aeronetsunreader"]
    with engine.open(TEST_ZIP_URL, filters=[], fill_country_flag=False) as ts:
        total_records = sum(len(ts.data(var)) for var in ts.variables())
        self.assertEqual(total_records, 49965)
        self.assertEqual(len(ts.stations()), 4)

def test_init(self):
engine = pyaro.list_timeseries_engines()["aeronetsunreader"]
self.assertEqual(engine.url(), "https://github.com/metno/pyaro-readers")
Expand Down Expand Up @@ -55,9 +77,6 @@ def test_variables_filter(self):
with engine.open(self.file, filters=[vfilter]) as ts:
self.assertEqual(ts.data(new_var_name).variable, new_var_name)

def test_downloaded_file(self):
pass


if __name__ == "__main__":
unittest.main()

0 comments on commit 46c15eb

Please sign in to comment.