Skip to content

Commit

Permalink
start reading tar file as input
Browse files Browse the repository at this point in the history
  • Loading branch information
Jan Griesfeller committed Dec 19, 2023
1 parent f896739 commit f318d40
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 1 deletion.
24 changes: 24 additions & 0 deletions src/pyaro_readers/aeronetsdareader/AeronetSdaTimeseriesReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
)
import numpy as np
import requests
import tarfile
from pyaro.timeseries import (
AutoFilterReaderEngine,
Data,
Expand Down Expand Up @@ -101,6 +102,29 @@ def __init__(
# read only 1st file here
break
except BadZipFile:
# try reading as tar.gz file
# AERONET's tar archives differ from the zip archives: they contain one file per
# station instead of a single file covering all stations.
# The data format itself is the same, though, so we keep the first station's file
# in full (header lines included) and append only the data lines of every other
# station, skipping their header lines.
# That way we end up with the same line layout as the zip file provides.
r = requests.get(self._filename)
with tarfile.open(fileobj=BytesIO(r.raw.read()), mode="r") as tf:
lines = []
for _midx, member in enumerate(tf.getmembers()):
f = tf.extractfile(member)
if _midx == 0:
lines.extend(
[line.decode("utf-8") for line in f.readlines()]
)
else:
# skip the header lines
for _hidx in range(HEADER_LINE_NO):
dummy = f.readline()

lines.extend([line.decode("utf-8") for line in f.readlines()])
except tarfile.TarError:
# read as text file
response = urlopen(self._filename)
lines = [line.decode("utf-8") for line in response.readlines()]

Expand Down
21 changes: 20 additions & 1 deletion tests/test_AERONETSDATimeSeriesReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
from pyaro.timeseries.Wrappers import VariableNameChangingReader

# Remote test data sets used by the download tests in this module.
# Uncompressed CSV file (used by test_dl_data_unzipped).
TEST_URL = "https://pyaerocom.met.no/pyaro-suppl/testdata/aeronetsda_testdata.csv"
# tar.gz archive (used by test_dl_data_tared).
TEST_TAR_URL = (
"https://pyaerocom.met.no/pyaro-suppl/testdata/SDA_Level20_Daily_V3_testdata.tar.gz"
)
# Presumably the zip-compressed variant of the CSV file, judging by the file name.
TEST_ZIP_URL = (
"https://pyaerocom.met.no/pyaro-suppl/testdata/aeronetsda_testdata.csv.zip"
)
Expand All @@ -29,6 +32,22 @@ def external_resource_available(self, url):
except:
return False

def test_dl_data_tared(self):
    """Open the tar.gz test archive through the reader and check its totals.

    The test is skipped when TEST_TAR_URL is not reachable. Otherwise it
    verifies the summed number of data points over all variables and the
    number of stations reported by the reader.
    """
    url_reachable = self.external_resource_available(TEST_TAR_URL)
    if not url_reachable:
        self.skipTest(f"external resource not available: {TEST_TAR_URL}")

    reader_engine = pyaro.list_timeseries_engines()["aeronetsdareader"]
    with reader_engine.open(
        TEST_TAR_URL,
        filters=[],
        fill_country_flag=False,
        tqdm_desc="test_sda_dl_data_tared",
    ) as reader:
        # total number of data points, summed over every variable
        total_points = sum(
            len(reader.data(var_name)) for var_name in reader.variables()
        )
        self.assertEqual(total_points, 79944)
        self.assertEqual(len(reader.stations()), 4)

def test_dl_data_unzipped(self):
if not self.external_resource_available(TEST_URL):
self.skipTest(f"external resource not available: {TEST_URL}")
Expand Down Expand Up @@ -121,7 +140,7 @@ def test_variables_filter(self):
engine = pyaro.list_timeseries_engines()["aeronetsdareader"]
new_var_name = "od550gt1aer"
vfilter = pyaro.timeseries.filters.get(
"variables", reader_to_new={"Coarse_Mode_AOD_500nm[tau_c]": new_var_name}
"variables", reader_to_new={"AODGT1_550nm": new_var_name}
)
with engine.open(
self.file, filters=[vfilter], tqdm_desc="test_sda_variables_filter"
Expand Down

0 comments on commit f318d40

Please sign in to comment.