Merge pull request #5 from metno/4-make-the-aeronetsunreader-really-read-aeronet-sun-data

4 make the aeronetsunreader really read aeronet sun data
jgriesfeller authored Dec 7, 2023
2 parents 532461d + 5e45559 commit 6a953e0
Showing 5 changed files with 10,323 additions and 66 deletions.
35 changes: 35 additions & 0 deletions .github/workflows/CI.yml
@@ -28,3 +28,38 @@ jobs:
- name: Run linters
continue-on-error: true
run: tox -e lint

venv:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
python-version: [ '3.11' ]
experimental: [ false ]
os: [ ubuntu-22.04 ]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Python info
run: |
which python
python --version
- name: Cache pip and tox
uses: actions/cache@v3
if: ${{ ! matrix.experimental }}
with:
path: |
~/.cache/pip
~/.cache/srtm
.tox
key: ${{ matrix.os }}-tox-${{ matrix.python-version }}-${{ hashFiles('setup.cfg', 'pyproject.toml') }}
- name: Install Tox
run: |
python -m pip install --upgrade pip
python -m pip install tox
- name: Run test
continue-on-error: ${{ matrix.experimental }}
run: tox -e py
10 changes: 6 additions & 4 deletions pyproject.toml
@@ -21,7 +21,12 @@ classifiers = [
"Topic :: Scientific/Engineering :: Atmospheric Science",
]
requires-python = ">=3.9"
dependencies = ["pyaro@git+https://github.com/metno/pyaro"]
dependencies = [
"pyaro@git+https://github.com/metno/pyaro",
"requests",
"geocoder",
"tqdm",
]

[project.urls]
"Homepage" = "https://github.com/metno/pyaro-readers"
@@ -49,9 +54,6 @@ skip_missing_interpreters = True
isolated_build = True
envlist =
py39
py310
py311
format
262 changes: 200 additions & 62 deletions src/pyaro_readers/aeronetsunreader/AeronetSunTimeseriesReader.py
@@ -1,73 +1,183 @@
import csv
from io import BytesIO
from urllib.parse import urlparse
from urllib.request import urlopen
from zipfile import BadZipFile, ZipFile

import geocoder
import numpy as np
from pyaro.timeseries import Data, NpStructuredData, Flag, Reader, Station, Engine
import requests
from pyaro.timeseries import (
AutoFilterReaderEngine,
Data,
Engine,
Flag,
NpStructuredData,
Station,
)
from tqdm import tqdm

# default URL
BASE_URL = "https://aeronet.gsfc.nasa.gov/data_push/V3/All_Sites_Times_Daily_Averages_AOD20.zip"
# number of lines to read before the reading is handed to Python's csv reader
HEADER_LINE_NO = 7
DELIMITER = ","
# value AERONET uses to flag missing data
NAN_VAL = -999.0
# update progress bar every N lines...
PG_UPDATE_LINES = 100
# main variables to store
LAT_NAME = "Site_Latitude(Degrees)"
LON_NAME = "Site_Longitude(Degrees)"
ALT_NAME = "Site_Elevation(m)"
SITE_NAME = "AERONET_Site_Name"
DATE_NAME = "Date(dd:mm:yyyy)"
TIME_NAME = "Time(hh:mm:ss)"
AOD500_NAME = "AOD_500nm"
ANG4487_NAME = "440-870_Angstrom_Exponent"
AOD440_NAME = "AOD_440nm"
AOD870_NAME = "AOD_870nm"
AOD550_NAME = "AOD_550nm"

DATA_VARS = [AOD500_NAME, ANG4487_NAME, AOD440_NAME, AOD870_NAME]
COMPUTED_VARS = [AOD550_NAME]
# The computed variables have to be listed after the ones read from the file, otherwise the calculation will fail!
DATA_VARS.extend(COMPUTED_VARS)

FILL_COUNTRY_FLAG = False


class AeronetSunTimeseriesReader(AutoFilterReaderEngine.AutoFilterReader):
def __init__(
self,
filename,
columns=dict(
variable=0,
station=1,
longitude=2,
latitude=3,
value=4,
units=5,
start_time=6,
end_time=7,
),
variable_units={"SOx": "Gg", "NOx": "Mg"},
csvreader_kwargs={"delimiter": ","},
csvreader_kwargs={"delimiter": DELIMITER},
filters=[],
fill_country_flag: bool = FILL_COUNTRY_FLAG,
):
"""open a new csv timeseries-reader
:param filename_or_obj_or_url: path-like object to csv-file
:param filename_or_obj_or_url: path-like object to csv-file
input file looks like this:
AERONET Version 3;
Cuiaba
Version 3: AOD Level 2.0
The following data are automatically cloud cleared and quality assured with pre-field and post-field calibration applied.
        Contact: PI=Pawan Gupta and Elena Lind; PI Email=Pawan.Gupta@nasa.gov and Elena.Lind@nasa.gov
Daily Averages,UNITS can be found at,,, https://aeronet.gsfc.nasa.gov/new_web/units.html
AERONET_Site,Date(dd:mm:yyyy),Time(hh:mm:ss),Day_of_Year,AOD_1640nm,AOD_1020nm,AOD_870nm,AOD_865nm,AOD_779nm,AOD_675nm,AOD_667nm,AOD_620nm,AOD_560nm,AOD_555nm,AOD_551nm,AOD_532nm,AOD_531nm,AOD_510nm,AOD_500nm,AOD_490nm,AOD_443nm,AOD_440nm,AOD_412nm,AOD_400nm,AOD_380nm,AOD_340nm,Precipitable_Water(cm),AOD_681nm,AOD_709nm,AOD_Empty,AOD_Empty,AOD_Empty,AOD_Empty,AOD_Empty,440-870_Angstrom_Exponent,380-500_Angstrom_Exponent,440-675_Angstrom_Exponent,500-870_Angstrom_Exponent,340-440_Angstrom_Exponent,440-675_Angstrom_Exponent[Polar],N[AOD_1640nm],N[AOD_1020nm],N[AOD_870nm],N[AOD_865nm],N[AOD_779nm],N[AOD_675nm],N[AOD_667nm],N[AOD_620nm],N[AOD_560nm],N[AOD_555nm],N[AOD_551nm],N[AOD_532nm],N[AOD_531nm],N[AOD_510nm],N[AOD_500nm],N[AOD_490nm],N[AOD_443nm],N[AOD_440nm],N[AOD_412nm],N[AOD_400nm],N[AOD_380nm],N[AOD_340nm],N[Precipitable_Water(cm)],N[AOD_681nm],N[AOD_709nm],N[AOD_Empty],N[AOD_Empty],N[AOD_Empty],N[AOD_Empty],N[AOD_Empty],N[440-870_Angstrom_Exponent],N[380-500_Angstrom_Exponent],N[440-675_Angstrom_Exponent],N[500-870_Angstrom_Exponent],N[340-440_Angstrom_Exponent],N[440-675_Angstrom_Exponent[Polar]],Data_Quality_Level,AERONET_Instrument_Number,AERONET_Site_Name,Site_Latitude(Degrees),Site_Longitude(Degrees),Site_Elevation(m)
Cuiaba,16:06:1993,12:00:00,167,-999.,0.081800,0.088421,-999.,-999.,0.095266,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,0.117581,-999.,-999.,-999.,0.149887,2.487799,-999.,-999.,-999.,-999.,-999.,-999.,-999.,0.424234,-999.,0.497630,-999.,0.924333,-999.,0,3,3,0,0,3,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,3,6,0,0,0,0,0,0,0,3,0,3,0,3,0,lev20,3,Cuiaba,-15.555244,-56.070214,234.000000
Cuiaba,17:06:1993,12:00:00,168,-999.,0.092246,0.099877,-999.,-999.,0.110915,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,0.144628,-999.,-999.,-999.,0.187276,2.592902,-999.,-999.,-999.,-999.,-999.,-999.,-999.,0.547807,-999.,0.628609,-999.,0.988320,-999.,0,16,16,0,0,16,0,0,0,0,0,0,0,0,0,0,0,16,0,0,0,16,32,0,0,0,0,0,0,0,16,0,16,0,16,0,lev20,3,Cuiaba,-15.555244,-56.070214,234.000000
"""
self._filename = filename
self._stations = {}
self._data = {} # var -> {data-array}
self._filters = filters
self._header = []
_laststatstr = ""

# check if file is a URL
if self.is_valid_url(self._filename):
# try to open as zipfile
try:
r = requests.get(self._filename)
zip_ref = ZipFile(BytesIO(r.content))
for file in zip_ref.namelist():
with zip_ref.open(file) as response:
lines = [line.decode("utf-8") for line in response.readlines()]
# read only 1st file here
break
except BadZipFile:
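                # not a zip file after all; read the URL as plain text instead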
response = urlopen(self._filename)
lines = [line.decode("utf-8") for line in response.readlines()]

else:
with open(self._filename, newline="") as csvfile:
lines = csvfile.readlines()

for _hidx in range(HEADER_LINE_NO - 1):
self._header.append(lines.pop(0))
        # read the field names from the header line ourselves (csv.DictReader could do it, but we might want to adjust these names)
self._fields = lines.pop(0).strip().split(",")

crd = csv.DictReader(lines, fieldnames=self._fields, **csvreader_kwargs)
bar = tqdm(total=len(lines))
for _ridx, row in enumerate(crd):
bar.update(1)
if row[SITE_NAME] != _laststatstr:
_laststatstr = row[SITE_NAME]
# new station
station = row[SITE_NAME]
lon = float(row[LON_NAME])
lat = float(row[LAT_NAME])
alt = float(row["Site_Elevation(m)"])
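                # optionally determine the station's country code via OSM reverse geocoding (one network call per new station)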
if fill_country_flag:
try:
country = geocoder.osm([lat, lon], method="reverse").json[
"country_code"
]
country = country.upper()
                    except Exception:
country = "NN"
else:
country = "NN"

# units of Aeronet data are always 1
units = "1"
                if station not in self._stations:
self._stations[station] = Station(
{
"station": station,
"longitude": lon,
"latitude": lat,
"altitude": 0,
"country": "NO",
"altitude": alt,
"country": country,
"url": "",
"long_name": station,
}
)
# every line contains all variables, sometimes filled with NaNs though
if _ridx == 0:
for variable in DATA_VARS:
if variable in self._data:
da = self._data[variable]
if da.units != units:
                            raise Exception(
                                f"unit change from '{da.units}' to '{units}'"
                            )
else:
da = NpStructuredData(variable, units)
self._data[variable] = da

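            # e.g. DATE "16:06:1993" and TIME "12:00:00" combine to "1993-06-16T12:00:00"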
day, month, year = row[DATE_NAME].split(":")
datestring = "-".join([year, month, day])
datestring = "T".join([datestring, row[TIME_NAME]])
start = np.datetime64(datestring)
end = start

ts_dummy_data = {}
for variable in DATA_VARS:
try:
value = float(row[variable])
if value == NAN_VAL:
value = np.nan
# store value in ts_dummy_data, so we don't need to perform the nan check
# for each component of calculated values again
ts_dummy_data[variable] = value
except KeyError:
# computed variable
if variable == AOD550_NAME:
value = self.compute_od_from_angstromexp(
0.55,
ts_dummy_data[AOD440_NAME],
0.44,
ts_dummy_data[ANG4487_NAME],
)
self._data[variable].append(
value, station, lat, lon, alt, start, end, Flag.VALID, np.nan
)
bar.close()

def _unfiltered_data(self, varname) -> Data:
return self._data[varname]
@@ -78,29 +188,57 @@ def _unfiltered_stations(self) -> dict[str, Station]:
def _unfiltered_variables(self) -> list[str]:
return list(self._data.keys())

def variables(self) -> list[str]:
vars = self._unfiltered_variables()
for fi in self._filters:
vars = fi.filter_variables(vars)
return vars

def stations(self) -> dict[str, Station]:
stats = self._unfiltered_stations()
for fi in self._filters:
stats = fi.filter_stations(stats)
return stats

def data(self, varname) -> Data:
dat = self._unfiltered_data(varname)
stats = self._unfiltered_stations()
vars = self._unfiltered_variables()
for fi in self._filters:
dat = fi.filter_data(dat, stats, vars)
return dat

def close(self):
pass

def compute_od_from_angstromexp(
self, to_lambda: float, od_ref: float, lambda_ref: float, angstrom_coeff: float
) -> float:
"""Compute AOD at specified wavelength
Uses Angstrom coefficient and reference AOD to compute the
corresponding wavelength shifted AOD
Parameters
----------
to_lambda : :obj:`float` or :obj:`ndarray`
wavelength for which AOD is calculated
od_ref : :obj:`float` or :obj:`ndarray`
reference AOD
lambda_ref : :obj:`float` or :obj:`ndarray`
wavelength corresponding to reference AOD
angstrom_coeff : :obj:`float` or :obj:`ndarray`
Angstrom coefficient
Returns
-------
:obj:`float` or :obj:`ndarray`
AOD(s) at shifted wavelength
"""
return od_ref * (lambda_ref / to_lambda) ** angstrom_coeff
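    # Worked example (illustrative only): with AOD_440nm = 0.30 and a
    # 440-870 Angstrom exponent of 1.2, the 550 nm AOD comes out as
    # 0.30 * (0.44 / 0.55) ** 1.2 ≈ 0.30 * 0.765 ≈ 0.23.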

def calc_angstroem_coeff(
self, od1: float, od2: float, wl1: float, wl2: float
) -> float:
"""
        small helper method to calculate the Angstrom exponent
        :param od1: AOD at the first wavelength wl1
        :param od2: AOD at the second wavelength wl2
        :param wl1: first wavelength
        :param wl2: second wavelength
        :return: Angstrom exponent
"""
return -np.log(od1 / od2) / np.log(wl1 / wl2)
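    # Sanity check (illustrative only): AODs of 0.30 at 440 nm and 0.13 at
    # 870 nm give -ln(0.30 / 0.13) / ln(0.44 / 0.87) ≈ 1.23, consistent
    # with the exponent used in the example above.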

def is_valid_url(self, url):
try:
result = urlparse(url)
return all([result.scheme, result.netloc])
except ValueError:
return False
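    # e.g. is_valid_url("https://example.com/data.zip") -> True, while
    # is_valid_url("/tmp/data.csv") -> False (no scheme/netloc), so local
    # paths fall through to the plain-file branch in __init__.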


class AeronetSunTimeseriesEngine(Engine):
def open(self, filename, *args, **kwargs) -> AeronetSunTimeseriesReader:
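For context, a minimal usage sketch of the new reader (not part of this commit; the import path, the pass-through of keyword arguments via Engine.open, and the availability of the default AOD20 zip URL are assumptions):

from pyaro_readers.aeronetsunreader.AeronetSunTimeseriesReader import (
    AeronetSunTimeseriesEngine,
    BASE_URL,
)

# download and parse the daily AOD 2.0 averages (large file; this takes a while)
reader = AeronetSunTimeseriesEngine().open(BASE_URL, filters=[], fill_country_flag=False)
print(reader.variables())          # includes the computed "AOD_550nm"
aod550 = reader.data("AOD_550nm")  # NpStructuredData with one entry per station and time
reader.close()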