From 1456727abc66fe51bf9f9627d6af69ad17c33e2e Mon Sep 17 00:00:00 2001
From: Panos Mavrogiorgos <pmav99@gmail.com>
Date: Wed, 26 Jun 2024 13:22:39 +0300
Subject: [PATCH 01/15] chore: Update pre-commit

---
 .pre-commit-config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index fdaf062..053a4d2 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -41,7 +41,7 @@ repos:
       - id: "shellcheck"
 
   - repo: "https://github.com/python-jsonschema/check-jsonschema"
-    rev: "0.28.5"
+    rev: "0.28.6"
     hooks:
       - id: "check-github-workflows"
       - id: "check-readthedocs"
@@ -60,7 +60,7 @@ repos:
 
   - repo: "https://github.com/charliermarsh/ruff-pre-commit"
     # Ruff version.
-    rev: 'v0.4.9'
+    rev: 'v0.4.10'
     hooks:
       - id: "ruff"
 

From 0209bcdf7d78a3b066aa0eff2c9e09de7f61ddea Mon Sep 17 00:00:00 2001
From: Panos Mavrogiorgos <pmav99@gmail.com>
Date: Wed, 26 Jun 2024 13:37:29 +0300
Subject: [PATCH 02/15] ci: Use release/v1 branch for build.yml

Fixes #106

[skip ci]
---
 .github/workflows/build.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 41d02e4..b3e65c7 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -13,13 +13,13 @@ jobs:
       - uses: actions/checkout@main
       - uses: actions/setup-python@main
         with:
-          python-version: '3.x'
+          python-version: "3.x"
       - uses: actions/cache@main
         with:
           path: ${{ env.pythonLocation }}
           key: build-${{ runner.os }}-${{ env.pythonLocation }}-${{ hashFiles('pyproject.toml', 'setup.*') }}
       - run: pip wheel . --no-deps -w dist
-      - uses: pypa/gh-action-pypi-publish@master
+      - uses: pypa/gh-action-pypi-publish@release/v1
         with:
           user: __token__
           password: ${{ secrets.PYPI_TOKEN }}

From 958ae13ab2678c3fab454b6fd1a99ce9e8261390 Mon Sep 17 00:00:00 2001
From: abdu558 <arefabdu55@gmail.com>
Date: Sun, 18 Aug 2024 21:56:34 +0100
Subject: [PATCH 03/15] Add fetch_usace_station function to __init__

---
 searvey/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/searvey/__init__.py b/searvey/__init__.py
index 5d621e9..8e2c628 100644
--- a/searvey/__init__.py
+++ b/searvey/__init__.py
@@ -10,7 +10,7 @@
 from searvey.stations import get_stations
 from searvey.stations import Provider
 from searvey.usgs import get_usgs_stations
-
+from searvey._usace_api import fetch_usace_station
 __version__ = importlib.metadata.version(__name__)
 
 
@@ -24,4 +24,5 @@
     "get_usgs_stations",
     "Provider",
     "__version__",
+    "fetch_usace_station",
 ]

From d2d9edaeeb12aed9eb00355755ef7a51fd6bd499 Mon Sep 17 00:00:00 2001
From: abdu558 <arefabdu55@gmail.com>
Date: Sun, 18 Aug 2024 21:58:43 +0100
Subject: [PATCH 04/15] Add USACE API module and fetch_usace_station function

---
 searvey/_usace_api.py | 61 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 searvey/_usace_api.py

diff --git a/searvey/_usace_api.py b/searvey/_usace_api.py
new file mode 100644
index 0000000..d5694f0
--- /dev/null
+++ b/searvey/_usace_api.py
@@ -0,0 +1,61 @@
+import logging
+import xml.etree.ElementTree as ET
+from datetime import datetime
+from collections import abc
+
+import httpx
+import multifutures
+import pandas as pd
+
+from ._common import _fetch_url, _resolve_end_date, _resolve_http_client, _resolve_rate_limit, _resolve_start_date, _to_utc
+from .custom_types import DatetimeLike
+
+logger = logging.getLogger(__name__)
+
+BASE_URL = "https://rivergages.mvr.usace.army.mil/watercontrol/webservices/rest/webserviceWaterML.cfc?method=RGWML&meth=getValues&location={location}&site={site}&variable={variable}&beginDate={begin_date}&endDate={end_date}&authToken=RiverGages"
+
+def fetch_usace_station(
+    station_id: str,
+    start_date: DatetimeLike | None = None,
+    end_date: DatetimeLike | None = None,
+    *,
+    rate_limit: multifutures.RateLimit | None = None,
+    http_client: httpx.Client | None = None,
+    multiprocessing_executor: multifutures.ExecutorProtocol | None = None,
+    multithreading_executor: multifutures.ExecutorProtocol | None = None,
+) -> pd.DataFrame:
+    """
+    Make a query to the USACE API for river gauge data for ``station_id``
+    and return the results as a ``pandas.DataFrame``.
+
+    :param station_id: The station identifier.
+    :param start_date: The starting date of the query. Defaults to 7 days ago.
+    :param end_date: The finishing date of the query. Defaults to "now".
+    :param variable: The variable to fetch. Defaults to "HG" (gauge height).
+    :param rate_limit: The rate limit for making requests to the USACE servers.
+    :param http_client: The ``httpx.Client``.
+    :param multiprocessing_executor: An instance of a class implementing the ``concurrent.futures.Executor`` API.
+    :param multithreading_executor: An instance of a class implementing the ``concurrent.futures.Executor`` API.
+    """
+    logger.info("USACE-%s: Starting scraping: %s - %s", station_id, start_date, end_date)
+    now = pd.Timestamp.now("utc")
+    try:
+        df = _fetch_usace(
+            station_ids=[station_id],
+            start_dates=_resolve_start_date(now, start_date),
+            end_dates=_resolve_end_date(now, end_date),
+            rate_limit=rate_limit,
+            http_client=http_client,
+            multiprocessing_executor=multiprocessing_executor,
+            multithreading_executor=multithreading_executor,
+        ).get(station_id, pd.DataFrame())
+    except Exception as e:
+        logger.error(f"USACE-{station_id}: An error occurred while fetching data: {str(e)}")
+        df = pd.DataFrame()
+
+    if df.empty:
+        logger.warning(f"USACE-{station_id}: No data retrieved for the specified period.")
+    else:
+        logger.info("USACE-%s: Finished scraping: %s - %s", station_id, start_date, end_date)
+
+    return df
\ No newline at end of file

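A sketch of how the ``._common`` date helpers used above resolve the optional
dates, assuming they behave as the docstring and a later patch's comment
indicate (one-element DatetimeIndex results, 7-day default window):

import pandas as pd
from searvey._common import _resolve_start_date, _resolve_end_date

now = pd.Timestamp.now("utc")
# Per the docstring above: start_date defaults to 7 days ago, end_date to "now".
print(_resolve_start_date(now, None))        # e.g. DatetimeIndex([now - 7 days])
print(_resolve_end_date(now, "2020-04-10"))  # DatetimeIndex(['2020-04-10'])
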
From c26f1078adcf63b06630241b3a23525ce0932c81 Mon Sep 17 00:00:00 2001
From: abdu558 <arefabdu55@gmail.com>
Date: Sun, 18 Aug 2024 22:00:35 +0100
Subject: [PATCH 05/15] Add _fetch_usace function for retrieving USACE data

---
 searvey/_usace_api.py | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/searvey/_usace_api.py b/searvey/_usace_api.py
index d5694f0..adf0e84 100644
--- a/searvey/_usace_api.py
+++ b/searvey/_usace_api.py
@@ -14,6 +14,44 @@
 
 BASE_URL = "https://rivergages.mvr.usace.army.mil/watercontrol/webservices/rest/webserviceWaterML.cfc?method=RGWML&meth=getValues&location={location}&site={site}&variable={variable}&beginDate={begin_date}&endDate={end_date}&authToken=RiverGages"
 
+def _fetch_usace(
+    station_ids: abc.Collection[str],
+    start_dates: pd.DatetimeIndex,
+    end_dates: pd.DatetimeIndex,
+    *,
+    rate_limit: multifutures.RateLimit | None,
+    http_client: httpx.Client | None,
+    multiprocessing_executor: multifutures.ExecutorProtocol | None,
+    multithreading_executor: multifutures.ExecutorProtocol | None,
+) -> dict[str, pd.DataFrame]:
+    rate_limit = _resolve_rate_limit(rate_limit)
+    http_client = _resolve_http_client(http_client)
+    start_dates = _to_utc(start_dates)
+    end_dates = _to_utc(end_dates)
+
+    usace_responses = _retrieve_usace_data(
+        station_ids=station_ids,
+        start_dates=start_dates,
+        end_dates=end_dates,
+        rate_limit=rate_limit,
+        http_client=http_client,
+        executor=multithreading_executor,
+    )
+
+    dataframes = {}
+    for response in usace_responses:
+        station_id = response.kwargs["station_id"]
+        if response.exception:
+            logger.error(f"USACE-{station_id}: Failed to retrieve data. Error: {response.exception}")
+            continue
+        df = _parse_xml_data(response.result, station_id)
+        if not df.empty:
+            dataframes[station_id] = df
+        else:
+            logger.warning(f"USACE-{station_id}: No data retrieved or parsed.")
+
+    return dataframes
+
 def fetch_usace_station(
     station_id: str,
     start_date: DatetimeLike | None = None,

From 660ab6dc45410d437c248afcec5b68f0bdc69bdf Mon Sep 17 00:00:00 2001
From: abdu558 <arefabdu55@gmail.com>
Date: Sun, 18 Aug 2024 22:01:36 +0100
Subject: [PATCH 06/15] Add _retrieve_usace_data and URL generation functions
 for retrieving USACE data

---
 searvey/_usace_api.py | 50 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/searvey/_usace_api.py b/searvey/_usace_api.py
index adf0e84..0975869 100644
--- a/searvey/_usace_api.py
+++ b/searvey/_usace_api.py
@@ -14,6 +14,56 @@
 
 BASE_URL = "https://rivergages.mvr.usace.army.mil/watercontrol/webservices/rest/webserviceWaterML.cfc?method=RGWML&meth=getValues&location={location}&site={site}&variable={variable}&beginDate={begin_date}&endDate={end_date}&authToken=RiverGages"
 
+
+def _generate_urls(
+    station_id: str,
+    start_date: pd.Timestamp,
+    end_date: pd.Timestamp,
+) -> list[str]:
+    if end_date < start_date:
+        raise ValueError(f"'end_date' must be after 'start_date': {end_date} vs {start_date}")
+    if end_date == start_date:
+        return []
+
+    url = BASE_URL.format(
+        location=station_id,
+        site=station_id,
+        variable="HG",
+        begin_date=start_date.strftime("%Y-%m-%dT%H:%M"),
+        end_date=end_date.strftime("%Y-%m-%dT%H:%M")
+    )
+    print(url)
+    return [url]
+
+def _retrieve_usace_data(
+    station_ids: abc.Collection[str],
+    start_dates: abc.Collection[pd.Timestamp],
+    end_dates: abc.Collection[pd.Timestamp],
+    rate_limit: multifutures.RateLimit,
+    http_client: httpx.Client,
+    executor: multifutures.ExecutorProtocol | None,
+) -> list[multifutures.FutureResult]:
+    kwargs = []
+    for station_id, start_date, end_date in zip(station_ids, start_dates, end_dates):
+        for url in _generate_urls(station_id=station_id, start_date=start_date, end_date=end_date):
+            if url:
+                kwargs.append(
+                    dict(
+                        station_id=station_id,
+                        url=url,
+                        client=http_client,
+                        rate_limit=rate_limit,
+                    ),
+                )
+    with http_client:
+        logger.debug("Starting data retrieval")
+        results = multifutures.multithread(
+            func=_fetch_url, func_kwargs=kwargs, check=False, executor=executor
+        )
+        logger.debug("Finished data retrieval")
+    return results
+
+
 def _fetch_usace(
     station_ids: abc.Collection[str],
     start_dates: pd.DatetimeIndex,

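For reference, a sketch of the request URL that _generate_urls interpolates
into BASE_URL, using the station and dates from the example notebook later in
this series:

import pandas as pd
from searvey._usace_api import _generate_urls

urls = _generate_urls(
    station_id="01300",
    start_date=pd.Timestamp("2020-04-05"),
    end_date=pd.Timestamp("2020-04-10"),
)
# Yields a single URL with location=01300, site=01300, variable=HG,
# beginDate=2020-04-05T00:00, endDate=2020-04-10T00:00 and the fixed
# authToken=RiverGages query parameter.
print(urls[0])
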
From 49d4db5df1dd7a269418e7624a20579e493edc6a Mon Sep 17 00:00:00 2001
From: abdu558 <arefabdu55@gmail.com>
Date: Sun, 18 Aug 2024 22:02:09 +0100
Subject: [PATCH 07/15] Add XML data parsing function for USACE API

---
 searvey/_usace_api.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/searvey/_usace_api.py b/searvey/_usace_api.py
index 0975869..e77d56c 100644
--- a/searvey/_usace_api.py
+++ b/searvey/_usace_api.py
@@ -14,6 +14,31 @@
 
 BASE_URL = "https://rivergages.mvr.usace.army.mil/watercontrol/webservices/rest/webserviceWaterML.cfc?method=RGWML&meth=getValues&location={location}&site={site}&variable={variable}&beginDate={begin_date}&endDate={end_date}&authToken=RiverGages"
 
+def _parse_xml_data(content: str, station_id: str) -> pd.DataFrame:
+    try:
+        namespace = {'wml': 'http://www.cuahsi.org/waterML/1.0/'}
+        root = ET.fromstring(content)
+        values_element = root.find(".//wml:values", namespaces=namespace)
+
+        if values_element is None:
+            logger.warning(f"{station_id}: No 'values' element found in the XML.")
+            return pd.DataFrame()
+
+        data = []
+        for value_element in values_element.findall("wml:value", namespaces=namespace):
+            date_time = value_element.get("dateTime")
+            value = value_element.text
+            date_time_obj = datetime.strptime(date_time, "%Y-%m-%dT%H:%M:%S")
+            data.append({'time': date_time_obj, 'value': float(value)})
+
+        df = pd.DataFrame(data)
+        df.set_index('time', inplace=True)
+        df.index = pd.to_datetime(df.index, utc=True)
+        df.attrs["station_id"] = f"USACE-{station_id}"
+        return df
+    except ET.ParseError:
+        logger.error(f"{station_id}: Failed to parse XML data.")
+        return pd.DataFrame()
 
 def _generate_urls(
     station_id: str,

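_parse_xml_data expects a WaterML 1.0 payload. A hypothetical snippet matching
the elements the parser looks for (the namespace and element names come from
the code above; the readings themselves are invented):

from searvey._usace_api import _parse_xml_data

sample = """<?xml version="1.0"?>
<timeSeriesResponse xmlns="http://www.cuahsi.org/waterML/1.0/">
  <timeSeries>
    <values>
      <value dateTime="2020-04-05T00:00:00">3.2</value>
      <value dateTime="2020-04-05T01:00:00">3.4</value>
    </values>
  </timeSeries>
</timeSeriesResponse>"""

# Returns a DataFrame indexed by a UTC "time" index with a float "value"
# column, and attrs["station_id"] == "USACE-01300".
df = _parse_xml_data(sample, station_id="01300")
print(df)
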
From e78ca7c09c2bd723c6da030c0ad9e8814d7e182d Mon Sep 17 00:00:00 2001
From: abdu558 <arefabdu55@gmail.com>
Date: Sun, 18 Aug 2024 22:11:19 +0100
Subject: [PATCH 08/15] Add example code for Army Corps WL

---
 USACE_data.ipynb | 125 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 125 insertions(+)
 create mode 100644 USACE_data.ipynb

diff --git a/USACE_data.ipynb b/USACE_data.ipynb
new file mode 100644
index 0000000..68f62f0
--- /dev/null
+++ b/USACE_data.ipynb
@@ -0,0 +1,125 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Set up for Army Corps WL data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import logging\n",
+    "import geopandas as gpd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import pandas as pd\n",
+    "import httpx\n",
+    "from searvey._usace_api import fetch_usace_station\n",
+    "\n",
+    "logging.basicConfig(\n",
+    "    level=20,\n",
+    "    style=\"{\",\n",
+    "    format=\"{asctime:s}; {levelname:8s}; {threadName:23s}; {name:<25s} {lineno:5d}; {message:s}\",\n",
+    ")\n",
+    "\n",
+    "logging.getLogger(\"urllib3\").setLevel(30)\n",
+    "logging.getLogger(\"parso\").setLevel(30)\n",
+    "\n",
+    "logger = logging.getLogger(__name__)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Fetch WL data from a single station"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "# Define start and end dates for data retrieval\n",
+    "\n",
+    "df = fetch_usace_station(\"01300\", start_date=\"2020-04-05\", end_date=\"2020-04-10\",http_client=httpx.Client(verify=False))\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Fetch Army Corps Water Level Data from multiple station"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from searvey._usace_api import _fetch_usace\n",
+    "import httpx\n",
+    "import pandas as pd\n",
+    "# df = _fetch_usace(station_ids=[\"01300\"], start_dates=\"2020-04-05\", end_dates=\"2020-04-10\",http_client=httpx.Client(verify=False))\n",
+    "\n",
+    "\n",
+    "df = _fetch_usace(\n",
+    "    station_ids=[\"01300\"],\n",
+    "    start_dates=pd.DatetimeIndex([\"2020-04-05\"]),\n",
+    "    end_dates=pd.DatetimeIndex([\"2020-04-10\"]),\n",
+    "    rate_limit=None,\n",
+    "    http_client=httpx.Client(verify=False),\n",
+    "    multiprocessing_executor=None,\n",
+    "    multithreading_executor=None\n",
+    ")\n",
+    "df['01300']"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Graph the data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import hvplot.pandas\n",
+    "\n",
+    "df[\"01300\"].hvplot(title=\"Army Corps WL values\", xlabel=\"Index\", ylabel=\"Value\")\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

From 5f7e7d866c88009de556c580add7e410e9e3bbac Mon Sep 17 00:00:00 2001
From: abdu558 <arefabdu55@gmail.com>
Date: Sun, 18 Aug 2024 22:37:37 +0100
Subject: [PATCH 09/15] Move USACE example file into the examples folder

---
 USACE_data.ipynb => examples/USACE_data.ipynb | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename USACE_data.ipynb => examples/USACE_data.ipynb (100%)

diff --git a/USACE_data.ipynb b/examples/USACE_data.ipynb
similarity index 100%
rename from USACE_data.ipynb
rename to examples/USACE_data.ipynb

From 61b209210f09dcbe3de9a80793ccd02b9836f265 Mon Sep 17 00:00:00 2001
From: abdu558 <arefabdu55@gmail.com>
Date: Tue, 20 Aug 2024 00:14:44 +0100
Subject: [PATCH 10/15] Update USACE examples and comments

---
 examples/USACE_data.ipynb | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/examples/USACE_data.ipynb b/examples/USACE_data.ipynb
index 68f62f0..8ca959c 100644
--- a/examples/USACE_data.ipynb
+++ b/examples/USACE_data.ipynb
@@ -46,9 +46,11 @@
    "outputs": [],
    "source": [
     "\n",
-    "# Define start and end dates for data retrieval\n",
+    "# Define start and end dates for data retrieval, you can use either datetime.date or string for the dates\n",
+    "import datetime\n",
+    "df = fetch_usace_station(\"01300\", datetime.date(2020, 4, 5), end_date=\"2020-04-10\",http_client=httpx.Client(verify=False))\n",
     "\n",
-    "df = fetch_usace_station(\"01300\", start_date=\"2020-04-05\", end_date=\"2020-04-10\",http_client=httpx.Client(verify=False))\n"
+    "df"
    ]
   },
   {
@@ -67,19 +69,17 @@
     "from searvey._usace_api import _fetch_usace\n",
     "import httpx\n",
     "import pandas as pd\n",
-    "# df = _fetch_usace(station_ids=[\"01300\"], start_dates=\"2020-04-05\", end_dates=\"2020-04-10\",http_client=httpx.Client(verify=False))\n",
-    "\n",
     "\n",
     "df = _fetch_usace(\n",
     "    station_ids=[\"01300\"],\n",
-    "    start_dates=pd.DatetimeIndex([\"2020-04-05\"]),\n",
-    "    end_dates=pd.DatetimeIndex([\"2020-04-10\"]),\n",
+    "    start_dates=[\"2020-04-05\"],\n",
+    "    end_dates=[\"2020-04-10\"],\n",
     "    rate_limit=None,\n",
     "    http_client=httpx.Client(verify=False),\n",
     "    multiprocessing_executor=None,\n",
     "    multithreading_executor=None\n",
     ")\n",
-    "df['01300']"
+    "df['01300']\n"
    ]
   },
   {

From 85dfc4610511514a8b5a818bd21b2f054450bd8d Mon Sep 17 00:00:00 2001
From: abdu558 <arefabdu55@gmail.com>
Date: Tue, 20 Aug 2024 00:39:17 +0100
Subject: [PATCH 11/15] Update USACE API to handle multiple start and end dates

---
 searvey/_usace_api.py | 37 ++++++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/searvey/_usace_api.py b/searvey/_usace_api.py
index e77d56c..9aaa18f 100644
--- a/searvey/_usace_api.py
+++ b/searvey/_usace_api.py
@@ -2,7 +2,9 @@
 import xml.etree.ElementTree as ET
 from datetime import datetime
 from collections import abc
-
+from typing import List
+from typing import Union
+from searvey.custom_types import DatetimeLike
 import httpx
 import multifutures
 import pandas as pd
@@ -57,7 +59,6 @@ def _generate_urls(
         begin_date=start_date.strftime("%Y-%m-%dT%H:%M"),
         end_date=end_date.strftime("%Y-%m-%dT%H:%M")
     )
-    print(url)
     return [url]
 
 def _retrieve_usace_data(
@@ -71,6 +72,7 @@ def _retrieve_usace_data(
     kwargs = []
     for station_id, start_date, end_date in zip(station_ids, start_dates, end_dates):
         for url in _generate_urls(station_id=station_id, start_date=start_date, end_date=end_date):
+            logger.info("USACE-%s: Starting scraping: %s - %s", station_id, start_date, end_date)
             if url:
                 kwargs.append(
                     dict(
@@ -91,8 +93,8 @@ def _retrieve_usace_data(
 
 def _fetch_usace(
     station_ids: abc.Collection[str],
-    start_dates: pd.DatetimeIndex,
-    end_dates: pd.DatetimeIndex,
+    start_dates: Union[DatetimeLike, List[DatetimeLike], None] = None,
+    end_dates: Union[DatetimeLike, List[DatetimeLike], None] = None,
     *,
     rate_limit: multifutures.RateLimit | None,
     http_client: httpx.Client | None,
@@ -101,8 +103,15 @@ def _fetch_usace(
 ) -> dict[str, pd.DataFrame]:
     rate_limit = _resolve_rate_limit(rate_limit)
     http_client = _resolve_http_client(http_client)
-    start_dates = _to_utc(start_dates)
-    end_dates = _to_utc(end_dates)
+
+    now = pd.Timestamp.now("utc")
+
+    start_dates = [start_dates] if not isinstance(start_dates, list) else start_dates
+    end_dates = [end_dates] if not isinstance(end_dates, list) else end_dates
+
+    #we get the first index because the output is (DatetimeIndex(['2020-04-05'], dtype='datetime64[ns]', freq=None)
+    start_dates = [_resolve_start_date(now, date)[0] for date in start_dates]
+    end_dates = [_resolve_end_date(now, date)[0] for date in end_dates]
 
     usace_responses = _retrieve_usace_data(
         station_ids=station_ids,
@@ -142,21 +151,19 @@ def fetch_usace_station(
     and return the results as a ``pandas.DataFrame``.
 
     :param station_id: The station identifier.
-    :param start_date: The starting date of the query. Defaults to 7 days ago.
-    :param end_date: The finishing date of the query. Defaults to "now".
-    :param variable: The variable to fetch. Defaults to "HG" (gauge height).
+    :param start_date: The starting date of the query.
+    :param end_date: The finishing date of the query.
     :param rate_limit: The rate limit for making requests to the USACE servers.
-    :param http_client: The ``httpx.Client``.
-    :param multiprocessing_executor: An instance of a class implementing the ``concurrent.futures.Executor`` API.
-    :param multithreading_executor: An instance of a class implementing the ``concurrent.futures.Executor`` API.
+    :param http_client: The ``httpx.Client``. It should be created with ``verify=False``.
+    :param multiprocessing_executor: An instance of a class implementing the ``concurrent.futures.Executor`` API.
+    :param multithreading_executor: An instance of a class implementing the ``concurrent.futures.Executor`` API.
     """
     logger.info("USACE-%s: Starting scraping: %s - %s", station_id, start_date, end_date)
-    now = pd.Timestamp.now("utc")
     try:
         df = _fetch_usace(
             station_ids=[station_id],
-            start_dates=_resolve_start_date(now, start_date),
-            end_dates=_resolve_end_date(now, end_date),
+            start_dates=start_date,
+            end_dates=end_date,
             rate_limit=rate_limit,
             http_client=http_client,
             multiprocessing_executor=multiprocessing_executor,

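With this change, a multi-station call becomes possible. A sketch, in which the
second station id is made up for illustration and the three collections are
paired positionally by zip():

import httpx
from searvey._usace_api import _fetch_usace

dataframes = _fetch_usace(
    station_ids=["01300", "01301"],            # "01301" is a hypothetical second id
    start_dates=["2020-04-05", "2020-05-01"],  # one start/end date per station id
    end_dates=["2020-04-10", "2020-05-07"],
    rate_limit=None,
    http_client=httpx.Client(verify=False),
    multiprocessing_executor=None,
    multithreading_executor=None,
)
for station_id, df in dataframes.items():
    print(station_id, df.shape)
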
From 7ec7723aa1c6138d432c2604afe02ab68c1ba1a8 Mon Sep 17 00:00:00 2001
From: abdu558 <arefabdu55@gmail.com>
Date: Tue, 20 Aug 2024 00:39:40 +0100
Subject: [PATCH 12/15] Add tests for USACE API functions

---
 tests/usace_test.py | 70 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 tests/usace_test.py

diff --git a/tests/usace_test.py b/tests/usace_test.py
new file mode 100644
index 0000000..e45c35a
--- /dev/null
+++ b/tests/usace_test.py
@@ -0,0 +1,70 @@
+import pandas as pd
+import pytest
+import httpx
+from unittest.mock import patch, MagicMock
+
+from searvey._usace_api import fetch_usace_station, _fetch_usace, _generate_urls
+
+
+def test_generate_urls():
+    station_id = "01300"
+    start_date = pd.Timestamp("2020-04-05")
+    end_date = pd.Timestamp("2020-04-10")
+
+    urls = _generate_urls(station_id, start_date, end_date)
+
+    assert len(urls) == 1
+    assert station_id in urls[0]
+    assert "2020-04-05" in urls[0]
+    assert "2020-04-10" in urls[0]
+
+def test_fetch_usace():
+    result = _fetch_usace(
+        station_ids=["01300"],
+        start_dates=["2020-04-05"],
+        end_dates=["2020-04-10"],
+        rate_limit=None,
+        http_client=httpx.Client(verify=False),
+        multiprocessing_executor=None,
+        multithreading_executor=None
+    )
+    assert "01300" in result
+    assert isinstance(result["01300"], pd.DataFrame)
+    assert len(result) == 1
+
+@patch('searvey._usace_api._fetch_usace')
+def test_fetch_usace_station(mock_fetch):
+    mock_df = pd.DataFrame({
+        'value': [10.5, 11.2, 10.8]
+    }, index=pd.date_range("2020-04-05", periods=3, freq='D'))
+    mock_df.index.name = 'time'
+    mock_df.attrs["station_id"] = "USACE-01300"
+
+    mock_fetch.return_value = {"01300": mock_df}
+
+    result = fetch_usace_station(
+        "01300",
+        start_date="2020-04-05",
+        end_date="2020-04-10",
+        http_client=httpx.Client(verify=False)
+    )
+
+    assert isinstance(result, pd.DataFrame)
+    assert len(result) == 3
+    assert result.index.name == 'time'
+    assert 'value' in result.columns
+    assert result.attrs["station_id"] == "USACE-01300"
+
+def test_fetch_usace_station_error_handling():
+    with patch('searvey._usace_api._fetch_usace', side_effect=Exception("API Error")):
+        result = fetch_usace_station(
+            "01300",
+            start_date="2020-04-05",
+            end_date="2020-04-10",
+            http_client=httpx.Client(verify=False)
+        )
+        assert result.empty
+
+if __name__ == "__main__":
+    pytest.main()
+

From 9a187d3a6a586c26b42e8c33e8e46eb94b0dd935 Mon Sep 17 00:00:00 2001
From: abdu558 <arefabdu55@gmail.com>
Date: Tue, 20 Aug 2024 00:51:14 +0100
Subject: [PATCH 13/15] Apply pre-commit changes

---
 searvey/__init__.py   |  3 ++-
 searvey/_usace_api.py | 26 +++++++++++++++++---------
 tests/usace_test.py   | 42 +++++++++++++++++++++---------------------
 3 files changed, 40 insertions(+), 31 deletions(-)

diff --git a/searvey/__init__.py b/searvey/__init__.py
index 8e2c628..0aded10 100644
--- a/searvey/__init__.py
+++ b/searvey/__init__.py
@@ -4,13 +4,14 @@
 
 from searvey._coops_api import fetch_coops_station
 from searvey._ioc_api import fetch_ioc_station
+from searvey._usace_api import fetch_usace_station
 from searvey.coops import get_coops_stations
 from searvey.ioc import get_ioc_data
 from searvey.ioc import get_ioc_stations
 from searvey.stations import get_stations
 from searvey.stations import Provider
 from searvey.usgs import get_usgs_stations
-from searvey._usace_api import fetch_usace_station
+
 __version__ = importlib.metadata.version(__name__)
 
 
diff --git a/searvey/_usace_api.py b/searvey/_usace_api.py
index 9aaa18f..d07aa43 100644
--- a/searvey/_usace_api.py
+++ b/searvey/_usace_api.py
@@ -1,24 +1,29 @@
 import logging
 import xml.etree.ElementTree as ET
-from datetime import datetime
 from collections import abc
+from datetime import datetime
 from typing import List
 from typing import Union
-from searvey.custom_types import DatetimeLike
+
 import httpx
 import multifutures
 import pandas as pd
 
-from ._common import _fetch_url, _resolve_end_date, _resolve_http_client, _resolve_rate_limit, _resolve_start_date, _to_utc
+from ._common import _fetch_url
+from ._common import _resolve_end_date
+from ._common import _resolve_http_client
+from ._common import _resolve_rate_limit
+from ._common import _resolve_start_date
 from .custom_types import DatetimeLike
 
 logger = logging.getLogger(__name__)
 
 BASE_URL = "https://rivergages.mvr.usace.army.mil/watercontrol/webservices/rest/webserviceWaterML.cfc?method=RGWML&meth=getValues&location={location}&site={site}&variable={variable}&beginDate={begin_date}&endDate={end_date}&authToken=RiverGages"
 
+
 def _parse_xml_data(content: str, station_id: str) -> pd.DataFrame:
     try:
-        namespace = {'wml': 'http://www.cuahsi.org/waterML/1.0/'}
+        namespace = {"wml": "http://www.cuahsi.org/waterML/1.0/"}
         root = ET.fromstring(content)
         values_element = root.find(".//wml:values", namespaces=namespace)
 
@@ -31,10 +36,10 @@ def _parse_xml_data(content: str, station_id: str) -> pd.DataFrame:
             date_time = value_element.get("dateTime")
             value = value_element.text
             date_time_obj = datetime.strptime(date_time, "%Y-%m-%dT%H:%M:%S")
-            data.append({'time': date_time_obj, 'value': float(value)})
+            data.append({"time": date_time_obj, "value": float(value)})
 
         df = pd.DataFrame(data)
-        df.set_index('time', inplace=True)
+        df.set_index("time", inplace=True)
         df.index = pd.to_datetime(df.index, utc=True)
         df.attrs["station_id"] = f"USACE-{station_id}"
         return df
@@ -42,6 +47,7 @@ def _parse_xml_data(content: str, station_id: str) -> pd.DataFrame:
         logger.error(f"{station_id}: Failed to parse XML data.")
         return pd.DataFrame()
 
+
 def _generate_urls(
     station_id: str,
     start_date: pd.Timestamp,
@@ -57,10 +63,11 @@ def _generate_urls(
         site=station_id,
         variable="HG",
         begin_date=start_date.strftime("%Y-%m-%dT%H:%M"),
-        end_date=end_date.strftime("%Y-%m-%dT%H:%M")
+        end_date=end_date.strftime("%Y-%m-%dT%H:%M"),
     )
     return [url]
 
+
 def _retrieve_usace_data(
     station_ids: abc.Collection[str],
     start_dates: abc.Collection[pd.Timestamp],
@@ -109,7 +116,7 @@ def _fetch_usace(
     start_dates = [start_dates] if not isinstance(start_dates, list) else start_dates
     end_dates = [end_dates] if not isinstance(end_dates, list) else end_dates
 
-    #we get the first index because the output is (DatetimeIndex(['2020-04-05'], dtype='datetime64[ns]', freq=None)
+    # take the first element because _resolve_start_date returns a one-element DatetimeIndex, e.g. DatetimeIndex(['2020-04-05'], dtype='datetime64[ns]')
     start_dates = [_resolve_start_date(now, date)[0] for date in start_dates]
     end_dates = [_resolve_end_date(now, date)[0] for date in end_dates]
 
@@ -136,6 +143,7 @@ def _fetch_usace(
 
     return dataframes
 
+
 def fetch_usace_station(
     station_id: str,
     start_date: DatetimeLike | None = None,
@@ -178,4 +186,4 @@ def fetch_usace_station(
     else:
         logger.info("USACE-%s: Finished scraping: %s - %s", station_id, start_date, end_date)
 
-    return df
\ No newline at end of file
+    return df
diff --git a/tests/usace_test.py b/tests/usace_test.py
index e45c35a..0e79fb2 100644
--- a/tests/usace_test.py
+++ b/tests/usace_test.py
@@ -1,9 +1,12 @@
+from unittest.mock import patch
+
+import httpx
 import pandas as pd
 import pytest
-import httpx
-from unittest.mock import patch, MagicMock
 
-from searvey._usace_api import fetch_usace_station, _fetch_usace, _generate_urls
+from searvey._usace_api import _fetch_usace
+from searvey._usace_api import _generate_urls
+from searvey._usace_api import fetch_usace_station
 
 
 def test_generate_urls():
@@ -18,6 +21,7 @@ def test_generate_urls():
     assert "2020-04-05" in urls[0]
     assert "2020-04-10" in urls[0]
 
+
 def test_fetch_usace():
     result = _fetch_usace(
         station_ids=["01300"],
@@ -26,45 +30,41 @@ def test_fetch_usace():
         rate_limit=None,
         http_client=httpx.Client(verify=False),
         multiprocessing_executor=None,
-        multithreading_executor=None
+        multithreading_executor=None,
     )
     assert "01300" in result
     assert isinstance(result["01300"], pd.DataFrame)
     assert len(result) == 1
 
-@patch('searvey._usace_api._fetch_usace')
+
+@patch("searvey._usace_api._fetch_usace")
 def test_fetch_usace_station(mock_fetch):
-    mock_df = pd.DataFrame({
-        'value': [10.5, 11.2, 10.8]
-    }, index=pd.date_range("2020-04-05", periods=3, freq='D'))
-    mock_df.index.name = 'time'
+    mock_df = pd.DataFrame(
+        {"value": [10.5, 11.2, 10.8]}, index=pd.date_range("2020-04-05", periods=3, freq="D")
+    )
+    mock_df.index.name = "time"
     mock_df.attrs["station_id"] = "USACE-01300"
 
     mock_fetch.return_value = {"01300": mock_df}
 
     result = fetch_usace_station(
-        "01300",
-        start_date="2020-04-05",
-        end_date="2020-04-10",
-        http_client=httpx.Client(verify=False)
+        "01300", start_date="2020-04-05", end_date="2020-04-10", http_client=httpx.Client(verify=False)
     )
 
     assert isinstance(result, pd.DataFrame)
     assert len(result) == 3
-    assert result.index.name == 'time'
-    assert 'value' in result.columns
+    assert result.index.name == "time"
+    assert "value" in result.columns
     assert result.attrs["station_id"] == "USACE-01300"
 
+
 def test_fetch_usace_station_error_handling():
-    with patch('searvey._usace_api._fetch_usace', side_effect=Exception("API Error")):
+    with patch("searvey._usace_api._fetch_usace", side_effect=Exception("API Error")):
         result = fetch_usace_station(
-            "01300",
-            start_date="2020-04-05",
-            end_date="2020-04-10",
-            http_client=httpx.Client(verify=False)
+            "01300", start_date="2020-04-05", end_date="2020-04-10", http_client=httpx.Client(verify=False)
         )
         assert result.empty
 
+
 if __name__ == "__main__":
     pytest.main()
-

From 4cbff3386ac961e1e80749e9a3ebd809099be609 Mon Sep 17 00:00:00 2001
From: abdu558 <arefabdu55@gmail.com>
Date: Tue, 20 Aug 2024 01:04:39 +0100
Subject: [PATCH 14/15] Update documentation for Army Corps WL

---
 README.md             |  1 +
 docs/source/usace.rst | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+)
 create mode 100644 docs/source/usace.rst

diff --git a/README.md b/README.md
index 14123a6..8842a7b 100644
--- a/README.md
+++ b/README.md
@@ -16,6 +16,7 @@ Searvey aims to provide the following functionality:
     - U.S. Center for Operational Oceanographic Products and Services (CO-OPS)
     - Flanders Marine Institute (VLIZ); Intergovernmental Oceanographic Commission (IOC)
     - U.S. Geological Survey (USGS)
+    - Army Corp WL
 
 ## Installation
 
diff --git a/docs/source/usace.rst b/docs/source/usace.rst
new file mode 100644
index 0000000..c94147c
--- /dev/null
+++ b/docs/source/usace.rst
@@ -0,0 +1,18 @@
+USACE RiverGages
+================
+
+The U.S. Army Corps of Engineers `RiverGages <https://rivergages.mvr.usace.army.mil/>`_
+system provides water level data for rivers and waterways across the United States.
+searvey uses the RiverGages REST API to access this data. Currently, only water
+level data is exposed in searvey.
+
+The data from an individual station can be retrieved with:
+
+.. autofunction:: searvey.fetch_usace_station
+
+Data from multiple stations and multiple date ranges can be fetched with:
+
+.. autofunction:: searvey._usace_api._fetch_usace
+
+Note: the ``verify=False`` argument to ``httpx.Client()`` is used to bypass
+SSL verification, which is currently the only way to access the USACE RiverGages API.
\ No newline at end of file

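A sketch of the call the note above describes, using the station and dates from
the example notebook:

import httpx
from searvey import fetch_usace_station

# verify=False is required to reach the RiverGages endpoint (see the note above).
df = fetch_usace_station(
    "01300",
    start_date="2020-04-05",
    end_date="2020-04-10",
    http_client=httpx.Client(verify=False),
)
print(df.head())
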
From 95331b9d82f60df950bc244544c8909518fbc9a1 Mon Sep 17 00:00:00 2001
From: abdu558 <arefabdu55@gmail.com>
Date: Tue, 20 Aug 2024 08:16:21 +0100
Subject: [PATCH 15/15] Improve compatibility by using typing.Optional

---
 searvey/_usace_api.py | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/searvey/_usace_api.py b/searvey/_usace_api.py
index d07aa43..23f12fb 100644
--- a/searvey/_usace_api.py
+++ b/searvey/_usace_api.py
@@ -4,6 +4,7 @@
 from datetime import datetime
 from typing import List
 from typing import Union
+from typing import Optional
 
 import httpx
 import multifutures
@@ -74,7 +75,7 @@ def _retrieve_usace_data(
     end_dates: abc.Collection[pd.Timestamp],
     rate_limit: multifutures.RateLimit,
     http_client: httpx.Client,
-    executor: multifutures.ExecutorProtocol | None,
+    executor: Optional[multifutures.ExecutorProtocol] = None,
 ) -> list[multifutures.FutureResult]:
     kwargs = []
     for station_id, start_date, end_date in zip(station_ids, start_dates, end_dates):
@@ -103,10 +104,10 @@ def _fetch_usace(
     start_dates: Union[DatetimeLike, List[DatetimeLike], None] = None,
     end_dates: Union[DatetimeLike, List[DatetimeLike], None] = None,
     *,
-    rate_limit: multifutures.RateLimit | None,
-    http_client: httpx.Client | None,
-    multiprocessing_executor: multifutures.ExecutorProtocol | None,
-    multithreading_executor: multifutures.ExecutorProtocol | None,
+    rate_limit: Optional[multifutures.RateLimit] = None,
+    http_client: Optional[httpx.Client] = None,
+    multiprocessing_executor: Optional[multifutures.ExecutorProtocol] = None,
+    multithreading_executor: Optional[multifutures.ExecutorProtocol] = None,
 ) -> dict[str, pd.DataFrame]:
     rate_limit = _resolve_rate_limit(rate_limit)
     http_client = _resolve_http_client(http_client)
@@ -146,13 +147,13 @@ def _fetch_usace(
 
 def fetch_usace_station(
     station_id: str,
-    start_date: DatetimeLike | None = None,
-    end_date: DatetimeLike | None = None,
+    start_date: Optional[DatetimeLike] = None,
+    end_date: Optional[DatetimeLike] = None,
     *,
-    rate_limit: multifutures.RateLimit | None = None,
-    http_client: httpx.Client | None = None,
-    multiprocessing_executor: multifutures.ExecutorProtocol | None = None,
-    multithreading_executor: multifutures.ExecutorProtocol | None = None,
+    rate_limit: Optional[multifutures.RateLimit] = None,
+    http_client: Optional[httpx.Client] = None,
+    multiprocessing_executor: Optional[multifutures.ExecutorProtocol] = None,
+    multithreading_executor: Optional[multifutures.ExecutorProtocol] = None,
 ) -> pd.DataFrame:
     """
     Make a query to the USACE API for river gauge data for ``station_id``