Skip to content

Commit

Permalink
Merge pull request #22 from developmentseed/removeList
Browse files Browse the repository at this point in the history
remove LIST and some requirements
  • Loading branch information
vincentsarago authored Oct 18, 2023
2 parents 5eb3ef6 + b538ccf commit de287c1
Show file tree
Hide file tree
Showing 11 changed files with 48 additions and 103 deletions.
7 changes: 7 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@
* update requirements
- `rio-tiler>=6.0,<7.0`
- `fastapi>=0.100.0`
- `rasterio>=1.3.8`

* remove `wurlitzer` dependency

* only use `rasterio` logs

* remove `LIST` information **breaking change**

## 0.10.0 (2023-06-02)

Expand Down
45 changes: 6 additions & 39 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
---


Inspect HEAD/LIST/GET requests within Rasterio.
Inspect HEAD/GET requests within Rasterio.

Note: In GDAL 3.2, logging capabilities for /vsicurl, /vsis3 and the like were added (ref: https://github.com/OSGeo/gdal/pull/2742).

Expand Down Expand Up @@ -66,7 +66,7 @@ def info(src_path: str):

meta = info("https://noaa-eri-pds.s3.amazonaws.com/2022_Hurricane_Ian/20221002a_RGB/20221002aC0795145w325100n.tif")

> 2022-10-25T00:20:24.215385+0200 | TILEBENCH | {"LIST": {"count": 0}, "HEAD": {"count": 1}, "GET": {"count": 1, "bytes": 32768, "ranges": ["0-32767"]}, "Timing": 0.8705799579620361}
> 2023-10-18T23:00:11.184745+0200 | TILEBENCH | {"HEAD": {"count": 1}, "GET": {"count": 1, "bytes": 32768, "ranges": ["0-32767"]}, "Timing": 0.7379939556121826}
```

```python
Expand All @@ -85,7 +85,7 @@ img = _read_tile(
15,
)

> 2022-10-25T00:21:32.895752+0200 | TILEBENCH | {"LIST": {"count": 0}, "HEAD": {"count": 1}, "GET": {"count": 2, "bytes": 409600, "ranges": ["0-32767", "32768-409599"]}, "Timing": 1.2970409393310547}
> 2023-10-18T23:01:00.572263+0200 | TILEBENCH | {"HEAD": {"count": 1}, "GET": {"count": 2, "bytes": 409600, "ranges": ["0-32767", "32768-409599"]}, "Timing": 1.0749869346618652}
```

## Command Line Interface (CLI)
Expand Down Expand Up @@ -119,9 +119,6 @@ $ tilebench random https://noaa-eri-pds.s3.amazonaws.com/2022_Hurricane_Ian/2022
$ tilebench profile https://noaa-eri-pds.s3.amazonaws.com/2022_Hurricane_Ian/20221002a_RGB/20221002aC0795145w325100n.tif --tile 15-9114-13215 --config GDAL_DISABLE_READDIR_ON_OPEN=EMPTY_DIR | jq
{
"LIST": {
"count": 0
},
"HEAD": {
"count": 1
},
Expand All @@ -133,14 +130,11 @@ $ tilebench profile https://noaa-eri-pds.s3.amazonaws.com/2022_Hurricane_Ian/202
"32768-409599"
]
},
"Timing": 1.2364399433135986
"Timing": 0.9715230464935303
}
$ tilebench profile https://noaa-eri-pds.s3.amazonaws.com/2022_Hurricane_Ian/20221002a_RGB/20221002aC0795145w325100n.tif --tile 15-9114-13215 --config GDAL_DISABLE_READDIR_ON_OPEN=FALSE | jq
{
"LIST": {
"count": 1
},
"HEAD": {
"count": 8
},
Expand All @@ -152,7 +146,7 @@ $ tilebench profile https://noaa-eri-pds.s3.amazonaws.com/2022_Hurricane_Ian/202
"32768-409599"
]
},
"Timing": 2.2018940448760986
"Timing": 2.1837549209594727
}
```

Expand Down Expand Up @@ -227,34 +221,7 @@ docker run \

## Contribution & Development

Issues and pull requests are more than welcome.

**dev install**

```bash
$ git clone https://github.com/developmentseed/tilebench.git
$ cd tilebench
$ pip install -e .[dev]
```

**pre-commit**

This repo is set to use `pre-commit` to run *isort*, *flake8*, *pydocstring*, *black* ("uncompromising Python code formatter") and mypy when committing new code.

```
$ pre-commit install
$ git add .
$ git commit -m'my change'
isort....................................................................Passed
black....................................................................Passed
Flake8...................................................................Passed
Verifying PEP257 Compliance..............................................Passed
mypy.....................................................................Passed
$ git push origin
```
See [CONTRIBUTING.md](https://github.com/developmentseed/tilebench/blob/main/CONTRIBUTING.md)

## License

Expand Down
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,8 @@ dependencies = [
"fastapi>=0.100.0",
"jinja2>=3.0,<4.0.0",
"loguru",
"rasterio>=1.3.0",
"rasterio>=1.3.8",
"rio-tiler>=6.0,<7.0",
"wurlitzer",
"uvicorn[standard]",
]

Expand Down
10 changes: 4 additions & 6 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def test_profile():
assert not result.exception
assert result.exit_code == 0
log = json.loads(result.output)
assert ["LIST", "HEAD", "GET", "Timing"] == list(log)
assert ["HEAD", "GET", "Timing"] == list(log)
# Make sure we didn't cache any request when `--tile` is not provided
assert "0-" in log["GET"]["ranges"][0]

Expand All @@ -38,15 +38,15 @@ def test_profile():
assert not result.exception
assert result.exit_code == 0
log = json.loads(result.output)
assert ["LIST", "HEAD", "GET", "Timing"] == list(log)
assert ["HEAD", "GET", "Timing"] == list(log)

result = runner.invoke(
cli, ["profile", COG_PATH, "--tilesize", 512, "--tile", "16-18229-26433"]
)
assert not result.exception
assert result.exit_code == 0
log = json.loads(result.output)
assert ["LIST", "HEAD", "GET", "Timing"] == list(log)
assert ["HEAD", "GET", "Timing"] == list(log)

result = runner.invoke(
cli, ["profile", COG_PATH, "--add-kernels", "--add-stdout", "--add-cprofile"]
Expand All @@ -55,14 +55,12 @@ def test_profile():
assert result.exit_code == 0
log = json.loads(result.output)
assert [
"LIST",
"HEAD",
"GET",
"WarpKernels",
"Timing",
"cprofile",
"curl",
"rasterio",
"logs",
] == list(log)


Expand Down
2 changes: 0 additions & 2 deletions tests/test_middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ def skip():
assert response.headers["Cache-Control"] == "no-cache"
assert response.headers["VSI-Stats"]
stats = response.headers["VSI-Stats"]
assert "list;count=" in stats
assert "head;count=" in stats
assert "get;count=" in stats

Expand All @@ -50,7 +49,6 @@ def skip():
assert response.headers["content-type"] == "application/json"
assert response.headers["VSI-Stats"]
stats = response.headers["VSI-Stats"]
assert "list;count=" in stats
assert "head;count=" in stats
assert "get;count=" in stats

Expand Down
2 changes: 1 addition & 1 deletion tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def _read_tile(src_path: str, x: int, y: int, z: int, tilesize: int = 256):
assert data.shape
assert mask.shape
assert stats
assert stats.get("LIST")
assert stats.get("HEAD")
assert stats.get("GET")
assert stats.get("Timing")
assert stats.get("WarpKernels")
1 change: 0 additions & 1 deletion tests/test_viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ def test_viz():
stats = response.headers["VSI-Stats"]
assert "head;count=" in stats
assert "get;count=" in stats
assert "list;count=" in stats

response = client.get("/info.geojson")
assert response.status_code == 200
Expand Down
56 changes: 21 additions & 35 deletions tilebench/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,34 +13,26 @@

import rasterio
from loguru import logger as log
from wurlitzer import pipes

fmt = "{time} | TILEBENCH | {message}"
log.remove()
log.add(sys.stderr, format=fmt)


def parse_logs(rio_lines: List[str], curl_lines: List[str]) -> Dict[str, Any]:
def parse_logs(logs: List[str]) -> Dict[str, Any]:
"""Parse Rasterio and CURL logs."""
# LIST
list_requests = [line for line in rio_lines if " VSICURL: GetFileList" in line]
list_summary = {
"count": len(list_requests),
}

# HEAD
curl_head_requests = [line for line in curl_lines if line.startswith("> HEAD")]
head_requests = len([line for line in logs if "CURL_INFO_HEADER_OUT: HEAD" in line])
head_summary = {
"count": len(curl_head_requests),
"count": head_requests,
}

# CURL GET
# CURL logs failed requests
curl_get_requests = [line for line in curl_lines if line.startswith("> GET")]
# GET
all_get_requests = len(
[line for line in logs if "CURL_INFO_HEADER_OUT: GET" in line]
)

# Rasterio GET
# Rasterio only logs successful requests
get_requests = [line for line in rio_lines if ": Downloading" in line]
get_requests = [line for line in logs if ": Downloading" in line]
get_values = [
map(int, get.split(" Downloading ")[1].split(" ")[0].split("-"))
for get in get_requests
Expand All @@ -49,17 +41,14 @@ def parse_logs(rio_lines: List[str], curl_lines: List[str]) -> Dict[str, Any]:
data_transfer = sum([j - i + 1 for i, j in get_values])

get_summary = {
"count": len(curl_get_requests),
"count": all_get_requests,
"bytes": data_transfer,
"ranges": get_values_str,
}

warp_kernel = [
line.split(" ")[-2:] for line in rio_lines if "GDALWarpKernel" in line
]
warp_kernel = [line.split(" ")[-2:] for line in logs if "GDALWarpKernel" in line]

return {
"LIST": list_summary,
"HEAD": head_summary,
"GET": get_summary,
"WarpKernels": warp_kernel,
Expand Down Expand Up @@ -88,25 +77,23 @@ def wrapped_f(*args, **kwargs):
logger.addHandler(handler)

gdal_config = config or {}
gdal_config.update({"CPL_DEBUG": "ON", "CPL_CURL_VERBOSE": "TRUE"})
gdal_config.update({"CPL_DEBUG": "ON", "CPL_CURL_VERBOSE": "YES"})

with pipes() as (_, curl_stream):
with rasterio.Env(**gdal_config):
with Timer() as t:
prof = cProfile.Profile()
retval = prof.runcall(func, *args, **kwargs)
profile_stream = StringIO()
ps = pstats.Stats(prof, stream=profile_stream)
ps.strip_dirs().sort_stats("time", "ncalls").print_stats()
with rasterio.Env(**gdal_config):
with Timer() as t:
prof = cProfile.Profile()
retval = prof.runcall(func, *args, **kwargs)
profile_stream = StringIO()
ps = pstats.Stats(prof, stream=profile_stream)
ps.strip_dirs().sort_stats("time", "ncalls").print_stats()

logger.removeHandler(handler)
handler.close()

rio_lines = rio_stream.getvalue().splitlines()
curl_lines = curl_stream.read().splitlines()
logs = rio_stream.getvalue().splitlines()
profile_lines = [p for p in profile_stream.getvalue().splitlines() if p]

results = parse_logs(rio_lines, curl_lines)
results = parse_logs(logs)
results["Timing"] = t.elapsed

if cprofile:
Expand All @@ -119,8 +106,7 @@ def wrapped_f(*args, **kwargs):
results.pop("WarpKernels")

if raw:
results["curl"] = curl_lines
results["rasterio"] = rio_lines
results["logs"] = logs

if not quiet:
log.info(json.dumps(results))
Expand Down
14 changes: 5 additions & 9 deletions tilebench/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.types import ASGIApp, Message, Receive, Scope, Send
from wurlitzer import pipes

from tilebench import parse_logs

Expand Down Expand Up @@ -41,27 +40,24 @@ async def dispatch(self, request: Request, call_next):
logger.addHandler(handler)

gdal_config = {"CPL_DEBUG": "ON", "CPL_CURL_VERBOSE": "TRUE"}
with pipes() as (_, curl_stream):
with rasterio.Env(**gdal_config, **self.config):
response = await call_next(request)
with rasterio.Env(**gdal_config, **self.config):
response = await call_next(request)

logger.removeHandler(handler)
handler.close()

if rio_stream or curl_stream:
if rio_stream:
rio_lines = rio_stream.getvalue().splitlines()
curl_lines = curl_stream.read().splitlines()

results = parse_logs(rio_lines, curl_lines)
results = parse_logs(rio_lines)
head_results = "head;count={count}".format(**results["HEAD"])
list_results = "list;count={count}".format(**results["LIST"])
get_results = "get;count={count};size={bytes}".format(**results["GET"])
ranges_results = "ranges; values={}".format(
"|".join(results["GET"]["ranges"])
)
response.headers[
"VSI-Stats"
] = f"{list_results}, {head_results}, {get_results}, {ranges_results}"
] = f"{head_results}, {get_results}, {ranges_results}"

return response

Expand Down
8 changes: 2 additions & 6 deletions tilebench/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -449,18 +449,14 @@
// Display the key/value pairs
const stats = data.get("vsi-stats")
if (stats) {
const list = stats.split(',')[0]
const listCount = list.split(';')[1].split('=')[1]

const head = stats.split(',')[1]
const head = stats.split(',')[0]
const headCount = head.split(';')[1].split('=')[1]

const get = stats.split(',')[2]
const get = stats.split(',')[1]
const getCount = get.split(';')[1].split('=')[1]
const getSize = get.split(';')[2].split('=')[1]

html += '<table>'
html += `<tr><td class="align-l">LIST</td><td class="px3 align-r">${listCount}</td></tr>`
html += `<tr><td class="align-l">HEAD</td><td class="px3 align-r">${headCount}</td></tr>`
html += `<tr><td class="align-l">GET</td><td class="px3 align-r">${getCount}</td></tr>`
html += `<tr><td class="align-l">GET (bytes)</td><td class="px3 align-r">${getSize}</td></tr>`
Expand Down
3 changes: 1 addition & 2 deletions tilebench/viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,14 +223,13 @@ def _read_tile(src_path: str, x: int, y: int, z: int):
(_, _), stats = _read_tile(self.src_path, x, y, z)

head_results = "head;count={count}".format(**stats["HEAD"])
list_results = "list;count={count}".format(**stats["LIST"])
get_results = "get;count={count};size={bytes}".format(**stats["GET"])
ranges_results = "ranges; values={}".format(
"|".join(stats["GET"]["ranges"])
)
response.headers[
"VSI-Stats"
] = f"{list_results}, {head_results}, {get_results}, {ranges_results}"
] = f"{head_results}, {get_results}, {ranges_results}"

response.headers[
"server-timing"
Expand Down

0 comments on commit de287c1

Please sign in to comment.