From b0145c0488bc26edb784ba800321467546a05203 Mon Sep 17 00:00:00 2001
From: Dan Yazovsky
Date: Sat, 20 May 2023 09:03:48 +0100
Subject: [PATCH] V0.1.8 (#10)

- Merged metrics and API servers
- Added code coverage tracking
- Added tests
- Updated dependencies
---
 .coveragerc                   |   8 ++
 .github/workflows/ci.yml      |   7 ++
 .gitignore                    |   4 +-
 .vscode/launch.json           |   2 +-
 Makefile                      |   4 +-
 README.md                     |   5 +-
 poetry.lock                   | 231 ++++++++++++++++++----------------
 pyproject.toml                |   3 +-
 sneakpeek/api.py              |  48 ++++++-
 sneakpeek/metrics.py          |  12 +-
 sneakpeek/runner.py           |   2 +-
 sneakpeek/server.py           |  53 +++-----
 tests/test_integration.py     |   6 +-
 tests/test_metrics.py         | 138 ++++++++++++++++++++
 tests/test_runner.py          | 198 +++++++++++++++++++++++++++++
 tests/test_scraper_context.py |  41 ++++--
 16 files changed, 598 insertions(+), 164 deletions(-)
 create mode 100644 tests/test_metrics.py
 create mode 100644 tests/test_runner.py

diff --git a/.coveragerc b/.coveragerc
index 1b40301..a1a7620 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -5,3 +5,11 @@ exclude_lines =
     raise AssertionError
     raise NotImplementedError
     if __name__ == .__main__.:
+    @entrypoint.method()
+    pragma: no cover
+    def __repr__
+    if self.debug:
+    if settings.DEBUG
+    if 0:
+    class .*\bProtocol\):
+    logger.
\ No newline at end of file
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 16fbe38..2801c4f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -41,6 +41,13 @@ jobs:
       - name: Tests coverage
         run: make coverage
 
+      - name: Upload coverage reports to Codecov
+        uses: codecov/codecov-action@v3
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+          files: ./coverage.xml
+          verbose: true
+
       - name: Build package
         run: make build
diff --git a/.gitignore b/.gitignore
index ef3dc20..8c77f3f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,6 @@ dist
 **/.pytest_cache/*
 .pytest_cache/
 .coverage
-htmlcov
\ No newline at end of file
+htmlcov
+demo
+coverage.xml
\ No newline at end of file
diff --git a/.vscode/launch.json b/.vscode/launch.json
index 661c2a3..114987e 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -31,7 +31,7 @@
             "name": "Run demo",
             "type": "python",
             "request": "launch",
-            "program": "${workspaceFolder}/demo/main.py",
+            "program": "${workspaceFolder}/demo/app.py",
             "console": "integratedTerminal",
             "justMyCode": true,
             "env": {
diff --git a/Makefile b/Makefile
index f9f402f..0946c7d 100644
--- a/Makefile
+++ b/Makefile
@@ -30,11 +30,11 @@ install: install-py install-js ##Install all dependencies
 
 .PHONY: test
 test: $(PY_INSTALL_STAMP) ##Run tests
-	$(POETRY) run pytest
+	$(POETRY) run pytest -n 20
 
 .PHONY: coverage
 coverage: $(PY_INSTALL_STAMP) ##Run tests with coverage
-	$(POETRY) run pytest --cov=sneakpeek tests --cov-fail-under=70 --cov-report term-missing --cov-report html
+	$(POETRY) run pytest --cov=sneakpeek tests --cov-fail-under=85 --cov-report term-missing --cov-report html --cov-report xml
 
build-ui: ##Build frontend
	$(YARN) --cwd $(ROOT_DIR)/front/ quasar build
diff --git a/README.md b/README.md
index 9bf8c0c..accc74a 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,7 @@
 [![PyPI version](https://badge.fury.io/py/sneakpeek-py.svg)](https://badge.fury.io/py/sneakpeek-py)
 ![PyPI - Downloads](https://img.shields.io/pypi/dm/sneakpeek-py?color=)
 [![Documentation Status](https://readthedocs.org/projects/sneakpeek-py/badge/?version=latest)](https://sneakpeek-py.readthedocs.io/en/latest/?badge=latest)
+[![codecov](https://codecov.io/gh/flulemon/sneakpeek/branch/main/graph/badge.svg?token=7h45P8qHRG)](https://codecov.io/gh/flulemon/sneakpeek)
 
 **Sneakpeek** is a framework that helps to quickly and conveniently develop scrapers.
 It's the best choice for scrapers that have some specific complex scraping logic that needs
 to be run on a constant basis.
@@ -16,7 +17,7 @@
 You can also run the demo using Docker:
 
 ```bash
-docker run -it --rm -p 8080:8080 -p 9090:9090 flulemon/sneakpeek-demo
+docker run -it --rm -p 8080:8080 flulemon/sneakpeek-demo
 ```
 
 Once it has started head over to http://localhost:8080 to play around with it.
@@ -210,7 +211,7 @@ For the argument `LocalRunner.run` takes:
 Now you can run your handler as an ordinary Python script. Given it's in the `demo_scraper.py` file you can use:
 
 ```bash
-python3 demo_scraper.py
+python demo_scraper.py
 ```
 
 ## Documentation
diff --git a/poetry.lock b/poetry.lock
index 0f84f38..3e1ee3f 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -326,14 +326,14 @@ files = [
 
 [[package]]
 name = "certifi"
-version = "2022.12.7"
+version = "2023.5.7"
 description = "Python package for providing Mozilla's CA Bundle."
 category = "main"
 optional = true
 python-versions = ">=3.6"
 files = [
-    {file = "certifi-2022.12.7-py3-none-any.whl", hash = "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18"},
-    {file = "certifi-2022.12.7.tar.gz", hash = "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3"},
+    {file = "certifi-2023.5.7-py3-none-any.whl", hash = "sha256:c6c2e98f5c7869efca1f8916fed228dd91539f9f1b444c314c06eef02980c716"},
+    {file = "certifi-2023.5.7.tar.gz", hash = "sha256:0f0d56dc5a6ad56fd4ba36484d6cc34451e1c6548c61daad8c320169f91eddc7"},
 ]
 
 [[package]]
@@ -450,63 +450,63 @@ files = [
 
 [[package]]
 name = "coverage"
-version = "7.2.3"
+version = "7.2.5"
 description = "Code coverage measurement for Python"
 category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "coverage-7.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e58c0d41d336569d63d1b113bd573db8363bc4146f39444125b7f8060e4e04f5"},
-    {file = "coverage-7.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:344e714bd0fe921fc72d97404ebbdbf9127bac0ca1ff66d7b79efc143cf7c0c4"},
-    {file = "coverage-7.2.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:974bc90d6f6c1e59ceb1516ab00cf1cdfbb2e555795d49fa9571d611f449bcb2"},
-    {file = "coverage-7.2.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0743b0035d4b0e32bc1df5de70fba3059662ace5b9a2a86a9f894cfe66569013"},
-    {file = "coverage-7.2.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d0391fb4cfc171ce40437f67eb050a340fdbd0f9f49d6353a387f1b7f9dd4fa"},
-    {file = "coverage-7.2.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4a42e1eff0ca9a7cb7dc9ecda41dfc7cbc17cb1d02117214be0561bd1134772b"},
-    {file = "coverage-7.2.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:be19931a8dcbe6ab464f3339966856996b12a00f9fe53f346ab3be872d03e257"},
-    {file = "coverage-7.2.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:72fcae5bcac3333a4cf3b8f34eec99cea1187acd55af723bcbd559adfdcb5535"},
-    {file = "coverage-7.2.3-cp310-cp310-win32.whl", hash = "sha256:aeae2aa38395b18106e552833f2a50c27ea0000122bde421c31d11ed7e6f9c91"},
-    {file = "coverage-7.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:83957d349838a636e768251c7e9979e899a569794b44c3728eaebd11d848e58e"},
-
{file = "coverage-7.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:dfd393094cd82ceb9b40df4c77976015a314b267d498268a076e940fe7be6b79"}, - {file = "coverage-7.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:182eb9ac3f2b4874a1f41b78b87db20b66da6b9cdc32737fbbf4fea0c35b23fc"}, - {file = "coverage-7.2.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1bb1e77a9a311346294621be905ea8a2c30d3ad371fc15bb72e98bfcfae532df"}, - {file = "coverage-7.2.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca0f34363e2634deffd390a0fef1aa99168ae9ed2af01af4a1f5865e362f8623"}, - {file = "coverage-7.2.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:55416d7385774285b6e2a5feca0af9652f7f444a4fa3d29d8ab052fafef9d00d"}, - {file = "coverage-7.2.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:06ddd9c0249a0546997fdda5a30fbcb40f23926df0a874a60a8a185bc3a87d93"}, - {file = "coverage-7.2.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:fff5aaa6becf2c6a1699ae6a39e2e6fb0672c2d42eca8eb0cafa91cf2e9bd312"}, - {file = "coverage-7.2.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ea53151d87c52e98133eb8ac78f1206498c015849662ca8dc246255265d9c3c4"}, - {file = "coverage-7.2.3-cp311-cp311-win32.whl", hash = "sha256:8f6c930fd70d91ddee53194e93029e3ef2aabe26725aa3c2753df057e296b925"}, - {file = "coverage-7.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:fa546d66639d69aa967bf08156eb8c9d0cd6f6de84be9e8c9819f52ad499c910"}, - {file = "coverage-7.2.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b2317d5ed777bf5a033e83d4f1389fd4ef045763141d8f10eb09a7035cee774c"}, - {file = "coverage-7.2.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be9824c1c874b73b96288c6d3de793bf7f3a597770205068c6163ea1f326e8b9"}, - {file = "coverage-7.2.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2c3b2803e730dc2797a017335827e9da6da0e84c745ce0f552e66400abdfb9a1"}, - {file = "coverage-7.2.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f69770f5ca1994cb32c38965e95f57504d3aea96b6c024624fdd5bb1aa494a1"}, - {file = "coverage-7.2.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1127b16220f7bfb3f1049ed4a62d26d81970a723544e8252db0efde853268e21"}, - {file = "coverage-7.2.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:aa784405f0c640940595fa0f14064d8e84aff0b0f762fa18393e2760a2cf5841"}, - {file = "coverage-7.2.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:3146b8e16fa60427e03884301bf8209221f5761ac754ee6b267642a2fd354c48"}, - {file = "coverage-7.2.3-cp37-cp37m-win32.whl", hash = "sha256:1fd78b911aea9cec3b7e1e2622c8018d51c0d2bbcf8faaf53c2497eb114911c1"}, - {file = "coverage-7.2.3-cp37-cp37m-win_amd64.whl", hash = "sha256:0f3736a5d34e091b0a611964c6262fd68ca4363df56185902528f0b75dbb9c1f"}, - {file = "coverage-7.2.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:981b4df72c93e3bc04478153df516d385317628bd9c10be699c93c26ddcca8ab"}, - {file = "coverage-7.2.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c0045f8f23a5fb30b2eb3b8a83664d8dc4fb58faddf8155d7109166adb9f2040"}, - {file = "coverage-7.2.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f760073fcf8f3d6933178d67754f4f2d4e924e321f4bb0dcef0424ca0215eba1"}, - {file = 
"coverage-7.2.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c86bd45d1659b1ae3d0ba1909326b03598affbc9ed71520e0ff8c31a993ad911"}, - {file = "coverage-7.2.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:172db976ae6327ed4728e2507daf8a4de73c7cc89796483e0a9198fd2e47b462"}, - {file = "coverage-7.2.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:d2a3a6146fe9319926e1d477842ca2a63fe99af5ae690b1f5c11e6af074a6b5c"}, - {file = "coverage-7.2.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:f649dd53833b495c3ebd04d6eec58479454a1784987af8afb77540d6c1767abd"}, - {file = "coverage-7.2.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:7c4ed4e9f3b123aa403ab424430b426a1992e6f4c8fd3cb56ea520446e04d152"}, - {file = "coverage-7.2.3-cp38-cp38-win32.whl", hash = "sha256:eb0edc3ce9760d2f21637766c3aa04822030e7451981ce569a1b3456b7053f22"}, - {file = "coverage-7.2.3-cp38-cp38-win_amd64.whl", hash = "sha256:63cdeaac4ae85a179a8d6bc09b77b564c096250d759eed343a89d91bce8b6367"}, - {file = "coverage-7.2.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:20d1a2a76bb4eb00e4d36b9699f9b7aba93271c9c29220ad4c6a9581a0320235"}, - {file = "coverage-7.2.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4ea748802cc0de4de92ef8244dd84ffd793bd2e7be784cd8394d557a3c751e21"}, - {file = "coverage-7.2.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21b154aba06df42e4b96fc915512ab39595105f6c483991287021ed95776d934"}, - {file = "coverage-7.2.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fd214917cabdd6f673a29d708574e9fbdb892cb77eb426d0eae3490d95ca7859"}, - {file = "coverage-7.2.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c2e58e45fe53fab81f85474e5d4d226eeab0f27b45aa062856c89389da2f0d9"}, - {file = "coverage-7.2.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:87ecc7c9a1a9f912e306997ffee020297ccb5ea388421fe62a2a02747e4d5539"}, - {file = "coverage-7.2.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:387065e420aed3c71b61af7e82c7b6bc1c592f7e3c7a66e9f78dd178699da4fe"}, - {file = "coverage-7.2.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ea3f5bc91d7d457da7d48c7a732beaf79d0c8131df3ab278e6bba6297e23c6c4"}, - {file = "coverage-7.2.3-cp39-cp39-win32.whl", hash = "sha256:ae7863a1d8db6a014b6f2ff9c1582ab1aad55a6d25bac19710a8df68921b6e30"}, - {file = "coverage-7.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:3f04becd4fcda03c0160d0da9c8f0c246bc78f2f7af0feea1ec0930e7c93fa4a"}, - {file = "coverage-7.2.3-pp37.pp38.pp39-none-any.whl", hash = "sha256:965ee3e782c7892befc25575fa171b521d33798132692df428a09efacaffe8d0"}, - {file = "coverage-7.2.3.tar.gz", hash = "sha256:d298c2815fa4891edd9abe5ad6e6cb4207104c7dd9fd13aea3fdebf6f9b91259"}, + {file = "coverage-7.2.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:883123d0bbe1c136f76b56276074b0c79b5817dd4238097ffa64ac67257f4b6c"}, + {file = "coverage-7.2.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d2fbc2a127e857d2f8898aaabcc34c37771bf78a4d5e17d3e1f5c30cd0cbc62a"}, + {file = "coverage-7.2.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f3671662dc4b422b15776cdca89c041a6349b4864a43aa2350b6b0b03bbcc7f"}, + {file = "coverage-7.2.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:780551e47d62095e088f251f5db428473c26db7829884323e56d9c0c3118791a"}, + {file = "coverage-7.2.5-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:066b44897c493e0dcbc9e6a6d9f8bbb6607ef82367cf6810d387c09f0cd4fe9a"}, + {file = "coverage-7.2.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b9a4ee55174b04f6af539218f9f8083140f61a46eabcaa4234f3c2a452c4ed11"}, + {file = "coverage-7.2.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:706ec567267c96717ab9363904d846ec009a48d5f832140b6ad08aad3791b1f5"}, + {file = "coverage-7.2.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ae453f655640157d76209f42c62c64c4d4f2c7f97256d3567e3b439bd5c9b06c"}, + {file = "coverage-7.2.5-cp310-cp310-win32.whl", hash = "sha256:f81c9b4bd8aa747d417407a7f6f0b1469a43b36a85748145e144ac4e8d303cb5"}, + {file = "coverage-7.2.5-cp310-cp310-win_amd64.whl", hash = "sha256:dc945064a8783b86fcce9a0a705abd7db2117d95e340df8a4333f00be5efb64c"}, + {file = "coverage-7.2.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:40cc0f91c6cde033da493227797be2826cbf8f388eaa36a0271a97a332bfd7ce"}, + {file = "coverage-7.2.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a66e055254a26c82aead7ff420d9fa8dc2da10c82679ea850d8feebf11074d88"}, + {file = "coverage-7.2.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c10fbc8a64aa0f3ed136b0b086b6b577bc64d67d5581acd7cc129af52654384e"}, + {file = "coverage-7.2.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9a22cbb5ede6fade0482111fa7f01115ff04039795d7092ed0db43522431b4f2"}, + {file = "coverage-7.2.5-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:292300f76440651529b8ceec283a9370532f4ecba9ad67d120617021bb5ef139"}, + {file = "coverage-7.2.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:7ff8f3fb38233035028dbc93715551d81eadc110199e14bbbfa01c5c4a43f8d8"}, + {file = "coverage-7.2.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:a08c7401d0b24e8c2982f4e307124b671c6736d40d1c39e09d7a8687bddf83ed"}, + {file = "coverage-7.2.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ef9659d1cda9ce9ac9585c045aaa1e59223b143f2407db0eaee0b61a4f266fb6"}, + {file = "coverage-7.2.5-cp311-cp311-win32.whl", hash = "sha256:30dcaf05adfa69c2a7b9f7dfd9f60bc8e36b282d7ed25c308ef9e114de7fc23b"}, + {file = "coverage-7.2.5-cp311-cp311-win_amd64.whl", hash = "sha256:97072cc90f1009386c8a5b7de9d4fc1a9f91ba5ef2146c55c1f005e7b5c5e068"}, + {file = "coverage-7.2.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:bebea5f5ed41f618797ce3ffb4606c64a5de92e9c3f26d26c2e0aae292f015c1"}, + {file = "coverage-7.2.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:828189fcdda99aae0d6bf718ea766b2e715eabc1868670a0a07bf8404bf58c33"}, + {file = "coverage-7.2.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e8a95f243d01ba572341c52f89f3acb98a3b6d1d5d830efba86033dd3687ade"}, + {file = "coverage-7.2.5-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8834e5f17d89e05697c3c043d3e58a8b19682bf365048837383abfe39adaed5"}, + {file = "coverage-7.2.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d1f25ee9de21a39b3a8516f2c5feb8de248f17da7eead089c2e04aa097936b47"}, + {file = "coverage-7.2.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = 
"sha256:1637253b11a18f453e34013c665d8bf15904c9e3c44fbda34c643fbdc9d452cd"}, + {file = "coverage-7.2.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8e575a59315a91ccd00c7757127f6b2488c2f914096077c745c2f1ba5b8c0969"}, + {file = "coverage-7.2.5-cp37-cp37m-win32.whl", hash = "sha256:509ecd8334c380000d259dc66feb191dd0a93b21f2453faa75f7f9cdcefc0718"}, + {file = "coverage-7.2.5-cp37-cp37m-win_amd64.whl", hash = "sha256:12580845917b1e59f8a1c2ffa6af6d0908cb39220f3019e36c110c943dc875b0"}, + {file = "coverage-7.2.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b5016e331b75310610c2cf955d9f58a9749943ed5f7b8cfc0bb89c6134ab0a84"}, + {file = "coverage-7.2.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:373ea34dca98f2fdb3e5cb33d83b6d801007a8074f992b80311fc589d3e6b790"}, + {file = "coverage-7.2.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a063aad9f7b4c9f9da7b2550eae0a582ffc7623dca1c925e50c3fbde7a579771"}, + {file = "coverage-7.2.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:38c0a497a000d50491055805313ed83ddba069353d102ece8aef5d11b5faf045"}, + {file = "coverage-7.2.5-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2b3b05e22a77bb0ae1a3125126a4e08535961c946b62f30985535ed40e26614"}, + {file = "coverage-7.2.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0342a28617e63ad15d96dca0f7ae9479a37b7d8a295f749c14f3436ea59fdcb3"}, + {file = "coverage-7.2.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:cf97ed82ca986e5c637ea286ba2793c85325b30f869bf64d3009ccc1a31ae3fd"}, + {file = "coverage-7.2.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c2c41c1b1866b670573657d584de413df701f482574bad7e28214a2362cb1fd1"}, + {file = "coverage-7.2.5-cp38-cp38-win32.whl", hash = "sha256:10b15394c13544fce02382360cab54e51a9e0fd1bd61ae9ce012c0d1e103c813"}, + {file = "coverage-7.2.5-cp38-cp38-win_amd64.whl", hash = "sha256:a0b273fe6dc655b110e8dc89b8ec7f1a778d78c9fd9b4bda7c384c8906072212"}, + {file = "coverage-7.2.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5c587f52c81211d4530fa6857884d37f514bcf9453bdeee0ff93eaaf906a5c1b"}, + {file = "coverage-7.2.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4436cc9ba5414c2c998eaedee5343f49c02ca93b21769c5fdfa4f9d799e84200"}, + {file = "coverage-7.2.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6599bf92f33ab041e36e06d25890afbdf12078aacfe1f1d08c713906e49a3fe5"}, + {file = "coverage-7.2.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:857abe2fa6a4973f8663e039ead8d22215d31db613ace76e4a98f52ec919068e"}, + {file = "coverage-7.2.5-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6f5cab2d7f0c12f8187a376cc6582c477d2df91d63f75341307fcdcb5d60303"}, + {file = "coverage-7.2.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:aa387bd7489f3e1787ff82068b295bcaafbf6f79c3dad3cbc82ef88ce3f48ad3"}, + {file = "coverage-7.2.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:156192e5fd3dbbcb11cd777cc469cf010a294f4c736a2b2c891c77618cb1379a"}, + {file = "coverage-7.2.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bd3b4b8175c1db502adf209d06136c000df4d245105c8839e9d0be71c94aefe1"}, + {file = "coverage-7.2.5-cp39-cp39-win32.whl", hash = "sha256:ddc5a54edb653e9e215f75de377354e2455376f416c4378e1d43b08ec50acc31"}, + {file = "coverage-7.2.5-cp39-cp39-win_amd64.whl", hash = 
"sha256:338aa9d9883aaaad53695cb14ccdeb36d4060485bb9388446330bef9c361c252"}, + {file = "coverage-7.2.5-pp37.pp38.pp39-none-any.whl", hash = "sha256:8877d9b437b35a85c18e3c6499b23674684bf690f5d96c1006a1ef61f9fdf0f3"}, + {file = "coverage-7.2.5.tar.gz", hash = "sha256:f99ef080288f09ffc687423b8d60978cf3a465d3f404a18d1a05474bd8575a47"}, ] [package.dependencies] @@ -542,6 +542,21 @@ files = [ [package.extras] test = ["pytest (>=6)"] +[[package]] +name = "execnet" +version = "1.9.0" +description = "execnet: rapid multi-Python deployment" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "execnet-1.9.0-py2.py3-none-any.whl", hash = "sha256:a295f7cc774947aac58dde7fdc85f4aa00c42adf5d8f5468fc630c1acf30a142"}, + {file = "execnet-1.9.0.tar.gz", hash = "sha256:8f694f3ba9cc92cab508b152dcfe322153975c29bda272e2fd7f3f00f36e47c5"}, +] + +[package.extras] +testing = ["pre-commit"] + [[package]] name = "fake-useragent" version = "1.1.3" @@ -576,19 +591,19 @@ lua = ["lupa (>=1.14,<2.0)"] [[package]] name = "fastapi" -version = "0.95.1" +version = "0.95.2" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "fastapi-0.95.1-py3-none-any.whl", hash = "sha256:a870d443e5405982e1667dfe372663abf10754f246866056336d7f01c21dab07"}, - {file = "fastapi-0.95.1.tar.gz", hash = "sha256:9569f0a381f8a457ec479d90fa01005cfddaae07546eb1f3fa035bc4797ae7d5"}, + {file = "fastapi-0.95.2-py3-none-any.whl", hash = "sha256:d374dbc4ef2ad9b803899bd3360d34c534adc574546e25314ab72c0c4411749f"}, + {file = "fastapi-0.95.2.tar.gz", hash = "sha256:4d9d3e8c71c73f11874bcf5e33626258d143252e329a01002f767306c64fb982"}, ] [package.dependencies] pydantic = ">=1.6.2,<1.7 || >1.7,<1.7.1 || >1.7.1,<1.7.2 || >1.7.2,<1.7.3 || >1.7.3,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0" -starlette = ">=0.26.1,<0.27.0" +starlette = ">=0.27.0,<0.28.0" [package.extras] all = ["email-validator (>=1.1.1)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "python-multipart (>=0.0.5)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] @@ -946,18 +961,18 @@ files = [ [[package]] name = "platformdirs" -version = "3.5.0" +version = "3.5.1" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "platformdirs-3.5.0-py3-none-any.whl", hash = "sha256:47692bc24c1958e8b0f13dd727307cff1db103fca36399f457da8e05f222fdc4"}, - {file = "platformdirs-3.5.0.tar.gz", hash = "sha256:7954a68d0ba23558d753f73437c55f89027cf8f5108c19844d4b82e5af396335"}, + {file = "platformdirs-3.5.1-py3-none-any.whl", hash = "sha256:e2378146f1964972c03c085bb5662ae80b2b8c06226c54b2ff4aa9483e8a13a5"}, + {file = "platformdirs-3.5.1.tar.gz", hash = "sha256:412dae91f52a6f84830f39a8078cecd0e866cb72294a5c66808e74d5e88d251f"}, ] [package.extras] -docs = ["furo (>=2023.3.27)", "proselint (>=0.13)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"] +docs = ["furo (>=2023.3.27)", "proselint (>=0.13)", "sphinx (>=6.2.1)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"] test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"] [[package]] @@ -1150,6 +1165,27 @@ files = [ [package.dependencies] pytest = ">=3.2.5" +[[package]] +name = "pytest-xdist" +version = "3.3.1" +description = "pytest xdist plugin for distributed testing, most importantly across multiple CPUs" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-xdist-3.3.1.tar.gz", hash = "sha256:d5ee0520eb1b7bcca50a60a518ab7a7707992812c578198f8b44fdfac78e8c93"}, + {file = "pytest_xdist-3.3.1-py3-none-any.whl", hash = "sha256:ff9daa7793569e6a68544850fd3927cd257cc03a7ef76c95e86915355e82b5f2"}, +] + +[package.dependencies] +execnet = ">=1.1" +pytest = ">=6.2.0" + +[package.extras] +psutil = ["psutil (>=3.0)"] +setproctitle = ["setproctitle"] +testing = ["filelock"] + [[package]] name = "pytz" version = "2023.3" @@ -1162,35 +1198,20 @@ files = [ {file = "pytz-2023.3.tar.gz", hash = "sha256:1d8ce29db189191fb55338ee6d0387d82ab59f3d00eac103412d64e0ebd0c588"}, ] -[[package]] -name = "pytz-deprecation-shim" -version = "0.1.0.post0" -description = "Shims to make deprecation of pytz easier" -category = "main" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" -files = [ - {file = "pytz_deprecation_shim-0.1.0.post0-py2.py3-none-any.whl", hash = "sha256:8314c9692a636c8eb3bda879b9f119e350e93223ae83e70e80c31675a0fdc1a6"}, - {file = "pytz_deprecation_shim-0.1.0.post0.tar.gz", hash = "sha256:af097bae1b616dde5c5744441e2ddc69e74dfdcb0c263129610d85b87445a59d"}, -] - -[package.dependencies] -tzdata = {version = "*", markers = "python_version >= \"3.6\""} - [[package]] name = "redis" -version = "4.5.4" +version = "4.5.5" description = "Python client for Redis database and key-value store" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "redis-4.5.4-py3-none-any.whl", hash = "sha256:2c19e6767c474f2e85167909061d525ed65bea9301c0770bb151e041b7ac89a2"}, - {file = "redis-4.5.4.tar.gz", hash = "sha256:73ec35da4da267d6847e47f68730fdd5f62e2ca69e3ef5885c6a78a9374c3893"}, + {file = "redis-4.5.5-py3-none-any.whl", hash = "sha256:77929bc7f5dab9adf3acba2d3bb7d7658f1e0c2f1cafe7eb36434e751c471119"}, + {file = "redis-4.5.5.tar.gz", hash = "sha256:dc87a0bdef6c8bfe1ef1e1c40be7034390c2ae02d92dcd0c7ca1729443899880"}, ] [package.dependencies] -async-timeout = {version = ">=4.0.2", markers = "python_version <= \"3.11.2\""} +async-timeout = {version = ">=4.0.2", markers = "python_full_version <= \"3.11.2\""} [package.extras] hiredis = ["hiredis (>=1.0.0)"] @@ -1198,21 +1219,21 @@ ocsp = ["cryptography (>=36.0.1)", "pyopenssl (==20.0.1)", "requests 
(>=2.26.0)" [[package]] name = "requests" -version = "2.29.0" +version = "2.30.0" description = "Python HTTP for Humans." category = "main" optional = true python-versions = ">=3.7" files = [ - {file = "requests-2.29.0-py3-none-any.whl", hash = "sha256:e8f3c9be120d3333921d213eef078af392fba3933ab7ed2d1cba3b56f2568c3b"}, - {file = "requests-2.29.0.tar.gz", hash = "sha256:f2e34a75f4749019bb0e3effb66683630e4ffeaf75819fb51bebef1bf5aef059"}, + {file = "requests-2.30.0-py3-none-any.whl", hash = "sha256:10e94cc4f3121ee6da529d358cdaeaff2f1c409cd377dbc72b825852f2f7e294"}, + {file = "requests-2.30.0.tar.gz", hash = "sha256:239d7d4458afcb28a692cdd298d87542235f4ca8d36d03a15bfc128a6559a2f4"}, ] [package.dependencies] certifi = ">=2017.4.17" charset-normalizer = ">=2,<4" idna = ">=2.5,<4" -urllib3 = ">=1.21.1,<1.27" +urllib3 = ">=1.21.1,<3" [package.extras] socks = ["PySocks (>=1.5.6,!=1.5.7)"] @@ -1220,19 +1241,19 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "setuptools" -version = "67.7.2" +version = "67.8.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "setuptools-67.7.2-py3-none-any.whl", hash = "sha256:23aaf86b85ca52ceb801d32703f12d77517b2556af839621c641fca11287952b"}, - {file = "setuptools-67.7.2.tar.gz", hash = "sha256:f104fa03692a2602fa0fec6c6a9e63b6c8a968de13e17c026957dd1f53d80990"}, + {file = "setuptools-67.8.0-py3-none-any.whl", hash = "sha256:5df61bf30bb10c6f756eb19e7c9f3b473051f48db77fddbe06ff2ca307df9a6f"}, + {file = "setuptools-67.8.0.tar.gz", hash = "sha256:62642358adc77ffa87233bc4d2354c4b2682d214048f500964dbe760ccedf102"}, ] [package.extras] docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"] -testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] [[package]] @@ -1451,14 +1472,14 @@ test = ["pytest"] [[package]] name = "starlette" -version = "0.26.1" +version = "0.27.0" description = "The little ASGI library that shines." 
category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "starlette-0.26.1-py3-none-any.whl", hash = "sha256:e87fce5d7cbdde34b76f0ac69013fd9d190d581d80681493016666e6f96c6d5e"}, - {file = "starlette-0.26.1.tar.gz", hash = "sha256:41da799057ea8620e4667a3e69a5b1923ebd32b1819c8fa75634bbe8d8bea9bd"}, + {file = "starlette-0.27.0-py3-none-any.whl", hash = "sha256:918416370e846586541235ccd38a474c08b80443ed31c578a418e2209b3eef91"}, + {file = "starlette-0.27.0.tar.gz", hash = "sha256:6a6b0d042acb8d469a01eba54e9cda6cbd24ac602c4cd016723117d6a7e73b75"}, ] [package.dependencies] @@ -1505,18 +1526,17 @@ files = [ [[package]] name = "tzlocal" -version = "4.3" +version = "5.0.1" description = "tzinfo object for the local timezone" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "tzlocal-4.3-py3-none-any.whl", hash = "sha256:b44c4388f3d34f25862cfbb387578a4d70fec417649da694a132f628a23367e2"}, - {file = "tzlocal-4.3.tar.gz", hash = "sha256:3f21d09e1b2aa9f2dacca12da240ca37de3ba5237a93addfd6d593afe9073355"}, + {file = "tzlocal-5.0.1-py3-none-any.whl", hash = "sha256:f3596e180296aaf2dbd97d124fe76ae3a0e3d32b258447de7b939b3fd4be992f"}, + {file = "tzlocal-5.0.1.tar.gz", hash = "sha256:46eb99ad4bdb71f3f72b7d24f4267753e240944ecfc16f25d2719ba89827a803"}, ] [package.dependencies] -pytz-deprecation-shim = "*" tzdata = {version = "*", markers = "platform_system == \"Windows\""} [package.extras] @@ -1524,20 +1544,21 @@ devenv = ["black", "check-manifest", "flake8", "pyroma", "pytest (>=4.3)", "pyte [[package]] name = "urllib3" -version = "1.26.15" +version = "2.0.2" description = "HTTP library with thread-safe connection pooling, file post, and more." category = "main" optional = true -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +python-versions = ">=3.7" files = [ - {file = "urllib3-1.26.15-py2.py3-none-any.whl", hash = "sha256:aa751d169e23c7479ce47a0cb0da579e3ede798f994f5816a74e4f4500dcea42"}, - {file = "urllib3-1.26.15.tar.gz", hash = "sha256:8a388717b9476f934a21484e8c8e61875ab60644d29b9b39e11e4b9dc1c6b305"}, + {file = "urllib3-2.0.2-py3-none-any.whl", hash = "sha256:d055c2f9d38dc53c808f6fdc8eab7360b6fdbbde02340ed25cfbcd817c62469e"}, + {file = "urllib3-2.0.2.tar.gz", hash = "sha256:61717a1095d7e155cdb737ac7bb2f4324a858a1e2e6466f6d03ff630ca68d3cc"}, ] [package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] -secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] -socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.1.0)", "urllib3-secure-extra"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] [[package]] name = "uvicorn" @@ -1652,4 +1673,4 @@ docs = ["Sphinx", "sphinx-rtd-theme", "sphinxcontrib-napoleon"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "655b2ffeb244dee0b37c0d757072db9afd8f35f0299a4a8cb469477e930d0c41" +content-hash = "070a25f1e976c5588db065f85d345ca966aa8683d5b29b5e2de4cc5a0b52de89" diff --git a/pyproject.toml b/pyproject.toml index a4110d5..dfb92b0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "sneakpeek-py" packages = [{ include = "sneakpeek" }] -version = "0.1.7" +version = "0.1.8" description = "Sneakpeek is a framework that helps to quickly and conviniently develop scrapers. 
It's the best choice for scrapers that have some specific complex scraping logic that needs to be run on a constant basis." authors = ["Dan Yazovsky "] maintainers = ["Dan Yazovsky "] @@ -50,6 +50,7 @@ pytest-lazy-fixture = "^0.6.3" pytest-asyncio = "^0.21.0" pytest-cov = "^4.0.0" aioresponses = "^0.7.4" +pytest-xdist = "^3.3.0" [build-system] requires = ["poetry-core"] diff --git a/sneakpeek/api.py b/sneakpeek/api.py index 91efe93..6499cac 100644 --- a/sneakpeek/api.py +++ b/sneakpeek/api.py @@ -1,9 +1,17 @@ +import os import pathlib import fastapi_jsonrpc as jsonrpc -from fastapi import Body +from fastapi import Body, Request, Response from fastapi.middleware.cors import CORSMiddleware from fastapi.staticfiles import StaticFiles +from prometheus_client import ( + CONTENT_TYPE_LATEST, + REGISTRY, + CollectorRegistry, + generate_latest, +) +from prometheus_client.multiprocess import MultiProcessCollector from pydantic import BaseModel from sneakpeek.lib.errors import ScraperHasActiveRunError, ScraperNotFoundError @@ -15,6 +23,7 @@ ) from sneakpeek.lib.queue import Queue, QueueABC from sneakpeek.lib.storage.base import ScraperJobsStorage, ScrapersStorage +from sneakpeek.metrics import count_invocations, measure_latency from sneakpeek.scraper_handler import ScraperHandler @@ -23,12 +32,24 @@ class Priority(BaseModel): value: int +def metrics(request: Request) -> Response: # pragma: no cover + if "prometheus_multiproc_dir" in os.environ: + registry = CollectorRegistry() + MultiProcessCollector(registry) + else: + registry = REGISTRY + + return Response( + generate_latest(registry), headers={"Content-Type": CONTENT_TYPE_LATEST} + ) + + def get_api_entrypoint( scrapers_storage: ScrapersStorage, jobs_storage: ScraperJobsStorage, queue: Queue, handlers: list[ScraperHandler], -) -> jsonrpc.Entrypoint: +) -> jsonrpc.Entrypoint: # pragma: no cover """ Create public JsonRPC API entrypoint (mostly mimics storage and queue API) @@ -51,18 +72,26 @@ async def search_scrapers( return await scrapers_storage.search_scrapers(name_filter, max_items, last_id) @entrypoint.method() + @count_invocations(subsystem="api") + @measure_latency(subsystem="api") async def get_scrapers() -> list[Scraper]: return await scrapers_storage.get_scrapers() @entrypoint.method(errors=[ScraperNotFoundError]) + @count_invocations(subsystem="api") + @measure_latency(subsystem="api") async def get_scraper(id: int = Body(...)) -> Scraper: return await scrapers_storage.get_scraper(id) @entrypoint.method() + @count_invocations(subsystem="api") + @measure_latency(subsystem="api") async def create_scraper(scraper: Scraper = Body(...)) -> Scraper: return await scrapers_storage.create_scraper(scraper) @entrypoint.method(errors=[ScraperNotFoundError, ScraperHasActiveRunError]) + @count_invocations(subsystem="api") + @measure_latency(subsystem="api") async def enqueue_scraper( scraper_id: int = Body(...), priority: ScraperJobPriority = Body(...), @@ -70,26 +99,38 @@ async def enqueue_scraper( return await queue.enqueue(scraper_id, priority) @entrypoint.method(errors=[ScraperNotFoundError]) + @count_invocations(subsystem="api") + @measure_latency(subsystem="api") async def update_scraper(scraper: Scraper = Body(...)) -> Scraper: return await scrapers_storage.update_scraper(scraper) @entrypoint.method(errors=[ScraperNotFoundError]) + @count_invocations(subsystem="api") + @measure_latency(subsystem="api") async def delete_scraper(id: int = Body(...)) -> Scraper: return await scrapers_storage.delete_scraper(id) 
@entrypoint.method(errors=[ScraperNotFoundError]) + @count_invocations(subsystem="api") + @measure_latency(subsystem="api") async def get_scraper_jobs(scraper_id: int = Body(...)) -> list[ScraperJob]: return await jobs_storage.get_scraper_jobs(scraper_id) @entrypoint.method() + @count_invocations(subsystem="api") + @measure_latency(subsystem="api") async def get_scraper_handlers() -> list[str]: return [handler.name for handler in handlers] @entrypoint.method() + @count_invocations(subsystem="api") + @measure_latency(subsystem="api") async def get_schedules() -> list[str]: return [schedule.value for schedule in ScraperSchedule] @entrypoint.method() + @count_invocations(subsystem="api") + @measure_latency(subsystem="api") async def get_priorities() -> list[Priority]: return [ Priority(name=priority.name, value=priority.value) @@ -108,7 +149,7 @@ def create_api( jobs_storage: ScraperJobsStorage, queue: QueueABC, handlers: list[ScraperHandler], -) -> jsonrpc.API: +) -> jsonrpc.API: # pragma: no cover """ Create JsonRPC API (FastAPI is used under the hood) @@ -132,6 +173,7 @@ def create_api( handlers, ) ) + app.add_route("/metrics", metrics) app.mount( "/docs/", StaticFiles( diff --git a/sneakpeek/metrics.py b/sneakpeek/metrics.py index 262c0ce..ff5d373 100644 --- a/sneakpeek/metrics.py +++ b/sneakpeek/metrics.py @@ -1,5 +1,6 @@ import asyncio from functools import wraps +from typing import Any from prometheus_client import Counter, Gauge, Histogram @@ -29,6 +30,13 @@ ) +def _get_full_class_name(obj: Any) -> str: + module = obj.__class__.__module__ + if module is None or module == str.__class__.__module__: + return obj.__class__.__name__ + return module + "." + obj.__class__.__name__ + + def measure_latency(subsystem: str): """ Decorator for measuring latency of the function (works for both sync and async functions). 
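Before the hunks below, a short usage sketch may help review the `error` label change (editor's illustration, not part of the patch). The decorator order, the `subsystem` argument and the metric/label names follow the usage in `sneakpeek/api.py` and `tests/test_metrics.py`; `fetch_page` itself is a made-up example.

```python
from sneakpeek.metrics import count_invocations, measure_latency


@count_invocations(subsystem="demo")
@measure_latency(subsystem="demo")
async def fetch_page(fail: bool = False) -> str:
    # Each call records a sample in the `sneakpeek_latency` histogram
    # (labels: subsystem, method) and increments `sneakpeek_invocations_total`
    # with type="total" plus, depending on the outcome, type="success" or
    # type="error". After this patch the `error` label holds the fully
    # qualified exception class name (via _get_full_class_name) instead of
    # the repr of `e.__class__`.
    if fail:
        raise ValueError("boom")  # counted as type="error", error="ValueError"
    return "ok"
```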
@@ -121,7 +129,7 @@ def sync_wrapper(*args, **kwargs):
                     subsystem=subsystem,
                     method=func.__name__,
                     type="error",
-                    error=e.__class__,
+                    error=_get_full_class_name(e),
                 ).inc()
                 raise
@@ -147,7 +155,7 @@ async def async_wrapper(*args, **kwargs):
                     subsystem=subsystem,
                     method=func.__name__,
                     type="error",
-                    error=e.__class__,
+                    error=_get_full_class_name(e),
                 ).inc()
                 raise
diff --git a/sneakpeek/runner.py b/sneakpeek/runner.py
index 32f842b..c899547 100644
--- a/sneakpeek/runner.py
+++ b/sneakpeek/runner.py
@@ -97,7 +97,7 @@ async def ping_session():
         context = ScraperContext(job.scraper.config, self._plugins, ping_session)
         try:
             await context.start_session()
-            await self._queue.ping_scraper_job(job.scraper.id, job.id)
+            await ping_session()
             handler = self._get_handler(job)
             job.result = await handler.run(context)
             job.status = ScraperJobStatus.SUCCEEDED
diff --git a/sneakpeek/server.py b/sneakpeek/server.py
index c297eb9..edbbdaf 100644
--- a/sneakpeek/server.py
+++ b/sneakpeek/server.py
@@ -5,7 +5,6 @@
 from traceback import format_exc
 
 import fastapi_jsonrpc as jsonrpc
-import prometheus_client
 import uvicorn
 
 from sneakpeek.api import create_api
@@ -17,8 +16,7 @@
 from sneakpeek.scraper_handler import ScraperHandler
 from sneakpeek.worker import Worker, WorkerABC
 
-API_DEFAULT_PORT = 8080
-METRICS_DEFAULT_PORT = 9090
+WEB_SERVER_DEFAULT_PORT = 8080
 WORKER_DEFAULT_CONCURRENCY = 50
 SCHEDULER_DEFAULT_LEASE_DURATION = timedelta(minutes=1)
 SCHEDULER_DEFAULT_STORAGE_POLL_DELAY = timedelta(seconds=5)
@@ -37,32 +35,28 @@ def __init__(
         self,
         worker: WorkerABC | None = None,
         scheduler: SchedulerABC | None = None,
-        api: jsonrpc.API | None = None,
-        api_port: int = API_DEFAULT_PORT,
-        expose_metrics: bool = True,
-        metrics_port: int = METRICS_DEFAULT_PORT,
+        web_server: jsonrpc.API | None = None,
+        web_server_port: int = WEB_SERVER_DEFAULT_PORT,
     ) -> None:
         """
         Args:
             worker (WorkerABC | None, optional): Worker that consumes the scraper jobs queue. Defaults to None.
             scheduler (SchedulerABC | None, optional): Scrapers scheduler. Defaults to None.
-            api (jsonrpc.API | None, optional): API to interact with the system. Defaults to None.
-            api_port (int, optional): Port which is used for API and UI. Defaults to 8080.
-            expose_metrics (bool, optional): Whether to expose metrics (prometheus format). Defaults to True.
-            metrics_port (int, optional): Port which is used to expose metric. Defaults to 9090.
+            web_server (jsonrpc.API | None, optional): Web server that implements the API and exposes the UI to interact with the system. Defaults to None.
+            web_server_port (int, optional): Port which is used for the web server (API, UI and metrics). Defaults to 8080.
         """
         self._logger = logging.getLogger(__name__)
         self.worker = worker
         self.scheduler = scheduler
         self.api_config = (
-            uvicorn.Config(api, host="0.0.0.0", port=api_port, log_config=None)
-            if api
+            uvicorn.Config(
+                web_server, host="0.0.0.0", port=web_server_port, log_config=None
+            )
+            if web_server
             else None
         )
-        self.api_server = uvicorn.Server(self.api_config) if api else None
+        self.web_server = uvicorn.Server(self.api_config) if web_server else None
         self.scheduler = scheduler
-        self.expose_metrics = expose_metrics
-        self.metrics_port = metrics_port
 
     @staticmethod
     def create(
@@ -70,16 +64,14 @@ def create(
         scrapers_storage: ScrapersStorage,
         jobs_storage: ScraperJobsStorage,
         lease_storage: LeaseStorage,
-        with_api: bool = True,
+        with_web_server: bool = True,
         with_worker: bool = True,
         with_scheduler: bool = True,
-        expose_metrics: bool = True,
         worker_max_concurrency: int = WORKER_DEFAULT_CONCURRENCY,
-        api_port: int = API_DEFAULT_PORT,
+        web_server_port: int = WEB_SERVER_DEFAULT_PORT,
         scheduler_storage_poll_delay: timedelta = SCHEDULER_DEFAULT_STORAGE_POLL_DELAY,
         scheduler_lease_duration: timedelta = SCHEDULER_DEFAULT_LEASE_DURATION,
         plugins: list[Plugin] | None = None,
-        metrics_port: int = METRICS_DEFAULT_PORT,
     ):
         """
         Create Sneakpeek server using default API, worker and scheduler implementations
@@ -89,12 +81,11 @@ def create(
             scrapers_storage (ScrapersStorage): Scrapers storage
             jobs_storage (ScraperJobsStorage): Jobs storage
             lease_storage (LeaseStorage): Lease storage
-            run_api (bool, optional): Whether to run API service. Defaults to True.
-            run_worker (bool, optional): Whether to run worker service. Defaults to True.
-            run_scheduler (bool, optional): Whether to run scheduler service. Defaults to True.
-            expose_metrics (bool, optional): Whether to expose metrics (prometheus format). Defaults to True.
+            with_web_server (bool, optional): Whether to run the web server (API, UI and metrics). Defaults to True.
+            with_worker (bool, optional): Whether to run the worker service. Defaults to True.
+            with_scheduler (bool, optional): Whether to run the scheduler service. Defaults to True.
             worker_max_concurrency (int, optional): Maximum number of concurrently executed scrapers. Defaults to 50.
-            api_port (int, optional): Port which is used for API and UI. Defaults to 8080.
+            web_server_port (int, optional): Port which is used for the web server (API, UI and metrics). Defaults to 8080.
             scheduler_storage_poll_delay (timedelta, optional): How long the scheduler waits before polling the storage for scraper updates. Defaults to 5 seconds.
             scheduler_lease_duration (timedelta, optional): How long the scheduler lease lasts. A lease is required for the scheduler to be able to create new scraper jobs; this ensures that at any point in time there is only one active scheduler instance. Defaults to 1 minute.
             plugins (list[Plugin] | None, optional): List of plugins that will be used by the scraper runner. Can be omitted if with_worker is False. Defaults to None.
@@ -121,12 +112,10 @@ def create( ) api = ( create_api(scrapers_storage, jobs_storage, queue, handlers) - if with_api + if with_web_server else None ) - return SneakpeekServer( - worker, scheduler, api, api_port, expose_metrics, metrics_port - ) + return SneakpeekServer(worker, scheduler, api, web_server_port) def serve( self, @@ -146,10 +135,8 @@ def serve( loop.create_task(self.scheduler.start()) if self.worker: loop.create_task(self.worker.start()) - if self.api_server: - loop.create_task(self.api_server.serve()) - if self.expose_metrics: - prometheus_client.start_http_server(self.metrics_port) + if self.web_server: + loop.create_task(self.web_server.serve()) loop.create_task(self._install_signals()) if blocking: loop.run_forever() diff --git a/tests/test_integration.py b/tests/test_integration.py index 5079ecb..4acee4e 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -107,9 +107,8 @@ def server_with_scheduler(storages: Storages) -> SneakpeekServer: scrapers_storage=scrapers_storage, jobs_storage=jobs_storage, lease_storage=lease_storage, - with_api=False, + with_web_server=False, scheduler_storage_poll_delay=timedelta(seconds=1), - expose_metrics=False, ) @@ -121,10 +120,9 @@ def server_with_worker_only(storages: Storages) -> SneakpeekServer: scrapers_storage=scrapers_storage, jobs_storage=jobs_storage, lease_storage=lease_storage, - with_api=False, + with_web_server=False, with_scheduler=False, worker_max_concurrency=1, - expose_metrics=False, ) diff --git a/tests/test_metrics.py b/tests/test_metrics.py new file mode 100644 index 0000000..69b8867 --- /dev/null +++ b/tests/test_metrics.py @@ -0,0 +1,138 @@ +import pytest +from prometheus_client import REGISTRY + +from sneakpeek.metrics import count_invocations, measure_latency + +SUBSYSTEM = "test" + +exception_to_raise = ValueError() +exception_to_raise_name = ValueError.__name__ + + +@count_invocations(SUBSYSTEM) +@measure_latency(SUBSYSTEM) +async def async_test_fn(fail: bool = False): + if fail: + raise exception_to_raise + return 1 + + +@count_invocations(SUBSYSTEM) +@measure_latency(SUBSYSTEM) +def sync_test_fn(fail: bool = False): + if fail: + raise exception_to_raise + return 1 + + +latency_labels_sync = { + "subsystem": SUBSYSTEM, + "method": sync_test_fn.__name__, +} +latency_labels_async = { + "subsystem": SUBSYSTEM, + "method": async_test_fn.__name__, +} + + +def invocation_labels_sync(type: str, error: str = ""): + return { + "subsystem": SUBSYSTEM, + "method": sync_test_fn.__name__, + "type": type, + "error": error, + } + + +def invocation_labels_async(type: str, error: str = ""): + return { + "subsystem": SUBSYSTEM, + "method": async_test_fn.__name__, + "type": type, + "error": error, + } + + +@pytest.mark.asyncio +async def test_measure_latency_async(): + before = REGISTRY.get_sample_value("sneakpeek_latency_count", latency_labels_async) + await async_test_fn() + after = REGISTRY.get_sample_value("sneakpeek_latency_count", latency_labels_async) + assert after - (before or 0) == 1 + + +@pytest.mark.asyncio +async def test_measure_latency_sync(): + before = REGISTRY.get_sample_value("sneakpeek_latency_count", latency_labels_sync) + sync_test_fn() + after = REGISTRY.get_sample_value("sneakpeek_latency_count", latency_labels_sync) + assert after - (before or 0) == 1 + + +@pytest.mark.asyncio +async def test_count_invocations_async(): + before_total = REGISTRY.get_sample_value( + "sneakpeek_invocations_total", + invocation_labels_async("total"), + ) + before_success = REGISTRY.get_sample_value( + 
"sneakpeek_invocations_total", + invocation_labels_async("success"), + ) + before_error = REGISTRY.get_sample_value( + "sneakpeek_invocations_total", + invocation_labels_async("error", exception_to_raise_name), + ) + await async_test_fn(fail=False) + with pytest.raises(type(exception_to_raise)): + await async_test_fn(fail=True) + + after_total = REGISTRY.get_sample_value( + "sneakpeek_invocations_total", + invocation_labels_async("total"), + ) + after_success = REGISTRY.get_sample_value( + "sneakpeek_invocations_total", + invocation_labels_async("success"), + ) + after_error = REGISTRY.get_sample_value( + "sneakpeek_invocations_total", + invocation_labels_async("error", exception_to_raise_name), + ) + assert after_total - (before_total or 0) == 2 + assert after_success - (before_success or 0) == 1 + assert after_error - (before_error or 0) == 1 + + +def test_count_invocations_sync(): + before_total = REGISTRY.get_sample_value( + "sneakpeek_invocations_total", + invocation_labels_sync("total"), + ) + before_success = REGISTRY.get_sample_value( + "sneakpeek_invocations_total", + invocation_labels_sync("success"), + ) + before_error = REGISTRY.get_sample_value( + "sneakpeek_invocations_total", + invocation_labels_sync("error", exception_to_raise_name), + ) + sync_test_fn(fail=False) + with pytest.raises(type(exception_to_raise)): + sync_test_fn(fail=True) + + after_total = REGISTRY.get_sample_value( + "sneakpeek_invocations_total", + invocation_labels_sync("total"), + ) + after_success = REGISTRY.get_sample_value( + "sneakpeek_invocations_total", + invocation_labels_sync("success"), + ) + after_error = REGISTRY.get_sample_value( + "sneakpeek_invocations_total", + invocation_labels_sync("error", exception_to_raise_name), + ) + assert after_total - (before_total or 0) == 2 + assert after_success - (before_success or 0) == 1 + assert after_error - (before_error or 0) == 1 diff --git a/tests/test_runner.py b/tests/test_runner.py new file mode 100644 index 0000000..3a165d2 --- /dev/null +++ b/tests/test_runner.py @@ -0,0 +1,198 @@ +from datetime import datetime +from unittest.mock import AsyncMock + +import pytest + +from sneakpeek.lib.errors import ScraperJobPingFinishedError +from sneakpeek.lib.models import ( + Scraper, + ScraperJob, + ScraperJobPriority, + ScraperJobStatus, + ScraperSchedule, +) +from sneakpeek.lib.queue import QueueABC +from sneakpeek.lib.storage.base import ScraperJobsStorage +from sneakpeek.runner import LocalRunner, Runner, RunnerABC +from sneakpeek.scraper_config import ScraperConfig +from sneakpeek.scraper_context import ScraperContext +from sneakpeek.scraper_handler import ScraperHandler + +FAILURE_TEXT = "failure" +RESULT = "result" + +EXISTING_SCRAPER_HANDLER = "ExistingScraperHandler" +NON_EXISTING_SCRAPER_HANDLER = "NonExistingScraperHandler" + + +class TestScraper(ScraperHandler): + def __init__(self, succees_func, failure_func) -> None: + self.success_func = succees_func + self.failure_func = failure_func + + @property + def name(self) -> str: + return EXISTING_SCRAPER_HANDLER + + async def run(self, context: ScraperContext) -> str: + await context.update_scraper_state("some state") + await context.ping_session() + if context.params["fail"]: + return await self.failure_func() + return await self.success_func() + + +@pytest.fixture +def scraper_handler_succeeding_impl(): + yield AsyncMock(return_value=RESULT) + + +@pytest.fixture +def scraper_handler_failing_impl(): + yield AsyncMock(side_effect=Exception(FAILURE_TEXT)) + + +@pytest.fixture +def scraper_handler( + 
scraper_handler_succeeding_impl, + scraper_handler_failing_impl, +) -> ScraperHandler: + yield TestScraper(scraper_handler_succeeding_impl, scraper_handler_failing_impl) + + +@pytest.fixture +def queue(): + yield AsyncMock() + + +@pytest.fixture +def storage(): + yield AsyncMock() + + +@pytest.fixture +def runner( + scraper_handler: ScraperHandler, queue: QueueABC, storage: ScraperJobsStorage +) -> RunnerABC: + yield Runner( + handlers=[scraper_handler], + queue=queue, + storage=storage, + ) + + +@pytest.fixture +def local_runner() -> LocalRunner: + yield LocalRunner() + + +def get_scraper_job( + *, + fail: bool, + existing: bool, + status: ScraperJobStatus = ScraperJobStatus.STARTED, +) -> ScraperJob: + return ScraperJob( + id=100, + scraper=Scraper( + id=1, + name="test_scraper", + schedule=ScraperSchedule.INACTIVE, + handler=( + EXISTING_SCRAPER_HANDLER if existing else NON_EXISTING_SCRAPER_HANDLER + ), + config=ScraperConfig(params={"fail": fail}), + ), + status=status, + priority=ScraperJobPriority.NORMAL, + created_at=datetime.now(), + ) + + +@pytest.mark.asyncio +async def test_runner_run_job_success( + scraper_handler_succeeding_impl: AsyncMock, + scraper_handler_failing_impl: AsyncMock, + runner: RunnerABC, + queue: QueueABC, + storage: ScraperJobsStorage, +) -> None: + job = get_scraper_job(fail=False, existing=True) + await runner.run(job) + scraper_handler_succeeding_impl.assert_awaited_once() + scraper_handler_failing_impl.assert_not_awaited() + queue.ping_scraper_job.assert_awaited() + assert job.status == ScraperJobStatus.SUCCEEDED + assert job.result == RESULT + storage.update_scraper_job.assert_awaited_once_with(job) + + +@pytest.mark.asyncio +async def test_runner_run_job_failure( + scraper_handler_succeeding_impl: AsyncMock, + scraper_handler_failing_impl: AsyncMock, + runner: RunnerABC, + queue: QueueABC, + storage: ScraperJobsStorage, +) -> None: + job = get_scraper_job(fail=True, existing=True) + await runner.run(job) + scraper_handler_succeeding_impl.assert_not_awaited() + scraper_handler_failing_impl.assert_awaited_once() + queue.ping_scraper_job.assert_awaited() + assert job.status == ScraperJobStatus.FAILED + assert job.result == FAILURE_TEXT + storage.update_scraper_job.assert_awaited_once_with(job) + + +@pytest.mark.asyncio +async def test_runner_run_job_non_existent( + runner: RunnerABC, + queue: QueueABC, + storage: ScraperJobsStorage, +) -> None: + job = get_scraper_job(fail=False, existing=False) + await runner.run(job) + queue.ping_scraper_job.assert_awaited() + assert job.status == ScraperJobStatus.FAILED + assert "NonExistingScraperHandler" in job.result + storage.update_scraper_job.assert_awaited_once_with(job) + + +@pytest.mark.asyncio +async def test_runner_ping_killed( + runner: RunnerABC, + queue: QueueABC, + storage: ScraperJobsStorage, +) -> None: + job = get_scraper_job(fail=False, existing=False, status=ScraperJobStatus.KILLED) + queue.ping_scraper_job.side_effect = ScraperJobPingFinishedError() + await runner.run(job) + queue.ping_scraper_job.assert_awaited() + # job status is not overriden + assert job.status == ScraperJobStatus.KILLED + storage.update_scraper_job.assert_awaited_once_with(job) + + +def test_local_runner_job_succeeds( + scraper_handler_succeeding_impl: AsyncMock, + scraper_handler_failing_impl: AsyncMock, + scraper_handler: ScraperHandler, + local_runner: LocalRunner, +): + config = ScraperConfig(params={"fail": False}) + local_runner.run(scraper_handler, config) + scraper_handler_succeeding_impl.assert_awaited_once() + 
scraper_handler_failing_impl.assert_not_awaited() + + +def test_local_runner_job_fails( + scraper_handler_succeeding_impl: AsyncMock, + scraper_handler_failing_impl: AsyncMock, + scraper_handler: ScraperHandler, + local_runner: LocalRunner, +): + config = ScraperConfig(params={"fail": True}) + local_runner.run(scraper_handler, config) + scraper_handler_succeeding_impl.assert_not_awaited() + scraper_handler_failing_impl.assert_awaited_once() diff --git a/tests/test_scraper_context.py b/tests/test_scraper_context.py index 23e89e7..2752dc5 100644 --- a/tests/test_scraper_context.py +++ b/tests/test_scraper_context.py @@ -192,7 +192,7 @@ async def test_download_file_with_no_file_path_specified(): @pytest.mark.asyncio async def test_download_file_with_file_path_specified(): with aioresponses() as response: - file_path = "tmp_test_file_path" + file_path = test_download_file_with_file_path_specified.__name__ try: url = "test_url" body = "test body" @@ -218,7 +218,7 @@ async def test_download_file_with_file_path_specified(): @pytest.mark.asyncio async def test_download_file_with_process_fn(): with aioresponses() as response: - file_path = "tmp_test_file_path" + file_path = test_download_file_with_process_fn.__name__ try: url = "test_url" body = "test body" @@ -249,9 +249,19 @@ async def test_download_file_with_process_fn(): @pytest.mark.asyncio async def test_download_multiple_files(): with aioresponses() as response: - urls = ["url1", "url2", "url3"] - file_paths = ["file1", "file2", "file3"] - responses = ["body1", "body2", "body3"] + concurrent_responses = 3 + urls = [ + f"{test_download_multiple_files.__name__}_url_{i}" + for i in range(concurrent_responses) + ] + file_paths = [ + f"{test_download_multiple_files.__name__}_file_{i}" + for i in range(concurrent_responses) + ] + responses = [ + f"{test_download_multiple_files.__name__}_resp_{i}" + for i in range(concurrent_responses) + ] for url, resp in zip(urls, responses): response.get(url, status=200, body=resp) @@ -285,10 +295,23 @@ async def test_download_multiple_files(): @pytest.mark.asyncio async def test_download_multiple_files_with_process_fn(): with aioresponses() as response: - urls = ["url1", "url2", "url3"] - file_paths = ["file1", "file2", "file3"] - responses = ["body1", "body2", "body3"] - results = ["result1", "result2", "result3"] + concurrent_responses = 3 + urls = [ + f"{test_download_multiple_files_with_process_fn.__name__}_url_{i}" + for i in range(concurrent_responses) + ] + file_paths = [ + f"{test_download_multiple_files_with_process_fn.__name__}_file_{i}" + for i in range(concurrent_responses) + ] + responses = [ + f"{test_download_multiple_files_with_process_fn.__name__}_resp_{i}" + for i in range(concurrent_responses) + ] + results = [ + f"{test_download_multiple_files_with_process_fn.__name__}_results_{i}" + for i in range(concurrent_responses) + ] process_fn = AsyncMock(side_effect=results) for url, resp in zip(urls, responses):
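Finally, since the headline change of this release is that the metrics server was merged into the API server, a quick way to smoke-test a running instance (editor's illustration, not part of the patch) is to read the Prometheus exposition from the single web-server port, where `create_api` now mounts it via `app.add_route("/metrics", metrics)`:

```python
# Assumes a server (e.g. the demo container) is listening on the default
# web server port 8080; there is no separate 9090 metrics port anymore.
from urllib.request import urlopen

with urlopen("http://localhost:8080/metrics") as resp:
    exposition = resp.read().decode()

# The counters and histograms registered in sneakpeek/metrics.py (and the
# per-method API metrics added in this patch) should all appear here.
for line in exposition.splitlines():
    if line.startswith("sneakpeek_"):
        print(line)
```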