From cc8f98e85e78b11586e06eb41958fc0a2c091f18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Bournhonesque?= Date: Wed, 6 Nov 2024 11:50:09 +0100 Subject: [PATCH] feat: add ingredient analysis endpoint --- docs/usage.md | 52 +++++++++++++++++++++++++-- openfoodfacts/api.py | 85 ++++++++++++++++++++++++++++++++++++++++++++ tests/test_api.py | 75 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 209 insertions(+), 3 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 0dcba6f..6b5c10b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -30,20 +30,20 @@ All parameters are optional with the exception of user_agent, but here is a desc - `version`: API version (v2 is the default) - `environment`: either `org` for production environment (openfoodfacts.org) or `net` for staging (openfoodfacts.net) -*Get information about a product* +### Get information about a product ```python code = "3017620422003" api.product.get(code) ``` -*Perform text search* +### Perform text search ```python results = api.product.text_search("pizza") ``` -*Create a new product or update an existing one* +### Create a new product or update an existing one ```python results = api.product.update(body) @@ -54,6 +54,52 @@ the key "code" and its value, corresponding to the product that we want to update. Example: ```body = {'code': '3850334341389', 'product_name': 'Mlinci'}``` +### Perform ingredient analysis + +You can perform the ingredient analysis of a text in a given language using the API. Please note that ingredient analysis is costly, so prefer using the preprod server for this operation. 
+ +```python +from openfoodfacts import API, APIVersion, Environment + +api = API(user_agent="", + version=APIVersion.v3, + environment=Environment.net) + +results = api.product.parse_ingredients("water, sugar, salt", lang="en") + +print(results) + +## [{'ciqual_food_code': '18066', +# 'ecobalyse_code': 'tap-water', +# 'id': 'en:water', +# 'is_in_taxonomy': 1, +# 'percent_estimate': 66.6666666666667, +# 'percent_max': 100, +# 'percent_min': 33.3333333333333, +# 'text': 'water', +# 'vegan': 'yes', +# 'vegetarian': 'yes'}, +# {'ciqual_proxy_food_code': '31016', +# 'ecobalyse_code': 'sugar', +# 'id': 'en:sugar', +# 'is_in_taxonomy': 1, +# 'percent_estimate': 16.6666666666667, +# 'percent_max': 50, +# 'percent_min': 0, +# 'text': 'sugar', +# 'vegan': 'yes', +# 'vegetarian': 'yes'}, +# {'ciqual_food_code': '11058', +# 'id': 'en:salt', +# 'is_in_taxonomy': 1, +# 'percent_estimate': 16.6666666666667, +# 'percent_max': 33.3333333333333, +# 'percent_min': 0, +# 'text': 'salt', +# 'vegan': 'yes', +# 'vegetarian': 'yes'}] +``` + ## Using the dataset If you're planning to perform data analysis on Open Food Facts, the easiest way is to download and use the Open Food Facts dataset dump. 
Fortunately it can be done really easily using the SDK: diff --git a/openfoodfacts/api.py b/openfoodfacts/api.py index 58160ad..20665a2 100644 --- a/openfoodfacts/api.py +++ b/openfoodfacts/api.py @@ -1,3 +1,4 @@ +import logging from typing import Any, Dict, List, Optional, Tuple, Union, cast import requests @@ -5,6 +6,8 @@ from .types import APIConfig, APIVersion, Country, Environment, Facet, Flavor, JSONType from .utils import URLBuilder, http_session +logger = logging.getLogger(__name__) + def get_http_auth(environment: Environment) -> Optional[Tuple[str, str]]: return ("off", "off") if environment is Environment.net else None @@ -311,6 +314,88 @@ def select_image( r.raise_for_status() return r + def parse_ingredients( + self, text: str, lang: str, timeout: int = 10 + ) -> list[JSONType]: + """Parse ingredients text using Product Opener API. + + It is only available for `off` flavor (food). + + The result is a list of ingredients, each ingredient is a dict with the + following keys: + + - id: the ingredient ID. Having an ID does not mean that the + ingredient is recognized; you must check if it exists in the + taxonomy. 
+ - text: the ingredient text (as it appears in the input ingredients + list) + - percent_min: the minimum percentage of the ingredient in the product + - percent_max: the maximum percentage of the ingredient in the product + - percent_estimate: the estimated percentage of the ingredient in the + product + - vegan (str, e.g. 'yes'): optional key indicating if the ingredient is vegan + - vegetarian (str, e.g. 'yes'): optional key indicating if the ingredient is + vegetarian + + :param server_type: the server type (project) to use + :param text: the ingredients text to parse + :param lang: the language of the text (used for parsing) as a 2-letter + code + :param timeout: the request timeout in seconds, defaults to 10s + :raises RuntimeError: a RuntimeError is raised if the parsing fails + :return: the list of parsed ingredients + """ + if self.api_config.flavor != Flavor.off: + raise ValueError("ingredient parsing is only available for food") + + if self.api_config.version != APIVersion.v3: + logger.warning( + "ingredient parsing is only available in v3 of the API (here: %s), using v3", + self.api_config.version, + ) + # by using "test" as code, we don't save any information to database + # This endpoint is specifically designed for testing purposes + url = f"{self.base_url}/api/v3/product/test" + + if len(text) == 0: + raise ValueError("text must be a non-empty string") + + try: + r = http_session.patch( + url, + auth=get_http_auth(self.api_config.environment), + json={ + "fields": "ingredients", + "lc": lang, + "tags_lc": lang, + "product": { + "lang": lang, + f"ingredients_text_{lang}": text, + }, + }, + timeout=timeout, + ) + except ( + requests.exceptions.ConnectionError, + requests.exceptions.SSLError, + requests.exceptions.Timeout, + ) as e: + raise RuntimeError( + f"Unable to parse ingredients: error during HTTP request: {e}" + ) + + if not r.ok: + raise RuntimeError( + f"Unable to parse ingredients (non-200 status code): {r.status_code}, {r.text}" + ) + + response_data = r.json() 
+ + if response_data.get("status") != "success": + raise RuntimeError(f"Unable to parse ingredients: {response_data}") + + return response_data["product"].get("ingredients", []) + class API: def __init__( diff --git a/tests/test_api.py b/tests/test_api.py index d0d7698..c1235bc 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,4 +1,5 @@ import json +import re import unittest import pytest @@ -105,6 +106,80 @@ def test_text_search(self): ) self.assertEqual(res["products"], ["banania", "banania big"]) + def test_parse_ingredients(self): + api = openfoodfacts.API(user_agent=TEST_USER_AGENT, version="v2") + ingredients_data = [ + { + "ciqual_food_code": "18066", + "ecobalyse_code": "tap-water", + "id": "en:water", + "is_in_taxonomy": 1, + "percent_estimate": 75, + "percent_max": 100, + "percent_min": 50, + "text": "eau", + "vegan": "yes", + "vegetarian": "yes", + }, + { + "ciqual_proxy_food_code": "31016", + "ecobalyse_code": "sugar", + "id": "en:sugar", + "is_in_taxonomy": 1, + "percent_estimate": 25, + "percent_max": 50, + "percent_min": 0, + "text": "sucre", + "vegan": "yes", + "vegetarian": "yes", + }, + ] + with requests_mock.mock() as mock: + response_data = { + "product": {"ingredients": ingredients_data}, + "status": "success", + } + mock.patch( + "https://world.openfoodfacts.org/api/v3/product/test", + text=json.dumps(response_data), + ) + res = api.product.parse_ingredients("eau, sucre", lang="fr") + assert res == ingredients_data + + def test_parse_ingredients_fail(self): + api = openfoodfacts.API(user_agent=TEST_USER_AGENT, version="v2") + with requests_mock.mock() as mock: + response_data = { + "status": "fail", + } + mock.patch( + "https://world.openfoodfacts.org/api/v3/product/test", + text=json.dumps(response_data), + ) + + with pytest.raises( + RuntimeError, + match="Unable to parse ingredients: {'status': 'fail'}", + ): + api.product.parse_ingredients("eau, sucre", lang="fr") + + def test_parse_ingredients_fail_non_HTTP_200(self): + api = 
openfoodfacts.API(user_agent=TEST_USER_AGENT, version="v2") + with requests_mock.mock() as mock: + mock.patch( + "https://world.openfoodfacts.org/api/v3/product/test", + status_code=400, + text='{"error": "Bad Request"}', + ) + + with pytest.raises( + RuntimeError, + match=re.escape( + 'Unable to parse ingredients (non-200 status code): 400, {"error": "Bad Request"}' + ), + ): + api.product.parse_ingredients("eau, sucre", lang="fr") + if __name__ == "__main__": unittest.main()