feat: add ingredient analysis endpoint

openfoodfacts · Nov 6, 2024 · cc8f98e · cc8f98e
1 parent c9508d7
commit cc8f98e
Show file tree

Hide file tree

Showing 3 changed files with 209 additions and 3 deletions.
diff --git a/docs/usage.md b/docs/usage.md
@@ -30,20 +30,20 @@ All parameters are optional with the exception of user_agent, but here is a desc
 - `version`: API version (v2 is the default)
 - `environment`: either `org` for production environment (openfoodfacts.org) or `net` for staging (openfoodfacts.net)
 
-*Get information about a product*
+### Get information about a product
 
 ```python
 code = "3017620422003"
 api.product.get(code)
 ```
 
-*Perform text search*
+### Perform text search
 
 ```python
 results = api.product.text_search("pizza")
 ```
 
-*Create a new product or update an existing one*
+### Create a new product or update an existing one
 
 ```python
 results = api.product.update(body)
@@ -54,6 +54,52 @@ the key "code" and its value, corresponding to the product that we
 want to update. Example:
 ```body = {'code': '3850334341389', 'product_name': 'Mlinci'}```
 
+### Perform ingredient analysis
+
+You can perform the ingredient analysis of a text in a given language using the API. Please note that ingredient analysis is costly, so prefer using the staging server (`Environment.net`, as in the example below) for this operation.
+
+```python
+from openfoodfacts import API, APIVersion, Environment
+
+api = API(user_agent="<application name>",
+          version=APIVersion.v3,
+          environment=Environment.net)
+
+results = api.product.parse_ingredients("water, sugar, salt", lang="en")
+
+print(results)
+
+# [{'ciqual_food_code': '18066',
+#  'ecobalyse_code': 'tap-water',
+#  'id': 'en:water',
+#  'is_in_taxonomy': 1,
+#  'percent_estimate': 66.6666666666667,
+#  'percent_max': 100,
+#  'percent_min': 33.3333333333333,
+#  'text': 'water',
+#  'vegan': 'yes',
+#  'vegetarian': 'yes'},
+# {'ciqual_proxy_food_code': '31016',
+#  'ecobalyse_code': 'sugar',
+#  'id': 'en:sugar',
+#  'is_in_taxonomy': 1,
+#  'percent_estimate': 16.6666666666667,
+#  'percent_max': 50,
+#  'percent_min': 0,
+#  'text': 'sugar',
+#  'vegan': 'yes',
+#  'vegetarian': 'yes'},
+# {'ciqual_food_code': '11058',
+#  'id': 'en:salt',
+#  'is_in_taxonomy': 1,
+#  'percent_estimate': 16.6666666666667,
+#  'percent_max': 33.3333333333333,
+#  'percent_min': 0,
+#  'text': 'salt',
+#  'vegan': 'yes',
+#  'vegetarian': 'yes'}]
+```
+
 ## Using the dataset
 
 If you're planning to perform data analysis on Open Food Facts, the easiest way is to download and use the Open Food Facts dataset dump. Fortunately it can be done really easily using the SDK:

diff --git a/openfoodfacts/api.py b/openfoodfacts/api.py
@@ -1,10 +1,13 @@
+import logging
 from typing import Any, Dict, List, Optional, Tuple, Union, cast
 
 import requests
 
 from .types import APIConfig, APIVersion, Country, Environment, Facet, Flavor, JSONType
 from .utils import URLBuilder, http_session
 
+logger = logging.getLogger(__name__)
+
 
 def get_http_auth(environment: Environment) -> Optional[Tuple[str, str]]:
     return ("off", "off") if environment is Environment.net else None
@@ -311,6 +314,88 @@ def select_image(
         r.raise_for_status()
         return r
 
+    def parse_ingredients(
+        self, text: str, lang: str, timeout: int = 10
+    ) -> list[JSONType]:
+        """Parse ingredients text using Product Opener API.
+
+        It is only available for `off` flavor (food).
+
+        The result is a list of ingredients, each ingredient is a dict with the
+        following keys:
+
+        - id: the ingredient ID. Having an ID does not mean that the
+            ingredient is recognized, you must check if it exists in the
+            taxonomy.
+        - text: the ingredient text (as it appears in the input ingredients
+            list)
+        - percent_min: the minimum percentage of the ingredient in the product
+        - percent_max: the maximum percentage of the ingredient in the product
+        - percent_estimate: the estimated percentage of the ingredient in the
+            product
+        - vegan (str): optional key indicating if the ingredient is vegan
+        - vegetarian (str): optional key indicating if the ingredient is
+            vegetarian (both keys hold strings such as "yes", not booleans)
+
+        :param text: the ingredients text to parse, it must be a non-empty
+            string (a ValueError is raised otherwise)
+        :param lang: the language of the text (used for parsing) as a 2-letter
+            code
+        :param timeout: the request timeout in seconds, defaults to 10s
+        :raises RuntimeError: a RuntimeError is raised if the parsing fails
+        :return: the list of parsed ingredients
+        """
+        if self.api_config.flavor != Flavor.off:
+            raise ValueError("ingredient parsing is only available for food")
+
+        if self.api_config.version != APIVersion.v3:
+            logger.warning(
+                "ingredient parsing is only available in v3 of the API (here: %s), using v3",
+                self.api_config.version,
+            )
+        # by using "test" as code, we don't save any information to database
+        # This endpoint is specifically designed for testing purposes
+        url = f"{self.base_url}/api/v3/product/test"
+
+        if len(text) == 0:
+            raise ValueError("text must be a non-empty string")
+
+        try:
+            r = http_session.patch(
+                url,
+                auth=get_http_auth(self.api_config.environment),
+                json={
+                    "fields": "ingredients",
+                    "lc": lang,
+                    "tags_lc": lang,
+                    "product": {
+                        "lang": lang,
+                        f"ingredients_text_{lang}": text,
+                    },
+                },
+                timeout=timeout,
+            )
+        except (
+            requests.exceptions.ConnectionError,
+            requests.exceptions.SSLError,
+            requests.exceptions.Timeout,
+        ) as e:
+            raise RuntimeError(
+                f"Unable to parse ingredients: error during HTTP request: {e}"
+            )
+
+        if not r.ok:
+            raise RuntimeError(
+                f"Unable to parse ingredients (non-200 status code): {r.status_code}, {r.text}"
+            )
+
+        response_data = r.json()
+
+        if response_data.get("status") != "success":
+            raise RuntimeError(f"Unable to parse ingredients: {response_data}")
+
+        return response_data["product"].get("ingredients", [])
+
 
 class API:
     def __init__(

diff --git a/tests/test_api.py b/tests/test_api.py
@@ -1,4 +1,5 @@
 import json
+import re
 import unittest
 
 import pytest
@@ -105,6 +106,80 @@ def test_text_search(self):
             )
             self.assertEqual(res["products"], ["banania", "banania big"])
 
+    def test_parse_ingredients(self):
+        api = openfoodfacts.API(user_agent=TEST_USER_AGENT, version="v2")
+        ingredients_data = [
+            {
+                "ciqual_food_code": "18066",
+                "ecobalyse_code": "tap-water",
+                "id": "en:water",
+                "is_in_taxonomy": 1,
+                "percent_estimate": 75,
+                "percent_max": 100,
+                "percent_min": 50,
+                "text": "eau",
+                "vegan": "yes",
+                "vegetarian": "yes",
+            },
+            {
+                "ciqual_proxy_food_code": "31016",
+                "ecobalyse_code": "sugar",
+                "id": "en:sugar",
+                "is_in_taxonomy": 1,
+                "percent_estimate": 25,
+                "percent_max": 50,
+                "percent_min": 0,
+                "text": "sucre",
+                "vegan": "yes",
+                "vegetarian": "yes",
+            },
+        ]
+        with requests_mock.mock() as mock:
+            response_data = {
+                "product": {"ingredients": ingredients_data},
+                "status": "success",
+            }
+            mock.patch(
+                "https://world.openfoodfacts.org/api/v3/product/test",
+                text=json.dumps(response_data),
+            )
+            res = api.product.parse_ingredients("eau, sucre", lang="fr")
+            assert res == ingredients_data
+
+    def test_parse_ingredients_fail(self):
+        api = openfoodfacts.API(user_agent=TEST_USER_AGENT, version="v2")
+        with requests_mock.mock() as mock:
+            response_data = {
+                "status": "fail",
+            }
+            mock.patch(
+                "https://world.openfoodfacts.org/api/v3/product/test",
+                text=json.dumps(response_data),
+            )
+
+            with pytest.raises(
+                RuntimeError,
+                match="Unable to parse ingredients: {'status': 'fail'}",
+            ):
+                api.product.parse_ingredients("eau, sucre", lang="fr")
+
+    def test_parse_ingredients_fail_non_HTTP_200(self):
+        api = openfoodfacts.API(user_agent=TEST_USER_AGENT, version="v2")
+        with requests_mock.mock() as mock:
+            mock.patch(
+                "https://world.openfoodfacts.org/api/v3/product/test",
+                status_code=400,
+                text='{"error": "Bad Request"}',
+            )
+
+            with pytest.raises(
+                RuntimeError,
+                match=re.escape(
+                    'Unable to parse ingredients (non-200 status code): 400, {"error": "Bad Request"}'
+                ),
+            ):
+                api.product.parse_ingredients("eau, sucre", lang="fr")
+
 
 if __name__ == "__main__":
     unittest.main()