diff --git a/README.md b/README.md index 946c533..77cfd89 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ A reverse-engineered asynchronous python wrapper for [Google Gemini](https://gem ## Features - **ImageFx Support** - Supports retrieving images generated by ImageFx, Google's latest AI image generator. +- **Extension Support** - Supports generating contents with [Gemini extensions](https://gemini.google.com/extensions) enabled, such as YouTube and Gmail. - **Classified Outputs** - Auto categorizes texts, web images and AI generated images from the response. - **Official Flavor** - Provides a simple and elegant interface inspired by [Google Generative AI](https://ai.google.dev/tutorials/python_quickstart)'s official API. - **Asynchronous** - Utilizes `asyncio` to run generating tasks and return outputs efficiently. @@ -125,6 +126,20 @@ async def main(): asyncio.run(main()) ``` +### Generate contents with Gemini extensions + +**Important**: To access Gemini extensions through the API, you must first activate them on the [Gemini website](https://gemini.google.com/extensions). As with image generation, Google also limits the availability of Gemini extensions. Here's a summary copied from the [official documentation](https://support.google.com/gemini/answer/13695044) (as of February 18th, 2024): + +>To use extensions in Gemini Apps: +> +>Sign in with your personal Google Account that you manage on your own. Extensions, including the Google Workspace extension, are currently not available to Google Workspace accounts for school, business, or other organizations. +> +>Have Gemini Apps Activity on. Extensions are only available when Gemini Apps Activity is turned on. +> +>Important: For now, extensions are available in **English, Japanese, and Korean** only. + +Note: the last item is not actually a region requirement; it only requires your Google account's **preferred language** to be set to one of the supported languages. 
You can change your language settings [here](https://myaccount.google.com/language). + ### Check and switch to other answer candidates A response from Gemini usually contains multiple reply candidates with different generated contents. You can check all candidates and choose one to continue the conversation. By default, the first candidate will be chosen automatically. diff --git a/src/gemini/client.py b/src/gemini/client.py index 07362e1..e91140b 100644 --- a/src/gemini/client.py +++ b/src/gemini/client.py @@ -202,52 +202,65 @@ async def generate_content( ) else: try: - body = json.loads(json.loads(response.text.split("\n")[2])[0][2]) - except (TypeError, json.JSONDecodeError): + body = json.loads(json.loads(response.text.split("\n")[2])[0][2]) # Plain request + + if not body[4]: + body = json.loads(json.loads(response.text.split("\n")[2])[4][2]) # Request with extensions as middleware + + if not body[4]: + raise APIError( + "Failed to parse response body. Data structure is invalid. To report this error, please submit an issue at https://github.com/HanaokaYuzu/Gemini-API/issues" + ) + except Exception: await self.close(0) raise APIError( "Failed to generate contents. Invalid response data received. Client will try to re-initiate on next request." 
) - candidates = [] - for candidate in body[4]: - web_images = ( - candidate[4] - and [ - WebImage(url=image[0][0][0], title=image[2], alt=image[0][4]) - for image in candidate[4] - ] - or [] - ) - generated_images = ( - candidate[12] - and candidate[12][7] - and candidate[12][7][0] - and [ - GeneratedImage( - url=image[0][3][3], - title=f"[Generated Image {image[3][6]}]", - alt=image[3][5][i], - cookies=self.cookies, + try: + candidates = [] + for candidate in body[4]: + web_images = ( + candidate[4] + and [ + WebImage(url=image[0][0][0], title=image[2], alt=image[0][4]) + for image in candidate[4] + ] + or [] + ) + generated_images = ( + candidate[12] + and candidate[12][7] + and candidate[12][7][0] + and [ + GeneratedImage( + url=image[0][3][3], + title=f"[Generated Image {image[3][6]}]", + alt=image[3][5][i], + cookies=self.cookies, + ) + for i, image in enumerate(candidate[12][7][0]) + ] + or [] + ) + candidates.append( + Candidate( + rcid=candidate[0], + text=candidate[1][0], + web_images=web_images, + generated_images=generated_images, ) - for i, image in enumerate(candidate[12][7][0]) - ] - or [] - ) - candidates.append( - Candidate( - rcid=candidate[0], - text=candidate[1][0], - web_images=web_images, - generated_images=generated_images, ) - ) - if not candidates: - raise GeminiError( - "Failed to generate contents. No output data found in response." - ) + if not candidates: + raise GeminiError( + "Failed to generate contents. No output data found in response." + ) - output = ModelOutput(metadata=body[1], candidates=candidates) + output = ModelOutput(metadata=body[1], candidates=candidates) + except IndexError: + raise APIError( + "Failed to parse response body. Data structure is invalid. 
To report this error, please submit an issue at https://github.com/HanaokaYuzu/Gemini-API/issues" + ) if isinstance(chat, ChatSession): chat.last_output = output diff --git a/tests/test_client_features.py b/tests/test_client_features.py index 0c870cc..158a856 100644 --- a/tests/test_client_features.py +++ b/tests/test_client_features.py @@ -48,6 +48,20 @@ async def test_ai_image_generation(self): self.assertTrue(image.url) logger.debug(image) + async def test_extension_google_workspace(self): + response = await self.geminiclient.generate_content( + "@Gmail What's the latest message in my mailbox?" + ) + self.assertTrue(response.text) + logger.debug(response) + + async def test_extension_youtube(self): + response = await self.geminiclient.generate_content( + "@Youtube What's the lastest activity of Taylor Swift?" + ) + self.assertTrue(response.text) + logger.debug(response) + async def test_reply_candidates(self): chat = self.geminiclient.start_chat() response = await chat.send_message(