Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: add text/csv handling to the python sdk [RUDOLPH-90] #194

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions .speakeasy/gen.lock
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
lockVersion: 2.0.0
id: 8b5fa338-9106-4734-abf0-e30d67044a90
management:
docChecksum: 21f469b38bb72725739ee9d9d0fc8780
docVersion: 1.0.51
speakeasyVersion: 1.418.1
generationVersion: 2.438.3
releaseVersion: 0.26.1
configChecksum: 55ded3ef4f1b052725cdab6587da0ea4
docChecksum: 3ffa7dee90c40fda6656210850e12475
docVersion: 1.0.52
speakeasyVersion: 1.421.0
generationVersion: 2.438.15
releaseVersion: 0.27.1
configChecksum: f885d47394f7c0242b6fbfc5f0244886
repoURL: https://github.com/Unstructured-IO/unstructured-python-client.git
repoSubDirectory: .
installationURL: https://github.com/Unstructured-IO/unstructured-python-client.git
published: true
features:
python:
acceptHeaders: 3.0.0
additionalDependencies: 1.0.0
constsAndDefaults: 1.0.4
core: 5.6.0
Expand Down Expand Up @@ -106,6 +107,7 @@ examples:
responses:
"200":
application/json: [{"type": "Title", "element_id": "6aa0ff22f91bbe7e26e8e25ca8052acd", "text": "LayoutParser: A Unified Toolkit for Deep Learning Based Document Image Analysis", "metadata": {"languages": ["eng"], "page_number": 1, "filename": "layout-parser-paper.pdf", "filetype": "application/pdf"}}]
text/csv: "0x42E2Ffaf1A"
"422":
application/json: {"detail": []}
5XX:
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

### Fixes
* Use the configured server_url for our split page "dummy" request
* Handle the `text/csv` output format: when CSV output is requested, the response is returned as raw bytes in `body` instead of parsed JSON elements

## 0.26.0

Expand Down
9 changes: 9 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,15 @@ client-generate-local:
speakeasy overlay apply -s ./openapi.json -o ./overlay_client.yaml > ./openapi_client.json
speakeasy generate sdk -s ./openapi_client.json -o ./ -l python

## client-generate-localhost: Generate the SDK using the openapi.json from the unstructured-api running at localhost:5000
# curl runs with --fail so that an HTTP error status (4xx/5xx) makes the
# download step exit non-zero and trips the guard below, instead of saving
# the server's error page as openapi.json.
# The python3 step parses the overlay output with a YAML loader and rewrites
# openapi_client.json as indented JSON before the SDK is generated.
.PHONY: client-generate-localhost
client-generate-localhost:
	curl --fail -o openapi.json http://localhost:5000/general/openapi.json || { echo "Failed to download openapi.json"; exit 1; }
	speakeasy overlay validate -o ./overlay_client.yaml
	speakeasy overlay apply -s ./openapi.json -o ./overlay_client.yaml > ./openapi_client.json
	python3 -c 'import sys, yaml, json; sys.stdout.write(json.dumps(yaml.safe_load(sys.stdin), indent=2))' < ./openapi_client.json > temp.json && mv temp.json ./openapi_client.json
	speakeasy generate sdk -s ./openapi_client.json -o ./ -l python

.PHONY: publish
publish:
./scripts/publish.sh
Expand Down
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ res = s.general.partition(request={
},
RetryConfig("backoff", BackoffStrategy(1, 50, 1.1, 100), False))

if res.elements is not None:
if res.two_hundred_application_json_elements is not None:
# handle response
pass

Expand Down Expand Up @@ -126,7 +126,7 @@ res = s.general.partition(request={
},
})

if res.elements is not None:
if res.two_hundred_application_json_elements is not None:
# handle response
pass

Expand Down Expand Up @@ -181,7 +181,7 @@ try:
},
})

if res.elements is not None:
if res.two_hundred_application_json_elements is not None:
# handle response
pass

Expand Down Expand Up @@ -316,7 +316,7 @@ res = s.general.partition(request={
},
})

if res.elements is not None:
if res.two_hundred_application_json_elements is not None:
# handle response
pass
```
Expand Down Expand Up @@ -346,7 +346,7 @@ async def main():
"strategy": shared.Strategy.HI_RES,
},
})
if res.elements is not None:
if res.two_hundred_application_json_elements is not None:
# handle response
pass

Expand Down Expand Up @@ -444,7 +444,7 @@ res = s.general.partition(request={
},
})

if res.elements is not None:
if res.two_hundred_application_json_elements is not None:
# handle response
pass

Expand Down
4 changes: 2 additions & 2 deletions USAGE.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ res = s.general.partition(request={
},
})

if res.elements is not None:
if res.two_hundred_application_json_elements is not None:
# handle response
pass
```
Expand Down Expand Up @@ -51,7 +51,7 @@ async def main():
"strategy": shared.Strategy.HI_RES,
},
})
if res.elements is not None:
if res.two_hundred_application_json_elements is not None:
# handle response
pass

Expand Down
3 changes: 2 additions & 1 deletion docs/models/operations/partitionresponse.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@
| `content_type` | *str* | :heavy_check_mark: | HTTP response content type for this operation |
| `status_code` | *int* | :heavy_check_mark: | HTTP response status code for this operation |
| `raw_response` | [httpx.Response](https://www.python-httpx.org/api/#response) | :heavy_check_mark: | Raw HTTP response; suitable for custom response parsing |
| `elements` | List[Dict[str, *Any*]] | :heavy_minus_sign: | Successful Response |
| `two_hundred_application_json_elements` | List[Dict[str, *Any*]] | :heavy_minus_sign: | Successful Response |
| `body` | *Optional[bytes]* | :heavy_minus_sign: | Raw response body, populated when the response content type is `text/csv` |
3 changes: 2 additions & 1 deletion docs/models/shared/strategy.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto.
| `FAST` | fast |
| `HI_RES` | hi_res |
| `AUTO` | auto |
| `OCR_ONLY` | ocr_only |
| `OCR_ONLY` | ocr_only |
| `OD_ONLY` | od_only |
2 changes: 1 addition & 1 deletion gen.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ generation:
auth:
oAuth2ClientCredentialsEnabled: false
python:
version: 0.26.1
version: 0.27.1
additionalDependencies:
dev:
deepdiff: '>=6.0'
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "unstructured-client"
version = "0.26.1"
version = "0.27.1"
description = "Python Client SDK for Unstructured API"
authors = ["Unstructured",]
readme = "README-PYPI.md"
Expand Down
8 changes: 6 additions & 2 deletions src/unstructured_client/_hooks/custom/request_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,12 @@ def create_response(elements: list) -> httpx.Response:
Returns:
The modified response object with updated content.
"""
response = httpx.Response(status_code=200, headers={"Content-Type": "application/json"})
content = json.dumps(elements).encode()
if not isinstance(elements[0], dict):
response = httpx.Response(status_code=200, headers={"Content-Type": "text/csv"})
content = b''.join(elements)
else:
response = httpx.Response(status_code=200, headers={"Content-Type": "application/json"})
content = json.dumps(elements).encode()
content_length = str(len(content))
response.headers.update({"Content-Length": content_length})
setattr(response, "_content", content)
Expand Down
13 changes: 10 additions & 3 deletions src/unstructured_client/_hooks/custom/split_pdf_hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,10 @@ def _await_elements(
response_number,
)
successful_responses.append(res)
elements.append(res.json())
if res.headers["Content-Type"] == "application/json":
elements.append(res.json())
else:
elements.append(res.content)
else:
error_message = f"Failed to partition set {response_number}."

Expand All @@ -401,7 +404,12 @@ def _await_elements(

self.api_successful_responses[operation_id] = successful_responses
self.api_failed_responses[operation_id] = failed_responses
flattened_elements = [element for sublist in elements for element in sublist]
flattened_elements = []
for sublist in elements:
if isinstance(sublist, list):
flattened_elements.extend(sublist)
else:
flattened_elements.append(sublist)
return flattened_elements

def after_success(
Expand All @@ -423,7 +431,6 @@ def after_success(
"""
# Grab the correct id out of the dummy request
operation_id = response.request.headers.get("operation_id")

elements = self._await_elements(operation_id)

# if fails are disallowed, return the first failed response
Expand Down
2 changes: 1 addition & 1 deletion src/unstructured_client/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import importlib.metadata

__title__: str = "unstructured-client"
__version__: str = "0.26.1"
__version__: str = "0.27.1"

try:
if __package__ is not None:
Expand Down
50 changes: 42 additions & 8 deletions src/unstructured_client/general.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT."""

from .basesdk import BaseSDK
from enum import Enum
from typing import Any, Dict, List, Optional, Union, cast
from unstructured_client import utils
from unstructured_client._hooks import HookContext
from unstructured_client.models import errors, operations, shared
from unstructured_client.types import BaseModel, OptionalNullable, UNSET


class PartitionAcceptEnum(str, Enum):
    """Media types that can be passed as ``accept_header_override``.

    Each member's value is the literal string sent as the ``Accept`` header
    value. Because the enum also subclasses ``str``, members compare equal
    to their plain-string values.
    """

    # Response is parsed as a JSON list of elements.
    APPLICATION_JSON = "application/json"
    # Response is returned as raw CSV bytes.
    TEXT_CSV = "text/csv"


class General(BaseSDK):
def partition(
self,
Expand All @@ -18,6 +24,7 @@ def partition(
retries: OptionalNullable[utils.RetryConfig] = UNSET,
server_url: Optional[str] = None,
timeout_ms: Optional[int] = None,
accept_header_override: Optional[PartitionAcceptEnum] = None,
) -> operations.PartitionResponse:
r"""Summary

Expand All @@ -27,6 +34,7 @@ def partition(
:param retries: Override the default retry configuration for this method
:param server_url: Override the default server URL for this method
:param timeout_ms: Override the default request timeout configuration for this method in milliseconds
:param accept_header_override: Override the default accept header for this method
"""
base_url = None
url_variables = None
Expand All @@ -50,7 +58,9 @@ def partition(
request_has_path_params=False,
request_has_query_params=True,
user_agent_header="user-agent",
accept_header_value="application/json",
accept_header_value=accept_header_override.value
if accept_header_override is not None
else "application/json;q=1, text/csv;q=0",
security=self.sdk_configuration.security,
get_serialized_body=lambda: utils.serialize_request_body(
request.partition_parameters,
Expand Down Expand Up @@ -88,29 +98,39 @@ def partition(
data: Any = None
if utils.match_response(http_res, "200", "application/json"):
return operations.PartitionResponse(
elements=utils.unmarshal_json(
two_hundred_application_json_elements=utils.unmarshal_json(
http_res.text, Optional[List[Dict[str, Any]]]
),
status_code=http_res.status_code,
content_type=http_res.headers.get("Content-Type") or "",
raw_response=http_res,
)
if utils.match_response(http_res, "200", "text/csv"):
http_res_bytes = utils.stream_to_bytes(http_res)
return operations.PartitionResponse(
body=http_res_bytes,
status_code=http_res.status_code,
content_type=http_res.headers.get("Content-Type") or "",
raw_response=http_res,
)
if utils.match_response(http_res, "422", "application/json"):
data = utils.unmarshal_json(http_res.text, errors.HTTPValidationErrorData)
raise errors.HTTPValidationError(data=data)
if utils.match_response(http_res, "4XX", "*"):
http_res_text = utils.stream_to_text(http_res)
raise errors.SDKError(
"API error occurred", http_res.status_code, http_res.text, http_res
"API error occurred", http_res.status_code, http_res_text, http_res
)
if utils.match_response(http_res, "5XX", "application/json"):
data = utils.unmarshal_json(http_res.text, errors.ServerErrorData)
raise errors.ServerError(data=data)

content_type = http_res.headers.get("Content-Type")
http_res_text = utils.stream_to_text(http_res)
raise errors.SDKError(
f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
http_res.status_code,
http_res.text,
http_res_text,
http_res,
)

Expand All @@ -123,6 +143,7 @@ async def partition_async(
retries: OptionalNullable[utils.RetryConfig] = UNSET,
server_url: Optional[str] = None,
timeout_ms: Optional[int] = None,
accept_header_override: Optional[PartitionAcceptEnum] = None,
) -> operations.PartitionResponse:
r"""Summary

Expand All @@ -132,6 +153,7 @@ async def partition_async(
:param retries: Override the default retry configuration for this method
:param server_url: Override the default server URL for this method
:param timeout_ms: Override the default request timeout configuration for this method in milliseconds
:param accept_header_override: Override the default accept header for this method
"""
base_url = None
url_variables = None
Expand All @@ -155,7 +177,9 @@ async def partition_async(
request_has_path_params=False,
request_has_query_params=True,
user_agent_header="user-agent",
accept_header_value="application/json",
accept_header_value=accept_header_override.value
if accept_header_override is not None
else "application/json;q=1, text/csv;q=0",
security=self.sdk_configuration.security,
get_serialized_body=lambda: utils.serialize_request_body(
request.partition_parameters,
Expand Down Expand Up @@ -193,28 +217,38 @@ async def partition_async(
data: Any = None
if utils.match_response(http_res, "200", "application/json"):
return operations.PartitionResponse(
elements=utils.unmarshal_json(
two_hundred_application_json_elements=utils.unmarshal_json(
http_res.text, Optional[List[Dict[str, Any]]]
),
status_code=http_res.status_code,
content_type=http_res.headers.get("Content-Type") or "",
raw_response=http_res,
)
if utils.match_response(http_res, "200", "text/csv"):
http_res_bytes = await utils.stream_to_bytes_async(http_res)
return operations.PartitionResponse(
body=http_res_bytes,
status_code=http_res.status_code,
content_type=http_res.headers.get("Content-Type") or "",
raw_response=http_res,
)
if utils.match_response(http_res, "422", "application/json"):
data = utils.unmarshal_json(http_res.text, errors.HTTPValidationErrorData)
raise errors.HTTPValidationError(data=data)
if utils.match_response(http_res, "4XX", "*"):
http_res_text = await utils.stream_to_text_async(http_res)
raise errors.SDKError(
"API error occurred", http_res.status_code, http_res.text, http_res
"API error occurred", http_res.status_code, http_res_text, http_res
)
if utils.match_response(http_res, "5XX", "application/json"):
data = utils.unmarshal_json(http_res.text, errors.ServerErrorData)
raise errors.ServerError(data=data)

content_type = http_res.headers.get("Content-Type")
http_res_text = await utils.stream_to_text_async(http_res)
raise errors.SDKError(
f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
http_res.status_code,
http_res.text,
http_res_text,
http_res,
)
Loading
Loading