Skip to content

Commit

Permalink
Output the endpoint from Bicep
Browse files Browse the repository at this point in the history
  • Loading branch information
pamelafox committed Dec 5, 2024
1 parent 5a3040a commit 2a6e604
Show file tree
Hide file tree
Showing 6 changed files with 24 additions and 8 deletions.
2 changes: 1 addition & 1 deletion app/backend/prepdocslib/mediadescriber.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,4 @@ async def describe_image(self, image_bytes) -> str:
results = await self.poll_api(session, poll_url, headers)

fields = results["result"]["contents"][0]["fields"]
return fields["DescriptionHTML"]["valueString"]
return fields["Description"]["valueString"]
8 changes: 5 additions & 3 deletions app/backend/prepdocslib/pdfparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,17 +138,19 @@ class ObjectType(Enum):
added_objects = set() # set of object types todo mypy
for idx, mask_char in enumerate(mask_chars):
object_type, object_idx = mask_char
if object_idx is None:
raise ValueError("object_idx should not be None")
if object_type == ObjectType.NONE:
page_text += form_recognizer_results.content[page_offset + idx]
elif object_type == ObjectType.TABLE:
if object_idx is None:
raise ValueError("Expected object_idx to be set")
if mask_char not in added_objects:
page_text += DocumentAnalysisParser.table_to_html(tables_on_page[object_idx])
added_objects.add(mask_char)
elif object_type == ObjectType.FIGURE:
if cu_describer is None:
raise ValueError("cu_describer should not be None, unable to describe figure")
if object_idx is None:
raise ValueError("Expected object_idx to be set")
if mask_char not in added_objects:
figure_html = await DocumentAnalysisParser.figure_to_html(
doc_for_pymupdf, cu_describer, figures_on_page[object_idx]
Expand Down Expand Up @@ -176,7 +178,7 @@ async def figure_to_html(
doc: pymupdf.Document, cu_describer: ContentUnderstandingDescriber, figure: DocumentFigure
) -> str:
figure_title = (figure.caption and figure.caption.content) or ""
logger.info("Describing figure '%s' with title", figure.id, figure_title)
logger.info("Describing figure %s with title '%s'", figure.id, figure_title)
if not figure.bounding_regions:
return f"<figure><figcaption>{figure_title}</figcaption></figure>"
for region in figure.bounding_regions:
Expand Down
2 changes: 2 additions & 0 deletions infra/main.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,7 @@ var appEnvVariables = {
USE_LOCAL_PDF_PARSER: useLocalPdfParser
USE_LOCAL_HTML_PARSER: useLocalHtmlParser
USE_MEDIA_DESCRIBER_AZURE_CU: useMediaDescriberAzureCU
AZURE_CONTENTUNDERSTANDING_ENDPOINT: useMediaDescriberAzureCU ? contentUnderstanding.outputs.endpoint : ''
RUNNING_IN_PRODUCTION: 'true'
}

Expand Down Expand Up @@ -1193,6 +1194,7 @@ output AZURE_SPEECH_SERVICE_ID string = useSpeechOutputAzure ? speech.outputs.re
output AZURE_SPEECH_SERVICE_LOCATION string = useSpeechOutputAzure ? speech.outputs.location : ''

output AZURE_VISION_ENDPOINT string = useGPT4V ? computerVision.outputs.endpoint : ''
output AZURE_CONTENTUNDERSTANDING_ENDPOINT string = useMediaDescriberAzureCU ? contentUnderstanding.outputs.endpoint : ''

output AZURE_DOCUMENTINTELLIGENCE_SERVICE string = documentIntelligence.outputs.name
output AZURE_DOCUMENTINTELLIGENCE_RESOURCE_GROUP string = documentIntelligenceResourceGroup.name
Expand Down
2 changes: 1 addition & 1 deletion scripts/prepdocs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ if [ $# -gt 0 ]; then
additionalArgs="$@"
fi

./.venv/bin/python ./app/backend/prepdocs.py './data/GPT4V_Examples/Financial Market Analysis Report 2023.pdf' --verbose $additionalArgs
./.venv/bin/python ./app/backend/prepdocs.py './data/*' --verbose $additionalArgs
9 changes: 6 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
MockBlobClient,
MockResponse,
mock_computervision_response,
mock_contentunderstanding_response,
mock_speak_text_cancelled,
mock_speak_text_failed,
mock_speak_text_success,
Expand All @@ -54,10 +55,12 @@ async def mock_search(self, *args, **kwargs):


@pytest.fixture
def mock_compute_embeddings_call(monkeypatch):
def mock_azurehttp_calls(monkeypatch):
def mock_post(*args, **kwargs):
if kwargs.get("url").endswith("computervision/retrieval:vectorizeText"):
return mock_computervision_response()
elif kwargs.get("url").endswith("/contentunderstanding/analyzers/image_analyzer:analyze"):
return mock_contentunderstanding_response()
else:
raise Exception("Unexpected URL for mock call to ClientSession.post()")

Expand Down Expand Up @@ -327,7 +330,7 @@ async def client(
mock_openai_embedding,
mock_acs_search,
mock_blob_container_client,
mock_compute_embeddings_call,
mock_azurehttp_calls,
):
quart_app = app.create_app()

Expand All @@ -346,7 +349,7 @@ async def client_with_expiring_token(
mock_openai_embedding,
mock_acs_search,
mock_blob_container_client,
mock_compute_embeddings_call,
mock_azurehttp_calls,
):
quart_app = app.create_app()

Expand Down
9 changes: 9 additions & 0 deletions tests/mocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,15 @@ def mock_computervision_response():
)


def mock_contentunderstanding_response():
    """Build the canned response for a mocked Content Understanding analyze POST.

    Returns:
        A ``MockResponse`` with status 200 whose only header,
        ``Operation-Location``, holds a fixed analyzer-results URL.
    """
    # Fixed results URL; presumably what the caller polls next -- confirm against the describer.
    operation_location = "https://cu-ztmfrxlgtk3nq.cognitiveservices.azure.com/contentunderstanding/analyzers/image_analyzer/results/53e4c016-d2c0-48a9-a9f4-38891f7d45f0?api-version=2024-12-01-preview"
    response_headers = {"Operation-Location": operation_location}
    return MockResponse(status=200, headers=response_headers)


class MockAudio:
def __init__(self, audio_data):
self.audio_data = audio_data
Expand Down

0 comments on commit 2a6e604

Please sign in to comment.