diff --git a/.semversioner/next-release/patch-20240812182118180884.json b/.semversioner/next-release/patch-20240812182118180884.json
new file mode 100644
index 0000000000..5bc8a2d224
--- /dev/null
+++ b/.semversioner/next-release/patch-20240812182118180884.json
@@ -0,0 +1,4 @@
+{
+    "type": "patch",
+    "description": "Stabilize smoke tests for query context building"
+}
diff --git a/CODEOWNERS b/CODEOWNERS
index 47b118f4d8..ebfb11b8d4 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -2,5 +2,4 @@
 # the repo. Unless a later match takes precedence,
 # @global-owner1 and @global-owner2 will be requested for
 # review when someone opens a pull request.
-* @microsoft/societal-resilience
-* @microsoft/graphrag-core-team
+* @microsoft/societal-resilience @microsoft/graphrag-core-team
diff --git a/graphrag/query/context_builder/community_context.py b/graphrag/query/context_builder/community_context.py
index 398f8ac422..d344e2c06e 100644
--- a/graphrag/query/context_builder/community_context.py
+++ b/graphrag/query/context_builder/community_context.py
@@ -15,6 +15,10 @@
 
 log = logging.getLogger(__name__)
 
+NO_COMMUNITY_RECORDS_WARNING: str = (
+    "Warning: No community records added when building community context."
+)
+
 
 def build_community_context(
     community_reports: list[CommunityReport],
@@ -128,9 +132,9 @@ def _cut_batch() -> None:
         record_df = _convert_report_context_to_df(
             context_records=batch_records,
             header=header,
-            weight_column=community_weight_name
-            if entities and include_community_weight
-            else None,
+            weight_column=(
+                community_weight_name if entities and include_community_weight else None
+            ),
             rank_column=community_rank_name if include_community_rank else None,
         )
         if len(record_df) == 0:
@@ -163,9 +167,7 @@ def _cut_batch() -> None:
         _cut_batch()
 
     if len(all_context_records) == 0:
-        log.warning(
-            "Warning: No community records added when building community context."
-        )
+        log.warning(NO_COMMUNITY_RECORDS_WARNING)
         return ([], {})
 
     return all_context_text, {
diff --git a/tests/smoke/test_fixtures.py b/tests/smoke/test_fixtures.py
index b5118c6a50..c5aff3d977 100644
--- a/tests/smoke/test_fixtures.py
+++ b/tests/smoke/test_fixtures.py
@@ -16,6 +16,9 @@
 import pytest
 
 from graphrag.index.storage.blob_pipeline_storage import BlobPipelineStorage
+from graphrag.query.context_builder.community_context import (
+    NO_COMMUNITY_RECORDS_WARNING,
+)
 
 log = logging.getLogger(__name__)
 
@@ -25,6 +28,8 @@
 # cspell:disable-next-line well-known-key
 WELL_KNOWN_AZURITE_CONNECTION_STRING = "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1"
 
+KNOWN_WARNINGS = [NO_COMMUNITY_RECORDS_WARNING]
+
 
 def _load_fixtures():
     """Load all fixtures from the tests/data folder."""
@@ -294,6 +299,8 @@ def test_fixture(
             result.stderr if "No existing dataset at" not in result.stderr else ""
         )
 
-        assert stderror == "", f"Query failed with error: {stderror}"
+        assert (
+            stderror == "" or stderror.replace("\n", "") in KNOWN_WARNINGS
+        ), f"Query failed with error: {stderror}"
         assert result.stdout is not None, "Query returned no output"
         assert len(result.stdout) > 0, "Query returned empty output"