diff --git a/server/integration_tests/explore_test.py b/server/integration_tests/explore_test.py index e294354497..12749f1b71 100644 --- a/server/integration_tests/explore_test.py +++ b/server/integration_tests/explore_test.py @@ -119,13 +119,23 @@ def handle_response(self, # TODO: Proper fix should be to make NL server more deterministic if 'variables' in resp: resp_var_to_score = {} + dbg['sv_matching']['CosineScore'] = _format_scores( + dbg['sv_matching']['CosineScore']) for i, sv in enumerate(dbg['sv_matching']['SV']): - score = dbg['sv_matching']['CosineScore'][i] - resp_var_to_score[sv] = float("{:.6f}".format(score)) + resp_var_to_score[sv] = dbg['sv_matching']['CosineScore'][i] sorted_variables = sorted(resp['variables'], key=lambda x: (-resp_var_to_score.get(x, 0), x)) resp['variables'] = sorted_variables + # Truncate CosineScores to 6 decimals to reduce noisy diffs. + for candidate in dbg['sv_matching']['MultiSV'].get('Candidates', []): + for part in candidate.get('Parts', []): + if multisv_scores := part.get('CosineScore', []): + part['CosineScore'] = _format_scores(multisv_scores) + + if props_scores := dbg['props_matching'].get('CosineScore', []): + dbg['props_matching']['CosineScore'] = _format_scores(props_scores) + resp['debug'] = {} resp['context'] = {} for category in resp.get('config', {}).get('categories', []): @@ -793,3 +803,8 @@ def _del_field(d: dict, path: str): del tmp[p] else: tmp = tmp[p] + + +# Helper function to consistently format float scores. +def _format_scores(scores): + return [float("{:.6f}".format(score)) for score in scores] diff --git a/server/integration_tests/test_data/detection_api_bio/whatisthephylumofvolvox/debug_info.json b/server/integration_tests/test_data/detection_api_bio/whatisthephylumofvolvox/debug_info.json index e434859d40..3874180d73 100644 --- a/server/integration_tests/test_data/detection_api_bio/whatisthephylumofvolvox/debug_info.json +++ b/server/integration_tests/test_data/detection_api_bio/whatisthephylumofvolvox/debug_info.json @@ -10,29 +10,29 @@ "query_with_places_removed": "what is the phylum of", "sv_matching": { "CosineScore": [ - 0.35510897636413574, - 0.3460870683193207, - 0.3452577292919159, - 0.34484589099884033, - 0.3411366045475006, - 0.3384982645511627, - 0.3349902629852295, - 0.3316609263420105, - 0.32600605487823486, - 0.3242444694042206, - 0.32060620188713074, - 0.31426557898521423, - 0.3127654492855072, - 0.3126293420791626, - 0.31250452995300293, - 0.31177404522895813, - 0.3103388845920563, - 0.3100748658180237, - 0.3098835051059723, - 0.3080541491508484, - 0.3074587285518646, - 0.3071962296962738, - 0.30394214391708374 + 0.355109, + 0.346087, + 0.345258, + 0.344846, + 0.341137, + 0.338498, + 0.33499, + 0.331661, + 0.326006, + 0.324244, + 0.320606, + 0.314266, + 0.312765, + 0.312629, + 0.312505, + 0.311774, + 0.310339, + 0.310075, + 0.309883, + 0.308054, + 0.307459, + 0.307196, + 0.303942 ], "MultiSV": {}, "Query": "what is the phylum of", @@ -64,35 +64,35 @@ }, "props_matching": { "CosineScore": [ - 1.0000001192092896, - 0.47528401017189026, - 0.3615417182445526, - 0.34543079137802124, - 0.34543079137802124, - 0.3420097231864929, - 0.3293308615684509, - 0.318451464176178, - 0.31466978788375854, - 0.308395117521286, - 0.30794060230255127, - 0.3046301007270813, - 0.2827085852622986, - 0.281875342130661, - 0.28082770109176636, - 0.2804528772830963, - 0.2742983400821686, - 0.2715050280094147, - 0.27071818709373474, - 0.2695467472076416, - 0.25990068912506104, - 0.25990068912506104, - 0.24827904999256134, - 0.24662022292613983, - 0.24632100760936737, - 0.2461635172367096, - 0.24212227761745453, - 0.23135530948638916, - 0.2296244353055954 + 1.0, + 0.475284, + 0.361542, + 0.345431, + 0.345431, + 0.34201, + 0.329331, + 0.318452, + 0.31467, + 0.308395, + 0.307941, + 0.30463, + 0.282709, + 0.281875, + 0.280828, + 0.280453, + 0.274298, + 0.271505, + 0.270718, + 0.269547, + 0.259901, + 0.259901, + 0.248279, + 0.24662, + 0.246321, + 0.246163, + 0.242122, + 0.231355, + 0.229624 ], "PROP": [ "phylum", diff --git a/server/integration_tests/test_data/detection_api_bio/whattypesofgenesarefgfr1,apoe,andache/debug_info.json b/server/integration_tests/test_data/detection_api_bio/whattypesofgenesarefgfr1,apoe,andache/debug_info.json index 4f8acbe246..c505e21788 100644 --- a/server/integration_tests/test_data/detection_api_bio/whattypesofgenesarefgfr1,apoe,andache/debug_info.json +++ b/server/integration_tests/test_data/detection_api_bio/whattypesofgenesarefgfr1,apoe,andache/debug_info.json @@ -12,30 +12,30 @@ "query_with_places_removed": "what types of genes are and", "sv_matching": { "CosineScore": [ - 0.5237791538238525, - 0.4451894760131836, - 0.4198145866394043, - 0.41922125220298767, - 0.41366666555404663, - 0.4096527099609375, - 0.4066110849380493, - 0.4013715386390686, - 0.4010191559791565, - 0.40063756704330444, - 0.39874646067619324, - 0.39836058020591736, - 0.39052361249923706, - 0.38903331756591797, - 0.3884609639644623, - 0.3883824050426483, - 0.3873301148414612, - 0.38642188906669617, - 0.3858962655067444, - 0.3856789469718933, - 0.3854343295097351, - 0.38533878326416016, - 0.38184264302253723, - 0.37970495223999023 + 0.523779, + 0.445189, + 0.419815, + 0.419221, + 0.413667, + 0.409652, + 0.406611, + 0.401371, + 0.401019, + 0.400638, + 0.398746, + 0.398361, + 0.390524, + 0.389033, + 0.388461, + 0.388382, + 0.38733, + 0.386422, + 0.385896, + 0.385679, + 0.385434, + 0.385339, + 0.381843, + 0.379705 ], "MultiSV": {}, "Query": "what types of genes are and", @@ -68,32 +68,32 @@ }, "props_matching": { "CosineScore": [ - 0.9086110591888428, - 0.7445294260978699, - 0.7330226898193359, - 0.7175887823104858, - 0.6834690570831299, - 0.6590196490287781, - 0.6357895731925964, - 0.6340402364730835, - 0.6171568036079407, - 0.5988123416900635, - 0.5958784818649292, - 0.5850145220756531, - 0.5832706689834595, - 0.5810792446136475, - 0.576904296875, - 0.5432419776916504, - 0.5399729013442993, - 0.5320467948913574, - 0.5309585332870483, - 0.5142703056335449, - 0.5125279426574707, - 0.46105992794036865, - 0.4441080689430237, - 0.39822953939437866, - 0.3944427967071533, - 0.39229002594947815 + 0.908611, + 0.744529, + 0.733023, + 0.717589, + 0.683469, + 0.65902, + 0.63579, + 0.63404, + 0.617157, + 0.598812, + 0.595879, + 0.585015, + 0.583271, + 0.581079, + 0.576904, + 0.543242, + 0.539973, + 0.532047, + 0.530959, + 0.51427, + 0.512528, + 0.46106, + 0.444108, + 0.398229, + 0.394443, + 0.39229 ], "PROP": [ "typeOfGene", diff --git a/server/integration_tests/test_data/detection_api_multivar/comparemalepopulationwithfemalepopulation/debug_info.json b/server/integration_tests/test_data/detection_api_multivar/comparemalepopulationwithfemalepopulation/debug_info.json index 5a672b4906..aba5931ec3 100644 --- a/server/integration_tests/test_data/detection_api_multivar/comparemalepopulationwithfemalepopulation/debug_info.json +++ b/server/integration_tests/test_data/detection_api_multivar/comparemalepopulationwithfemalepopulation/debug_info.json @@ -8,43 +8,43 @@ "query_with_places_removed": "compare male population with female population", "sv_matching": { "CosineScore": [ - 0.9187595844268799, - 0.8590543866157532, - 0.8354092836380005, - 0.7823134660720825, - 0.7773187756538391, - 0.7762894630432129, - 0.7663271427154541, - 0.766107439994812, - 0.7614184617996216, - 0.7606041431427002, - 0.7595797181129456, - 0.7573733329772949, - 0.7573024034500122, - 0.757297158241272, - 0.7553009390830994, - 0.7533451318740845, - 0.7532937526702881, - 0.7509560585021973, - 0.7482578754425049, - 0.7451456785202026, - 0.7443947792053223, - 0.7431552410125732, - 0.7426693439483643, - 0.7396597862243652, - 0.7371534109115601, - 0.7366002798080444, - 0.7347153425216675, - 0.733651876449585, - 0.732607901096344, - 0.7325160503387451, - 0.7302997708320618, - 0.7299268841743469, - 0.7289118766784668, - 0.7289034724235535, - 0.7280722260475159, - 0.7259584665298462, - 0.7259330153465271 + 0.918759, + 0.859054, + 0.835409, + 0.782314, + 0.777319, + 0.776289, + 0.766327, + 0.766107, + 0.761419, + 0.760604, + 0.75958, + 0.757373, + 0.757302, + 0.757297, + 0.755301, + 0.753345, + 0.753294, + 0.750956, + 0.748258, + 0.745145, + 0.744395, + 0.743156, + 0.742669, + 0.73966, + 0.737154, + 0.7366, + 0.734715, + 0.733652, + 0.732608, + 0.732516, + 0.730299, + 0.729926, + 0.728912, + 0.728904, + 0.728073, + 0.725958, + 0.725933 ], "MultiSV": { "Candidates": [ @@ -54,7 +54,7 @@ "Parts": [ { "CosineScore": [ - 0.9257855415344238 + 0.925785 ], "QueryPart": "male population", "SV": [ @@ -63,8 +63,8 @@ }, { "CosineScore": [ - 0.9296979904174805, - 0.8847434520721436 + 0.929698, + 0.884743 ], "QueryPart": "female population", "SV": [ @@ -80,7 +80,7 @@ "Parts": [ { "CosineScore": [ - 0.9117770195007324 + 0.911777 ], "QueryPart": "male population female", "SV": [ @@ -89,8 +89,8 @@ }, { "CosineScore": [ - 0.8982378244400024, - 0.8723466396331787 + 0.898237, + 0.872347 ], "QueryPart": "population", "SV": [ @@ -106,10 +106,10 @@ "Parts": [ { "CosineScore": [ - 0.808887243270874, - 0.8034241199493408, - 0.8017654418945312, - 0.775155782699585 + 0.808887, + 0.803424, + 0.801765, + 0.775156 ], "QueryPart": "male", "SV": [ @@ -121,8 +121,8 @@ }, { "CosineScore": [ - 0.9170327186584473, - 0.873018741607666 + 0.917033, + 0.873019 ], "QueryPart": "population female population", "SV": [ diff --git a/server/integration_tests/test_data/detection_api_multivar/compareobesityvs.poverty/debug_info.json b/server/integration_tests/test_data/detection_api_multivar/compareobesityvs.poverty/debug_info.json index 83ff52431b..1568745bee 100644 --- a/server/integration_tests/test_data/detection_api_multivar/compareobesityvs.poverty/debug_info.json +++ b/server/integration_tests/test_data/detection_api_multivar/compareobesityvs.poverty/debug_info.json @@ -8,35 +8,35 @@ "query_with_places_removed": "compare obesity vs poverty", "sv_matching": { "CosineScore": [ - 0.8778442740440369, - 0.7967699766159058, - 0.762496829032898, - 0.7499465942382812, - 0.7484047412872314, - 0.7442895174026489, - 0.7357541918754578, - 0.7350349426269531, - 0.7310354709625244, - 0.7289618253707886, - 0.7266261577606201, - 0.7264261841773987, - 0.7264261841773987, - 0.7253916263580322, - 0.7230079174041748, - 0.7139475345611572, - 0.7126950621604919, - 0.7094853520393372, - 0.7085550427436829, - 0.7085551023483276, - 0.7078822255134583, - 0.7077707052230835, - 0.7045387029647827, - 0.7024303078651428, - 0.7004534602165222, - 0.6994997262954712, - 0.6956704258918762, - 0.6947906613349915, - 0.6913242936134338 + 0.877844, + 0.79677, + 0.762497, + 0.749946, + 0.748405, + 0.74429, + 0.735754, + 0.735035, + 0.731035, + 0.728962, + 0.726626, + 0.726426, + 0.726426, + 0.725391, + 0.723008, + 0.713948, + 0.712695, + 0.709485, + 0.708555, + 0.708554, + 0.707882, + 0.707771, + 0.704539, + 0.702431, + 0.700454, + 0.699499, + 0.69567, + 0.694791, + 0.691324 ], "MultiSV": { "Candidates": [ @@ -46,10 +46,10 @@ "Parts": [ { "CosineScore": [ - 0.8467836380004883, - 0.8316553831100464, - 0.8201552033424377, - 0.8177061080932617 + 0.846783, + 0.831656, + 0.820154, + 0.817707 ], "QueryPart": "obesity", "SV": [ diff --git a/server/integration_tests/test_data/detection_api_multivar/howarefactorslikeobesity,bloodpressureandasthmaimpactedbyclimatechange/debug_info.json b/server/integration_tests/test_data/detection_api_multivar/howarefactorslikeobesity,bloodpressureandasthmaimpactedbyclimatechange/debug_info.json index bef680e6ec..f64e778aa2 100644 --- a/server/integration_tests/test_data/detection_api_multivar/howarefactorslikeobesity,bloodpressureandasthmaimpactedbyclimatechange/debug_info.json +++ b/server/integration_tests/test_data/detection_api_multivar/howarefactorslikeobesity,bloodpressureandasthmaimpactedbyclimatechange/debug_info.json @@ -8,34 +8,34 @@ "query_with_places_removed": "how are factors like obesity , blood pressure and asthma impacted by climate change", "sv_matching": { "CosineScore": [ - 0.7864793539047241, - 0.7135567665100098, - 0.691805362701416, - 0.6740915179252625, - 0.6734591722488403, - 0.6704620718955994, - 0.6635113954544067, - 0.6593924760818481, - 0.6520178318023682, - 0.6484817266464233, - 0.6461696624755859, - 0.6459026336669922, - 0.6435545682907104, - 0.6410900354385376, - 0.6405231952667236, - 0.6402113437652588, - 0.6384210586547852, - 0.6380343437194824, - 0.6376464366912842, - 0.6346377730369568, - 0.6341456174850464, - 0.6338476538658142, - 0.6322696805000305, - 0.6322205662727356, - 0.6304948925971985, - 0.6302965879440308, - 0.6298357844352722, - 0.6295678019523621 + 0.786479, + 0.713557, + 0.691806, + 0.674092, + 0.673459, + 0.670462, + 0.663511, + 0.659393, + 0.652018, + 0.648482, + 0.64617, + 0.645903, + 0.643554, + 0.64109, + 0.640523, + 0.640211, + 0.638421, + 0.638034, + 0.637647, + 0.634637, + 0.634146, + 0.633848, + 0.63227, + 0.632221, + 0.630495, + 0.630297, + 0.629835, + 0.629568 ], "MultiSV": { "Candidates": [ @@ -45,7 +45,7 @@ "Parts": [ { "CosineScore": [ - 0.7997788786888123 + 0.799779 ], "QueryPart": "factors like obesity blood pressure asthma impacted", "SV": [ @@ -69,11 +69,11 @@ "Parts": [ { "CosineScore": [ - 0.7588618993759155, - 0.7463642954826355, - 0.740165650844574, - 0.7351235151290894, - 0.7130995988845825 + 0.758863, + 0.746364, + 0.740165, + 0.735123, + 0.713099 ], "QueryPart": "factors like obesity", "SV": [ @@ -86,7 +86,7 @@ }, { "CosineScore": [ - 0.9999995827674866 + 1.0 ], "QueryPart": "blood pressure", "SV": [ @@ -95,8 +95,8 @@ }, { "CosineScore": [ - 0.8255698084831238, - 0.8027870655059814 + 0.82557, + 0.802787 ], "QueryPart": "asthma impacted climate change", "SV": [ @@ -112,10 +112,10 @@ "Parts": [ { "CosineScore": [ - 0.7768431901931763, - 0.7495046257972717, - 0.7342654466629028, - 0.7325114011764526 + 0.776843, + 0.749504, + 0.734266, + 0.732511 ], "QueryPart": "factors like obesity blood pressure asthma", "SV": [ @@ -127,7 +127,7 @@ }, { "CosineScore": [ - 0.9056921005249023 + 0.905692 ], "QueryPart": "impacted climate change", "SV": [ @@ -142,7 +142,7 @@ "Parts": [ { "CosineScore": [ - 0.8207905888557434 + 0.82079 ], "QueryPart": "factors like obesity blood pressure", "SV": [ @@ -151,8 +151,8 @@ }, { "CosineScore": [ - 0.8255698084831238, - 0.8027870655059814 + 0.82557, + 0.802787 ], "QueryPart": "asthma impacted climate change", "SV": [ diff --git a/server/integration_tests/test_data/detection_api_multivar/numberofpoorhispanicwomenwithphd/debug_info.json b/server/integration_tests/test_data/detection_api_multivar/numberofpoorhispanicwomenwithphd/debug_info.json index 9732c39707..aca640ac56 100644 --- a/server/integration_tests/test_data/detection_api_multivar/numberofpoorhispanicwomenwithphd/debug_info.json +++ b/server/integration_tests/test_data/detection_api_multivar/numberofpoorhispanicwomenwithphd/debug_info.json @@ -8,46 +8,46 @@ "query_with_places_removed": "number of poor hispanic women with phd", "sv_matching": { "CosineScore": [ - 0.7985796332359314, - 0.7883262634277344, - 0.776346743106842, - 0.77406245470047, - 0.7410296201705933, - 0.7334667444229126, - 0.7228284478187561, - 0.722568929195404, - 0.6995474100112915, - 0.6995023488998413, - 0.697618305683136, - 0.6974341869354248, - 0.696003794670105, - 0.6953426003456116, - 0.6945168972015381, - 0.6939643621444702, - 0.6935360431671143, - 0.6908406019210815, - 0.6906974911689758, - 0.6896734237670898, - 0.6889521479606628, - 0.6886201500892639, - 0.6863292455673218, - 0.6859177947044373, - 0.6827540397644043, - 0.6822139620780945, - 0.6815624237060547, - 0.67987459897995, - 0.676856517791748, - 0.6763089299201965, - 0.6751540303230286, - 0.6744991540908813, - 0.6725671291351318, - 0.6717973947525024, - 0.6691675186157227, - 0.6690007448196411, - 0.6682180762290955, - 0.6678104400634766, - 0.6650688052177429, - 0.6648702621459961 + 0.798579, + 0.788326, + 0.776347, + 0.774063, + 0.741029, + 0.733466, + 0.722829, + 0.722569, + 0.699547, + 0.699502, + 0.697618, + 0.697434, + 0.696003, + 0.695344, + 0.694517, + 0.693964, + 0.693537, + 0.69084, + 0.690697, + 0.689673, + 0.688952, + 0.688619, + 0.686329, + 0.685918, + 0.682754, + 0.682214, + 0.681563, + 0.679874, + 0.676856, + 0.676309, + 0.675155, + 0.674499, + 0.672567, + 0.671798, + 0.669168, + 0.669001, + 0.668218, + 0.667811, + 0.665069, + 0.66487 ], "MultiSV": { "Candidates": [ @@ -57,17 +57,17 @@ "Parts": [ { "CosineScore": [ - 0.8311771750450134, - 0.8310011029243469, - 0.8255152702331543, - 0.8195514678955078, - 0.8145602345466614, - 0.8090661764144897, - 0.8086389303207397, - 0.8074402809143066, - 0.8002950549125671, - 0.7985796332359314, - 0.787818193435669 + 0.831177, + 0.831001, + 0.825516, + 0.819551, + 0.814561, + 0.809067, + 0.808639, + 0.80744, + 0.800295, + 0.79858, + 0.787818 ], "QueryPart": "number of poor hispanic", "SV": [ @@ -86,8 +86,8 @@ }, { "CosineScore": [ - 0.8318259119987488, - 0.8029923439025879 + 0.831826, + 0.802992 ], "QueryPart": "women phd", "SV": [ @@ -103,8 +103,8 @@ "Parts": [ { "CosineScore": [ - 0.8366719484329224, - 0.7972691059112549 + 0.836672, + 0.79727 ], "QueryPart": "number of poor", "SV": [ @@ -114,10 +114,10 @@ }, { "CosineScore": [ - 0.8151505589485168, - 0.7885995507240295, - 0.7775554656982422, - 0.7752053141593933 + 0.81515, + 0.788599, + 0.777556, + 0.775205 ], "QueryPart": "hispanic women phd", "SV": [ @@ -135,8 +135,8 @@ "Parts": [ { "CosineScore": [ - 0.8495935201644897, - 0.8335005044937134 + 0.849593, + 0.833502 ], "QueryPart": "number of poor hispanic women", "SV": [ @@ -146,11 +146,11 @@ }, { "CosineScore": [ - 0.7646968960762024, - 0.7476688027381897, - 0.7377066612243652, - 0.7362767457962036, - 0.7305882573127747 + 0.764697, + 0.747669, + 0.737706, + 0.736277, + 0.730588 ], "QueryPart": "phd", "SV": [ diff --git a/server/integration_tests/test_data/detection_api_multivar/showmetheimpactofclimatechangeondrought/debug_info.json b/server/integration_tests/test_data/detection_api_multivar/showmetheimpactofclimatechangeondrought/debug_info.json index 1b3a9e23bc..3e7d7334f1 100644 --- a/server/integration_tests/test_data/detection_api_multivar/showmetheimpactofclimatechangeondrought/debug_info.json +++ b/server/integration_tests/test_data/detection_api_multivar/showmetheimpactofclimatechangeondrought/debug_info.json @@ -8,30 +8,30 @@ "query_with_places_removed": "show me the impact of climate change on drought", "sv_matching": { "CosineScore": [ - 0.8393681645393372, - 0.79311203956604, - 0.7699974775314331, - 0.7208107709884644, - 0.7175306081771851, - 0.6995436549186707, - 0.6863456964492798, - 0.674246609210968, - 0.6730499267578125, - 0.6703540682792664, - 0.6652892827987671, - 0.6644606590270996, - 0.6353792548179626, - 0.6349931359291077, - 0.6347004771232605, - 0.630818247795105, - 0.6290862560272217, - 0.6256592273712158, - 0.6180376410484314, - 0.6162123084068298, - 0.6141720414161682, - 0.6139707565307617, - 0.6138609647750854, - 0.6123877167701721 + 0.839369, + 0.793112, + 0.769998, + 0.72081, + 0.717531, + 0.699544, + 0.686345, + 0.674246, + 0.673051, + 0.670354, + 0.665289, + 0.664461, + 0.635379, + 0.634994, + 0.6347, + 0.630818, + 0.629086, + 0.625659, + 0.618038, + 0.616212, + 0.614172, + 0.613971, + 0.613861, + 0.612388 ], "MultiSV": { "Candidates": [ @@ -41,7 +41,7 @@ "Parts": [ { "CosineScore": [ - 0.8787487149238586 + 0.878748 ], "QueryPart": "show climate change", "SV": [ @@ -50,7 +50,7 @@ }, { "CosineScore": [ - 0.9999998807907104 + 1.0 ], "QueryPart": "drought", "SV": [ @@ -65,8 +65,8 @@ "Parts": [ { "CosineScore": [ - 0.794236958026886, - 0.7929013967514038 + 0.794237, + 0.792901 ], "QueryPart": "show climate", "SV": [ @@ -76,7 +76,7 @@ }, { "CosineScore": [ - 0.866512656211853 + 0.866513 ], "QueryPart": "change drought", "SV": [