switch_vectorstore_to_azure_ai_search (#30)
- initial commit (5999013f4b21ae9752007771c4e265a4831f3e10)
- Adding necessary files for V0 (29f3cf9d9ef0d5b58b4069452459efc7805a5f53)
- Add app.py & requirements (88b346d939ed4b6b18eec500b5d412e5244868b5)
- correcting path (b37e492b30598e4500c86dea1dbdbd9134c615e8)
- error mixed version files during update (3549f0011605dc3e5180f65c98049a12321310d7)
- Replaced previous files with new files and correct folder organization (4d3e8c02adab31e3dcac1fdca947959eb2f42238)
- add db file (1f05721246cfce83ceca67b180d6de5ee24310d8)
- Correct path (473dcdc77ed812bff6412b3bae2218237f5c1db2)
- delete share because not supported by spaces (ef86508271b1cc86656c5ec4524c42599001dc30)
- Display api key (f7c08a6ee7930a4f564298d94ede4ec9f5e7ec81)
- test correcting openai api key (fc8d922f488a6ffc46f9135a7ad0ad88a0b2b323)
- Changing emoji (42dfe73c6f58d8a25d668faf65d64500d37a196c)
- Correct api key cache (f1d8d2baddbfb182533dbe9396d4e5aeae71ff83)
- Correcting import (3d6fd1bccd5fc377133da36f9d75438e89ec2a20)
- Edit text (73cb5fe63038e43312279c1ea8680a7821093f95)
- Correction indentation error (6f3b001c178c04b25c0f18297dbb3794c980f770)
- Fix indentation bug (00d66b96782500ae359646d677efb862fbbe7c8b)
- Add to secret manager api_key and system (17a978f556ec12c905530e3312a2dca9d994f2b6)
- correcting default api key and secret manager (8b276aaf2daeaff4e388f0161ef42d93d2593ca6)
- test correcting manager secret issue with api key (fd8b8bd3079a0fa556189e398301eaf148126ba7)
- adding sources as secret manager (fd6149f85358646a74a0385a1de48709167e6b0d)
- change to complete api_key as option (a17343b15c2729a472d058bb8f6036ccdb80f203)
- Change color (2dcef4362c33145993dcf7c39413d606b9568673)
- Adding explanation & background & utils file (8685e43cc07ab8f4e5c4ac52f0a1ccff21fe239e)
- Adding background image (7f10fa8fcdccf88b0b3405f4840c5a500334ef8e)
- Merge branch 'main' of https://huggingface.co/spaces/ekimetrics/climategpt (547525dcc8fca1b37aac824c3706aa362442e47b)
- Add Eki logo (470caf5a0bd2feff60f8dd944e0e8fd6d58ae943)
- deleting background & adding logo (03c61a87efa57def426a3841b1fd8e4ecc542b0f)
- Merge branch 'main' of https://huggingface.co/spaces/ekimetrics/climategpt (a757956a0c46cb8c6599b1bc88106546c5e5f91d)
- test background with logo (65d6f70adbc4c9aa04102c07bcf11e760df321df)
- test new background with logo (2a7515a5350bf01073fb88f9a2e057b9dafb8611)
- clean repo & add report selection (affc874a759f1ae536bdc14c61c7a07f595f06e8)
- add giec only files (21da7ea7a5d06ebc1fd362998322d2d9439b31b5)
- add giec only db (ec09a9e73cc162a80ddac46b37a2efc3757278eb)
- Cleaning & wording (2d4b96be6c33d34b69c032eb71125de7ca69015c)
- Correct wording (5b8b52a975b8ca6e7437338f9917424ac9fff83f)
- Merge branch 'main' of https://huggingface.co/spaces/ekimetrics/climategpt (676ad8ddcb4bb0834c6c75571c642d184ba7b035)
- test layout in huggingface (7249e3fb259bdd2a56217fd93e76a90b32b775cd)
- Correct ux design (166d2108229b7da9e9edda090c839588f8d0a367)
- add userid random (d312d8c7adb24811dfe17904cd5590eb127c034d)
- correcting markdown (51d6e66d2d44e3ccde7b9771f36f251cba5aabf2)
- add textbox for feedback (65067f0ce2e1f50217d6ad55b39735faccee5f75)
- tst (69b7bbaa9d567bb76bc9a80c0a42df3674da52f0)
- doc str (75bd17d1df6e17a8cefaf0594d13364557d9ce75)
- ds (0425cb629d7e7748bf86e6bf64d6efc6e9bec836)
- cleaner app (dd909e01f414272396009f59d5cd3817fbd3710a)
- var outside of func (878a987992048be05545e7da673ab976646f3f5c)
- 2 (e2b705f72a8d84faabe1e72d558673117f7fc11d)
- auto update text box + stream (c1e72d1c33335782c6406f5174b334a34e00f0aa)
- queue (49544ee4451eb1ddac6bdaa0b8dd40d38c03d1e4)
- added launch... (db454028eec53f53a6cd6f6bb52448aa94b9f4ce)
- only str in messages content (9ba500ae2826c6a9b4c8fd9ca51eeaca8f900980)
- hopefully this time it works (aeaf522749060e419881bc6155d1889049da7518)
- complete doc string for chat (63bb6284742e7f7a2c168f10511537d73f56b16c)
- safer args + better doc strings (48b7f35b0d1bd741c25c19e1727e531dd99e5a02)
- simpler description (27bb2e17a752fd1f39e9d423bc07510c1045cd15)
- prompt engineering for no docs (66e2136d1aca19bb4a3c455aebd1b2bbc3042ec7)
- no docs prompt engineering (8a856ac3d85562040c228353608bc4c3adedc15c)
- try less req (6a20797eb7708cf1eddb6e004fb7312c13edda60)
- better req (0923c05a50bbd030595824e07490a817da96c25a)
- gradio 3.22 (ae77af425e891897096f0e1c2fd03dae668f4c3c)
- - pandas req (8bdb208d9ba1bf07646361b33da5795831c6f584)
- hopefully gradio fixed 3.22 (5fbb7f083fc615523ecf0175437601efdad46670)
- add feedbacks and logs to azure (3afc54c6a317e97c3efdd60530be5960f99e011b)
- create anonymous user_id (5bba46e503a467a65de8776bf36f68b886960e92)
- add azure storage file share lib (8598bf90301d9de15b1bc4ed27b0d49690d79ca2)
- correct spelling azure (14e4b14e16b9d966f8623fc287d09e6ff329b6a7)
- ekimetrics azure key (0980c4b6db5981883fcf5b6e52ef9c955cba4371)
- eki api key (4c809c51e97ab5019cfbb9f1b5e6f7f8f8264a89)
- usage read me (e873211989ab664dccb49aaf95782cddea90de76)
- delete coms (e944ee08d3b5cdbecc16198782ba4f71ff1149a1)
- change user id (b7409698a33ea0acb083c72dd4111748a3d277a3)
- Update app.py (c49f785dccfdbfc15e65fac661e8911f949247ed)
- minor changes (9c7bf8665d4a373b61f94a3396401d6c832bf8c1)
- Added examples (08fe8f1d09a8490e6a4ca8b42a05ca6fa830a1b0)
- test new background (51aaf6fdcd7dbfcc65288ca437f335efd91e506d)
- change background test (23df579fb56f30350d7e86148bdb50ea462af371)
- fail background with pdf (98a01c107ab8ab8d35d9383d121c477812340705)
- test 2 background as page (2b2fb4307b94118344667c1d7b484eaf89c89b6a)
- change background sauvage (91d84131b03ca3252609afa34ec02eaa4ad0fbe7)
- Delete background_test.jpg (0cc46244bd0b25b619244629078ca6c44910b14d)
- back to previous background - test failed (65247c9d001b58757af65c5e640b008015137d6a)
- fix jpg to png (c8f61c0e604cbfe3f9b0dd57d93c5df3fd3fe612)
- Create style.css (d3f68b154ff6a57ce6d7e493d23718e7699fbd98)
- Update app.py (337b70c54d20f394a99396949cb6351ce44d034b)
- Update app.py (f25ba110c3b89419bc1b094cc6bc06650ffd8364)
- Update app.py (7972c2d23ab36cf1ecb1548eeab9b7eced3b14c4)
- Update app.py (3f5636a66997cf1b6df5391ea8febf3c9c5eddba)
- add feedback status (aee88fcba13d6fac402ca48530b0aec1a3f956a5)
- Update app.py (7514e4a1177a023c9228e9a542eec206dfc12dbb)
- Update app.py (0a9558a113d83f10b7261decbc1a3a975b1a9f72)
- Update app.py (5a498af4e1be4c0b939abf1732fdc93aa6232116)
- Update app.py (4ce1abef01e864810507e57a42b82f5ed2f84b88)
- Update app.py (78d3669956449994beb0d6a9e38fed7a758e21fd)
- Update app.py (77796eb54dd53ff24b3f50a992d43e00fa497bbf)
- Update style.css (ed0e6320c71102adbf791aed1793b8e3fcdbe502)
- Update app.py (fe7fa80827e04dbf763b98bf50339d38aff74096)
- Update app.py (6db0fffc1a0b7061cb27fa743b540da5f4f8540a)
- Update app.py (8c1bf4f246260375161aa5fda85e19e57716e3a8)
- Update README.md (01e72a4e33ba3a01eb3e450f100c5526e30af5a2)
- Update app.py (9abbaca4f4e815f76b9dc277160e156a1e589d7b)
- Update app.py (bc15b875b7822fed72b2b2aacd99c3b986224cb0)
- Update app.py (aff6e5711314f48bfc16c0ec813b13fd8a1b2b3e)
- Update app.py (0d9507a5ff0336902aad66edebd25c6509682dd0)
- Safety check on non sourced answers + theme (a22f5f7be311711e474940a0ec74dcaed0ef63ca)
- query reformulation (03cc572798f823bff471f76433b48ffb013516a0)
- Update app.py (b295cc12a7a1f10f2589706ea0c55635de6f27f8)
- Update app.py (89213214884e2f7b1896f444835dfd038daf1a9f)
- reformulation (909dc6d3f4813ad18949a70b778a747c6fae2b96)
- Merge branch 'main' of https://huggingface.co/spaces/Ekimetrics/climate-question-answering (2ed2c26198ce89e632888d6371b885ca079ba884)
- reformulation (e8e3a582ae44b230db09467ace6e7539121e706a)
- BUG FIX: remove custom key option (9c3b10a39f10555ace3e2e58110b075d7c870e17)
- this is not a commit (1acb4eba7a80e68be9b32afa7b72cc8a314db085)
- no commit (1b0d9c9e2304bc5d236b1db8a6465831717ac623)
- prompt are now open source (8275289fe27917f6bc7681a1a693d20e658dca7e)
- name change + prompts open source (ebd7fc12539a93255cb919bafb81dbea37be1412)
- Update app.py (9e7c7bed501e205839f401d032bee9165dfd955d)
- Adding translation in (9bc1cf7177418734efabeb776590f3121a74ead7)
- Translation in all languages (f650b513a8e1ad7afbf3e2d7fc6497b0db34115a)
- Update app.py (6fab01ece8f40265f036dd160ba12de6f42ffc27)
- Update app.py (7815517c1e42d30bce7ae203c928ba6f743db48e)
- test data v2 (7d3d48c4dbe32849580f04e071c143856a177d92)
- data v2 (8cfc184e1f36158e95cae8949fce2054feceb24c)
- database v2 (cbd66080709bf8c2c9bef7e8e7e63c72845f6622)
- source table up to date (c60b40aa2a676290f51e654f3b6684d96129ba7a)
- Remove message in readme about language (c6f0b32a425f110ea0693bfcd35c941b39e25041)
- modified gitignore (bbcea094f621ca3577e63e2afd3d7ccd4199cc0e)
- Merge branch 'main' of https://huggingface.co/spaces/Ekimetrics/climate-question-answering (8d7ff84233dbdd78e1a7c92b42909af48699541b)
- Updated CSS and app (2e218e7d13e77137a43113c143b329a992685735)
- Added v3 FAISS Flat Files (9e5fc08f5aa319624dbb216e2bbeac10a42344dd)
- Delete documents (b8db6ac29ea2ba76bb673a4ba6f56710b46753b6)
- Update app.py (fbc1facd63ff127ba64e4d95ff5bdb10305b63bb)
- Update app.py (97c1ab968daa18170fbd1ce5594641f7acae6d41)
- Update app.py (3042aa7291312c72f3438ec643c2250019f3d11b)
- Update app.py (3ac1b5433f64ce96538adce66837159b89469a8d)
- Updated app 1.2.0 (eb0f8f83bd87b47d8bcff2bf18eadadd7e138b71)
- Update requirements.txt (5d956cd32db80b12494193d387f6bfb0e6dcde77)
- Updated requirements and changelog (a97a3ea8c6204dfed4965793b0d1491e0a489514)
- Update requirements.txt (2b02c85473a127ae41a59bd92443481e7119424c)
- Update app.py (7b97a461fc26a49ba883170a0962414d2d9e38b9)
- Update app.py (39b3b1fb994f4a2f67c422b1822ed915142b23fc)
- Updated app with config and audiences (fa572c4d96af8a39bf7c4069d19d9e88db1847fe)
- Update app.py (0be7c92dcc71399827ecac33fcb310bc5a52d8ed)
- Update app.py (00b001ee301a41ce83d1751b051880a892be02a4)
- Update app.py (8fc79eade32c0d9959e1e3a00dbba2fef0721b53)
- Update app.py (a8164b46253cbb0943f84b681b8d023f3e5796c1)
- Updated CSS (b0d7ddcf715042a093546a85664fdd6988152e79)
- Update style.css (e07482e059773e8f76093deb8f81e035aedc60d5)
- Update style.css (21b0eb2658d5cc1344975726277afad38e310e42)
- Added CSS (79f4b842aaf281b3158bdedc1ae74e531971e809)
- Updated CSS (e03c50b499faf67a5…)
- app.py +6 -12
- climateqa/engine/chains/retrieve_documents.py +25 -11
- climateqa/engine/graph_retriever.py +3 -4
- climateqa/engine/llm/openai.py +0 -1
- climateqa/engine/talk_to_data/input_processing.py +73 -8
- climateqa/engine/talk_to_data/ipcc/config.py +16 -4
- climateqa/engine/talk_to_data/ipcc/plot_informations.py +23 -0
- climateqa/engine/talk_to_data/ipcc/plots.py +81 -3
- climateqa/engine/talk_to_data/ipcc/queries.py +65 -5
- climateqa/engine/talk_to_data/main.py +2 -2
- climateqa/engine/talk_to_data/workflow/drias.py +8 -3
- climateqa/engine/talk_to_data/workflow/ipcc.py +20 -7
- climateqa/engine/vectorstore.py +137 -45
- climateqa/utils.py +1 -1
- front/tabs/tab_ipcc.py +2 -0
- requirements.txt +3 -0
- sandbox/20241104 - CQA - StepByStep CQA.ipynb +0 -0
- style.css +0 -1
app.py

@@ -7,7 +7,7 @@ from azure.storage.fileshare import ShareServiceClient
 # Import custom modules
 from climateqa.engine.embeddings import get_embeddings_function
 from climateqa.engine.llm import get_llm
-from climateqa.engine.vectorstore import get_pinecone_vectorstore
+from climateqa.engine.vectorstore import get_vectorstore
 from climateqa.engine.reranker import get_reranker
 from climateqa.engine.graph import make_graph_agent, make_graph_agent_poc
 from climateqa.engine.chains.retrieve_papers import find_papers

@@ -66,17 +66,11 @@ user_id = create_user_id()
 
 # Create vectorstore and retriever
 embeddings_function = get_embeddings_function()
-vectorstore = get_pinecone_vectorstore(
-    embeddings_function, index_name=os.getenv("PINECONE_API_INDEX")
-)
-vectorstore_graphs = get_pinecone_vectorstore(
-    embeddings_function,
-    index_name=os.getenv("PINECONE_API_INDEX_OWID"),
-    text_key="description",
-)
-vectorstore_region = get_pinecone_vectorstore(
-    embeddings_function, index_name=os.getenv("PINECONE_API_INDEX_LOCAL_V2")
-)
+
+vectorstore = get_vectorstore(provider="azure_search", embeddings=embeddings_function, index_name="climateqa-ipx")
+vectorstore_graphs = get_vectorstore(provider="azure_search", embeddings=embeddings_function, index_name="climateqa-owid", text_key="description")
+vectorstore_region = get_vectorstore(provider="azure_search", embeddings=embeddings_function, index_name="climateqa-v2")
+
 
 llm = get_llm(provider="openai", max_tokens=1024, temperature=0.0)
 if os.environ["GRADIO_ENV"] == "local":
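The calls above route every provider through a single get_vectorstore factory, but the rendered vectorstore.py diff further down only shows the Azure path (get_azure_search_vectorstore), not the dispatcher itself. A minimal sketch of what such a factory could look like — the signature and branch names here are assumptions inferred from the call sites, not code from the PR:

    # Hypothetical sketch only: the actual get_vectorstore is not visible in this diff.
    def get_vectorstore(provider: str, embeddings, index_name: str, text_key: str = "content"):
        if provider == "azure_search":
            # Defined in climateqa/engine/vectorstore.py below
            return get_azure_search_vectorstore(embeddings, text_key=text_key, index_name=index_name)
        # Other providers ("qdrant" is used in retrieve_documents.py below) would dispatch similarly.
        raise ValueError(f"Unsupported vectorstore provider: {provider}")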
climateqa/engine/chains/retrieve_documents.py

@@ -19,7 +19,7 @@ from ..llm import get_llm
 from .prompts import retrieve_chapter_prompt_template
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.output_parsers import StrOutputParser
-from ..vectorstore import get_pinecone_vectorstore
+from ..vectorstore import get_vectorstore
 from ..embeddings import get_embeddings_function
 import ast
 

@@ -134,7 +134,7 @@ def get_ToCs(version: str) :
         "version": version
     }
     embeddings_function = get_embeddings_function()
-    vectorstore = get_pinecone_vectorstore(embeddings_function, index_name="climateqa")
+    vectorstore = get_vectorstore(provider="qdrant", embeddings=embeddings_function, index_name="climateqa")
     tocs = vectorstore.similarity_search_with_score(query="",filter = filters_text)
 
     # remove duplicates or almost duplicates

@@ -236,7 +236,7 @@ async def get_POC_documents_by_ToC_relevant_documents(
     filters_text_toc = {
         **filters,
         "chunk_type":"text",
-        "toc_level0": {"$in": toc_filters},
+        "toc_level0": toc_filters,  # Changed from {"$in": toc_filters} to direct list
         "version": version
         # "report_type": {}, # TODO to be completed to choose the right documents / chapters according to the analysis of the question
     }

@@ -273,6 +273,22 @@ async def get_POC_documents_by_ToC_relevant_documents(
         "docs_images" : docs_images
     }
 
+def filter_for_full_report_documents(filters: dict) -> dict:
+    """
+    Filter for full report documents.
+    Returns a dictionary format compatible with all vectorstore providers.
+    """
+    # Start with the base filters
+    full_filters = filters.copy()
+
+    # Add chunk_type filter
+    full_filters["chunk_type"] = "text"
+
+    # Add report_type exclusion using the new _exclude suffix format
+    # This will be converted to an appropriate OData filter by the Azure Search wrapper
+    full_filters["report_type_exclude"] = ["SPM"]
+
+    return full_filters
 
 async def get_IPCC_relevant_documents(
     query: str,

@@ -299,9 +315,9 @@ async def get_IPCC_relevant_documents(
     filters = {}
 
     if len(reports) > 0:
-        filters["short_name"] = {"$in": reports}
+        filters["short_name"] = reports  # Changed from {"$in":reports} to direct list
     else:
-        filters["source"] = {"$in": sources}
+        filters["source"] = sources  # Changed from {"$in": sources} to direct list
 
     # INIT
     docs_summaries = []

@@ -323,18 +339,16 @@ async def get_IPCC_relevant_documents(
     filters_summaries = {
         **filters,
         "chunk_type":"text",
-        "report_type": {"$in": ["SPM"]},
+        "report_type": ["SPM"],  # Changed from {"$in":["SPM"]} to direct list
     }
 
     docs_summaries = vectorstore.similarity_search_with_score(query=query,filter = filters_summaries,k = k_summary)
     docs_summaries = [x for x in docs_summaries if x[1] > threshold]
 
     # Search for k_total - k_summary documents in the full reports dataset
-    filters_full = {
-        **filters,
-        "chunk_type":"text",
-        "report_type": {"$nin": ["SPM"]},
-    }
+    filters_full = filter_for_full_report_documents(filters)
+
+
     docs_full = vectorstore.similarity_search_with_score(query=query,filter = filters_full,k = k_total)
 
     if search_figures:
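The filter change above drops Pinecone's $in/$nin operators in favor of plain values plus an _exclude suffix, which the Azure wrapper (see climateqa/engine/vectorstore.py below) turns into OData strings. A worked example of the mapping, with hypothetical report names:

    # Before (Pinecone-style) vs after (provider-neutral), for illustration only:
    old_filters = {"short_name": {"$in": ["AR6", "SR15"]}, "report_type": {"$nin": ["SPM"]}}
    new_filters = {"short_name": ["AR6", "SR15"], "report_type_exclude": ["SPM"]}
    # Following the wrapper's conversion rules, new_filters becomes:
    #   "(short_name eq 'AR6' or short_name eq 'SR15') and report_type ne 'SPM'"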
climateqa/engine/graph_retriever.py

@@ -60,10 +60,9 @@ async def retrieve_graphs(
     assert sources
     assert any([x in ["OWID"] for x in sources])
 
-    # Prepare base search kwargs
-    filters = {}
-
-    filters["source"] = {"$in": sources}
+    # Prepare base search kwargs for Azure AI Search
+    # Azure expects a filter string, e.g. "source eq 'OWID' or source eq 'IEA'"
+    filters = {"source":"OWID"}
 
     docs = vectorstore.similarity_search_with_score(query=query, filter=filters, k=k_total)
 
climateqa/engine/llm/openai.py

@@ -8,7 +8,6 @@ except Exception:
     pass
 
 def get_llm(model="gpt-4o-mini",max_tokens=1024, temperature=0.0, streaming=True,timeout=30, **kwargs):
-
     llm = ChatOpenAI(
         model=model,
         api_key=os.environ.get("THEO_API_KEY", None),
climateqa/engine/talk_to_data/input_processing.py

@@ -10,6 +10,7 @@ from climateqa.engine.talk_to_data.objects.llm_outputs import ArrayOutput
 from climateqa.engine.talk_to_data.objects.location import Location
 from climateqa.engine.talk_to_data.objects.plot import Plot
 from climateqa.engine.talk_to_data.objects.states import State
+import calendar
 
 async def detect_location_with_openai(sentence: str) -> str:
     """

@@ -118,7 +119,7 @@ async def detect_year_with_openai(sentence: str) -> str:
         return years_list[0]
     else:
         return ""
-
+
 
 async def detect_relevant_tables(user_question: str, plot: Plot, llm, table_names_list: list[str]) -> list[str]:
     """Identifies relevant tables for a plot based on user input.

@@ -227,6 +228,55 @@ async def find_year(user_input: str) -> str| None:
         return None
     return year
 
+async def find_month(user_input: str) -> dict[str, str|None]:
+    """
+    Extracts month information from user input using an LLM.
+
+    This function analyzes the user's query to detect if a month is mentioned.
+    It returns both the month number (as a string, e.g. '7' for July) and the full English month name (e.g. 'July').
+    If no month is found, both values will be None.
+
+    Args:
+        user_input (str): The user's query text.
+
+    Returns:
+        dict[str, str|None]: A dictionary with keys:
+            - "month_number": the month number as a string (e.g. '7'), or None if not found
+            - "month_name": the full English month name (e.g. 'July'), or None if not found
+
+    Example:
+        >>> await find_month("Show me the temperature in Paris in July")
+        {'month_number': '7', 'month_name': 'July'}
+        >>> await find_month("Show me the temperature in Paris")
+        {'month_number': None, 'month_name': None}
+    """
+
+    llm = get_llm()
+    prompt = """
+        Extract the month (as a number from 1 to 12) mentioned in the following sentence.
+        Return the result as a Python list of integers. If no month is mentioned, return an empty list.
+
+        Sentence: "{sentence}"
+    """
+    prompt = ChatPromptTemplate.from_template(prompt)
+    structured_llm = llm.with_structured_output(ArrayOutput)
+    chain = prompt | structured_llm
+    response: ArrayOutput = await chain.ainvoke({"sentence": user_input})
+    months_list = ast.literal_eval(response['array'])
+    if len(months_list) > 0:
+        month_number = int(months_list[0])
+        month_name = calendar.month_name[month_number]
+        return {
+            "month_number": str(month_number),
+            "month_name": month_name
+        }
+    else:
+        return {
+            "month_number" : None,
+            "month_name" : None
+        }
+
+
 async def find_relevant_plots(state: State, llm, plots: list[Plot]) -> list[str]:
     print("---- Find relevant plots ----")
     relevant_plots = await detect_relevant_plots(state['user_input'], llm, plots)

@@ -237,16 +287,28 @@ async def find_relevant_tables_per_plot(state: State, plot: Plot, llm, tables: list[str]) -> list[str]:
     relevant_tables = await detect_relevant_tables(state['user_input'], plot, llm, tables)
     return relevant_tables
 
-async def find_param(state: State, param_name:str, mode: Literal['DRIAS', 'IPCC'] = 'DRIAS') -> dict[str, Optional[str]] | Location | None:
-    """
+async def find_param(state: State, param_name: str, mode: Literal['DRIAS', 'IPCC'] = 'DRIAS') -> dict[str, Optional[str]] | Location | None:
+    """
+    Retrieves a specific parameter (location, year, month, etc.) from the user's input using the appropriate extraction method.
 
     Args:
-        state (State): state
-        param_name (str): name of the param
-
+        state (State): The current state containing at least the user's input under 'user_input'.
+        param_name (str): The name of the parameter to extract. Supported: 'location', 'year', 'month'.
+        mode (Literal['DRIAS', 'IPCC']): The data mode to use for location extraction.
 
     Returns:
-        dict
+        - For 'location': a Location object (dict with keys like 'location', 'latitude', etc.), or None if not found.
+        - For 'year': a dict {'year': year or None}.
+        - For 'month': a dict {'month_number': str or None, 'month_name': str or None}.
+        - None if the parameter is not recognized or not found.
+
+    Example:
+        >>> await find_param(state, 'location')
+        {'location': 'Paris', 'latitude': ..., ...}
+        >>> await find_param(state, 'year')
+        {'year': '2050'}
+        >>> await find_param(state, 'month')
+        {'month_number': '7', 'month_name': 'July'}
     """
     if param_name == 'location':
         location = await find_location(state['user_input'], mode)

@@ -254,4 +316,7 @@ async def find_param(state: State, param_name: str, mode: Literal['DRIAS', 'IPCC'] = 'DRIAS') -> dict[str, Optional[str]] | Location | None:
     if param_name == 'year':
         year = await find_year(state['user_input'])
         return {'year': year}
-
+    if param_name == 'month':
+        month = await find_month(state['user_input'])
+        return month
+    return None
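find_month leans on the standard library for the number-to-name mapping; a quick check of the behavior it relies on:

    import calendar

    # calendar.month_name[0] is '', indices 1-12 are the English month names,
    # so an LLM extraction of [7] becomes {"month_number": "7", "month_name": "July"}.
    assert calendar.month_name[7] == "July"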
climateqa/engine/talk_to_data/ipcc/config.py

@@ -6,16 +6,22 @@ from climateqa.engine.talk_to_data.config import IPCC_DATASET_URL
 IPCC_TABLES = [
     "mean_temperature",
     "total_precipitation",
+    "minimum_temperature",
+    "maximum_temperature"
 ]
 
 IPCC_INDICATOR_COLUMNS_PER_TABLE = {
     "mean_temperature": "mean_temperature",
-    "total_precipitation": "total_precipitation"
+    "total_precipitation": "total_precipitation",
+    "minimum_temperature": "minimum_temperature",
+    "maximum_temperature": "maximum_temperature"
 }
 
 IPCC_INDICATOR_TO_UNIT = {
     "mean_temperature": "°C",
-    "total_precipitation": "mm/day"
+    "total_precipitation": "mm/day",
+    "minimum_temperature": "°C",
+    "maximum_temperature": "°C"
 }
 
 IPCC_SCENARIO = [

@@ -30,7 +36,8 @@ IPCC_MODELS = []
 
 IPCC_PLOT_PARAMETERS = [
     'year',
-    'location'
+    'location',
+    'month'
 ]
 
 MACRO_COUNTRIES = ['JP',

@@ -63,7 +70,9 @@ HUGE_MACRO_COUNTRIES = ['CL',
 
 IPCC_INDICATOR_TO_COLORSCALE = {
     "mean_temperature": TEMPERATURE_COLORSCALE,
-    "total_precipitation": PRECIPITATION_COLORSCALE
+    "total_precipitation": PRECIPITATION_COLORSCALE,
+    "minimum_temperature": TEMPERATURE_COLORSCALE,
+    "maximum_temperature": TEMPERATURE_COLORSCALE,
 }
 
 IPCC_UI_TEXT = """

@@ -77,9 +86,12 @@ By default, we take the **mediane of each climate model**.
 Current available charts :
 - Yearly evolution of an indicator at a specific location (historical + SSP Projections)
 - Yearly spatial distribution of an indicator in a specific country
+- Yearly evolution of an indicator in a specific month at a specific location (historical + SSP Projections)
 
 Current available indicators :
 - Mean temperature
+- Minimum temperature
+- Maximum temperature
 - Total precipitation
 
 For example, you can ask:
climateqa/engine/talk_to_data/ipcc/plot_informations.py

@@ -47,4 +47,27 @@ Each grid point is colored according to the value of the indicator ({unit}), all
 - For each grid point of {location} country ({country_name}), the value of {indicator} in {year} and for the selected scenario is extracted and mapped to its geographic coordinates.
 - The grid points correspond to 1-degree squares centered on the grid points of the IPCC dataset. Each grid point has been mapped to a country using [**reverse_geocoder**](https://github.com/thampiman/reverse-geocoder).
 - The coordinates used for each region are those of the closest available grid point in the IPCC database, which uses a regular grid with a spatial resolution of 1 degree.
 """
+
+def indicator_specific_month_evolution_informations(
+    indicator: str,
+    params: dict[str, str]
+) -> str:
+    if "location" not in params:
+        raise ValueError('"location" must be provided in params')
+    location = params["location"]
+    if "month_name" not in params:
+        raise ValueError('"month_name" must be provided in params')
+    month = params["month_name"]
+    unit = IPCC_INDICATOR_TO_UNIT[indicator]
+    return f"""
+This plot shows how the climate indicator **{indicator}** evolves over time in **{location}** for the month of **{month}**.
+It combines both historical (from 1950 to 2015) observations and future (from 2016 to 2100) projections for the different SSP climate scenarios (SSP126, SSP245, SSP370 and SSP585).
+The x-axis represents the years (from 1950 to 2100), and the y-axis shows the value of the {indicator} ({unit}) for the selected month.
+Each line corresponds to a different scenario, allowing you to compare how {indicator} for month {month} might change under various future conditions.
+
+**Data source:**
+- The data comes from the IPCC climate datasets (Parquet files) for the relevant indicator, location, and month.
+- For each year and scenario, the value of {indicator} for month {month} is extracted for the selected location.
+- The coordinates used for {location} correspond to the closest available point in the IPCC database, which uses a regular grid with a spatial resolution of 1 degree.
+"""
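Usage sketch for the new helper, assuming it is imported from this module; the parameter values are made up:

    from climateqa.engine.talk_to_data.ipcc.plot_informations import (
        indicator_specific_month_evolution_informations,
    )

    text = indicator_specific_month_evolution_informations(
        "mean_temperature", {"location": "Paris", "month_name": "July"}
    )
    # Returns the markdown block above with indicator="mean_temperature",
    # location="Paris", month="July" and unit "°C" (from IPCC_INDICATOR_TO_UNIT).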
climateqa/engine/talk_to_data/ipcc/plots.py

@@ -5,8 +5,8 @@ import pandas as pd
 import geojson
 
 from climateqa.engine.talk_to_data.ipcc.config import IPCC_INDICATOR_TO_COLORSCALE, IPCC_INDICATOR_TO_UNIT, IPCC_SCENARIO
-from climateqa.engine.talk_to_data.ipcc.plot_informations import choropleth_map_informations, indicator_evolution_informations
-from climateqa.engine.talk_to_data.ipcc.queries import indicator_for_given_year_query, indicator_per_year_at_location_query
+from climateqa.engine.talk_to_data.ipcc.plot_informations import choropleth_map_informations, indicator_evolution_informations, indicator_specific_month_evolution_informations
+from climateqa.engine.talk_to_data.ipcc.queries import indicator_for_given_year_query, indicator_per_year_and_specific_month_at_location_query, indicator_per_year_at_location_query
 from climateqa.engine.talk_to_data.objects.plot import Plot
 
 def generate_geojson_polygons(latitudes: list[float], longitudes: list[float], indicators: list[float]) -> geojson.FeatureCollection:

@@ -102,6 +102,82 @@ indicator_evolution_at_location_historical_and_projections: Plot = {
     "short_name": "Evolution"
 }
 
+def plot_indicator_monthly_evolution_at_location(
+    params: dict,
+) -> Callable[[pd.DataFrame], Figure]:
+    """
+    Returns a function that generates a line plot showing the evolution of a climate indicator
+    for a specific month over time at a specific location, including both historical data
+    and future projections for different climate scenarios.
+
+    Args:
+        params (dict): Dictionary with:
+            - indicator_column (str): Name of the climate indicator column to plot.
+            - location (str): Location (e.g., country, city) for which to plot the indicator.
+            - month (str): Month name to plot.
+
+    Returns:
+        Callable[[pd.DataFrame], Figure]: Function that takes a DataFrame and returns a Plotly Figure.
+    """
+    indicator = params["indicator_column"]
+    location = params["location"]
+    month = params["month_name"]
+    indicator_label = " ".join(word.capitalize() for word in indicator.split("_"))
+    unit = IPCC_INDICATOR_TO_UNIT.get(indicator, "")
+
+    def plot_data(df: pd.DataFrame) -> Figure:
+        df = df.sort_values(by='year')
+        years = df['year'].astype(int).tolist()
+        indicators = df[indicator].astype(float).tolist()
+        scenarios = df['scenario'].astype(str).tolist()
+
+        # Find last historical value for continuity
+        last_historical = [(y, v) for y, v, s in zip(years, indicators, scenarios) if s == 'historical']
+        last_historical_year, last_historical_indicator = last_historical[-1] if last_historical else (None, None)
+
+        fig = go.Figure()
+        for scenario in IPCC_SCENARIO:
+            x = [y for y, s in zip(years, scenarios) if s == scenario]
+            y = [v for v, s in zip(indicators, scenarios) if s == scenario]
+            # Connect historical to scenario
+            if scenario != 'historical' and last_historical_indicator is not None:
+                x = [last_historical_year] + x
+                y = [last_historical_indicator] + y
+            fig.add_trace(go.Scatter(
+                x=x,
+                y=y,
+                mode='lines',
+                name=scenario
+            ))
+
+        fig.update_layout(
+            title=f'Evolution of {indicator_label} in {month} in {location} (Historical + SSP Scenarios)',
+            xaxis_title='Year',
+            yaxis_title=f'{indicator_label} ({unit})',
+            legend_title='Scenario',
+            height=800,
+        )
+        return fig
+
+    return plot_data
+
+
+indicator_specific_month_evolution_at_location: Plot = {
+    "name": "Indicator specific month Evolution at Location (Historical + Projections)",
+    "description": (
+        "Shows how a climate indicator (e.g., rainfall, temperature) for a specific month changes over time at a specific location, "
+        "including historical data and future projections. "
+        "Useful for questions about the value or trend of an indicator for a given month at a location, "
+        "such as 'How does July temperature evolve in Paris over time?'. "
+        "Parameters: indicator_column (the climate variable), location (e.g., country, city), month (1-12)."
+    ),
+    "params": ["indicator_column", "location", "month"],
+    "plot_function": plot_indicator_monthly_evolution_at_location,
+    "sql_query": indicator_per_year_and_specific_month_at_location_query,
+    "plot_information": indicator_specific_month_evolution_informations,
+    "short_name": "Evolution for a specific month"
+}
+
 def plot_choropleth_map_of_country_indicator_for_specific_year(
     params: dict,
 ) -> Callable[[pd.DataFrame], Figure]:

@@ -167,6 +243,7 @@ def plot_choropleth_map_of_country_indicator_for_specific_year(
 
     return plot_data
 
+
 choropleth_map_of_country_indicator_for_specific_year: Plot = {
     "name": "Choropleth Map of a Country's Indicator Distribution for a Specific Year",
     "description": (

@@ -185,5 +262,6 @@ choropleth_map_of_country_indicator_for_specific_year: Plot = {
 
 IPCC_PLOTS = [
     indicator_evolution_at_location_historical_and_projections,
-    choropleth_map_of_country_indicator_for_specific_year
+    choropleth_map_of_country_indicator_for_specific_year,
+    indicator_specific_month_evolution_at_location
 ]
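The only subtle part of plot_data above is prepending the last historical point to each projection trace so the lines join up visually. A minimal illustration with made-up values:

    # Made-up data to show the continuity trick used in plot_data:
    years     = [2014, 2015, 2016, 2017]
    values    = [14.1, 14.2, 14.6, 14.8]
    scenarios = ['historical', 'historical', 'ssp245', 'ssp245']

    last_hist = [(y, v) for y, v, s in zip(years, values, scenarios) if s == 'historical'][-1]
    x = [last_hist[0]] + [y for y, s in zip(years, scenarios) if s == 'ssp245']   # [2015, 2016, 2017]
    y = [last_hist[1]] + [v for v, s in zip(values, scenarios) if s == 'ssp245']  # [14.2, 14.6, 14.8]
    # The ssp245 trace now starts at the last historical point instead of leaving a gap.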
climateqa/engine/talk_to_data/ipcc/queries.py

@@ -43,7 +43,7 @@ def indicator_per_year_at_location_query(
         return ""
 
     if country_code in MACRO_COUNTRIES:
-        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}
+        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}_monthly_macro.parquet'"
         sql_query = f"""
             SELECT year, scenario, AVG({indicator_column}) as {indicator_column}
             FROM {table_path}

@@ -52,7 +52,7 @@ def indicator_per_year_at_location_query(
             ORDER BY year, scenario
         """
     elif country_code in HUGE_MACRO_COUNTRIES:
-        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}
+        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}_annualy_macro.parquet'"
         sql_query = f"""
             SELECT year, scenario, {indicator_column}
             FROM {table_path}

@@ -75,6 +75,66 @@ def indicator_per_year_at_location_query(
         """
     return sql_query.strip()
 
+class IndicatorPerYearAndSpecificMonthAtLocationQueryParams(TypedDict, total=False):
+    """
+    Parameters for querying the evolution of an indicator per year for a specific month at a specific location.
+
+    Attributes:
+        indicator_column (str): Name of the climate indicator column.
+        latitude (str): Latitude of the location.
+        longitude (str): Longitude of the location.
+        country_code (str): Country code.
+        month (str): Month targeted
+    """
+    indicator_column: str
+    latitude: str
+    longitude: str
+    country_code: str
+    month: str
+
+def indicator_per_year_and_specific_month_at_location_query(
+    table: str, params: IndicatorPerYearAndSpecificMonthAtLocationQueryParams
+) -> str:
+    """
+    Builds an SQL query to get the evolution of an indicator per year for a specific month at a specific location.
+
+    Args:
+        table (str): SQL table of the indicator.
+        params (dict): Dictionary with required params:
+            - indicator_column (str)
+            - latitude (str or float)
+            - longitude (str or float)
+            - month (int)
+
+    Returns:
+        str: The SQL query string.
+    """
+    indicator_column = params.get("indicator_column")
+    latitude = params.get("latitude")
+    longitude = params.get("longitude")
+    country_code = params.get("country_code")
+    month = params.get('month_number')
+
+    if not all([indicator_column, latitude, longitude, country_code, month]):
+        return ""
+
+    if country_code in (MACRO_COUNTRIES+HUGE_MACRO_COUNTRIES):
+        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}_monthly_macro.parquet'"
+        sql_query = f"""
+            SELECT year, scenario, {indicator_column}
+            FROM {table_path}
+            WHERE latitude = {latitude} AND longitude = {longitude} AND year >= 1950 AND month={month}
+            ORDER BY year, scenario
+        """
+    else:
+        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}.parquet'"
+        sql_query = f"""
+            SELECT year, scenario, MEDIAN({indicator_column}) AS {indicator_column}
+            FROM {table_path}
+            WHERE latitude = {latitude} AND longitude = {longitude} AND year >= 1950 AND month={month}
+            GROUP BY scenario, year
+        """
+    return sql_query.strip()
 class IndicatorForGivenYearQueryParams(TypedDict, total=False):
     """
     Parameters for querying an indicator's values across locations for a specific year.

@@ -110,7 +170,7 @@ def indicator_for_given_year_query(
         return ""
 
     if country_code in MACRO_COUNTRIES:
-        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}
+        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}_monthly_macro.parquet'"
         sql_query = f"""
             SELECT latitude, longitude, scenario, AVG({indicator_column}) as {indicator_column}
             FROM {table_path}

@@ -119,7 +179,7 @@ def indicator_for_given_year_query(
             ORDER BY latitude, longitude, scenario
         """
     elif country_code in HUGE_MACRO_COUNTRIES:
-        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}
+        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}_annualy_macro.parquet'"
         sql_query = f"""
             SELECT latitude, longitude, scenario, {indicator_column}
             FROM {table_path}

@@ -141,4 +201,4 @@ def indicator_for_given_year_query(
             ORDER BY latitude, longitude, scenario
         """
 
-    return sql_query.strip()
+    return sql_query.strip()
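For a country outside both macro lists, the new query builder falls back to the per-point Parquet file and takes the median across models. A worked example with hypothetical coordinates, assuming 'FR' is in neither MACRO_COUNTRIES nor HUGE_MACRO_COUNTRIES:

    params = {
        "indicator_column": "mean_temperature",
        "latitude": "48.0",
        "longitude": "2.0",
        "country_code": "FR",
        "month_number": "7",
    }
    print(indicator_per_year_and_specific_month_at_location_query("mean_temperature", params))
    # SELECT year, scenario, MEDIAN(mean_temperature) AS mean_temperature
    # FROM '<IPCC_DATASET_URL>/mean_temperature/FR.parquet'
    # WHERE latitude = 48.0 AND longitude = 2.0 AND year >= 1950 AND month=7
    # GROUP BY scenario, year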
climateqa/engine/talk_to_data/main.py

@@ -50,7 +50,7 @@ async def ask_drias(query: str, index_state: int = 0, user_id: str | None = None
 
     if "error" in final_state and final_state["error"] != "":
         # No Sql query, no dataframe, no figure, no plot information, empty sql queries list, empty result dataframes list, empty figures list, empty plot information list, index state = 0, empty table list, error message
-        return None, None, None, None, [], [], [], 0, [], final_state["error"]
+        return None, None, None, None, [], [], [], [], 0, [], final_state["error"]
 
     sql_query = sql_queries[index_state]
     dataframe = result_dataframes[index_state]

@@ -112,7 +112,7 @@ async def ask_ipcc(query: str, index_state: int = 0, user_id: str | None = None)
 
     if "error" in final_state and final_state["error"] != "":
         # No Sql query, no dataframe, no figure, no plot information, empty sql queries list, empty result dataframes list, empty figures list, empty plot information list, index state = 0, empty table list, error message
-        return None, None, None, None, [], [], [], 0, [], final_state["error"]
+        return None, None, None, None, [], [], [], [], 0, [], final_state["error"]
 
     sql_query = sql_queries[index_state]
     dataframe = result_dataframes[index_state]
climateqa/engine/talk_to_data/workflow/drias.py

@@ -125,11 +125,16 @@ async def drias_workflow(user_input: str) -> State:
         'plot': plot,
         'status': 'OK'
     }
 
-    # Gather all required parameters
+    # Gather all required parameters in parallel
+    param_tasks = [
+        find_param(state, param_name, mode='DRIAS')
+        for param_name in DRIAS_PLOT_PARAMETERS
+    ]
+    param_results = await asyncio.gather(*param_tasks)
+
     params = {}
-    for param_name in DRIAS_PLOT_PARAMETERS:
-        param = await find_param(state, param_name, mode='DRIAS')
+    for param in param_results:
         if param:
             params.update(param)
 
climateqa/engine/talk_to_data/workflow/ipcc.py

@@ -125,12 +125,17 @@ async def ipcc_workflow(user_input: str) -> State:
     }
 
     # Gather all required parameters
+    param_tasks = [
+        find_param(state, param_name, mode='IPCC')
+        for param_name in IPCC_PLOT_PARAMETERS
+    ]
+    param_results = await asyncio.gather(*param_tasks)
+
     params = {}
-    for param_name in IPCC_PLOT_PARAMETERS:
-        param = await find_param(state, param_name, mode='IPCC')
+    for param in param_results:
         if param:
             params.update(param)
-
+
     # Process all outputs in parallel using process_output
     tasks = [
         process_output(output_title, output['table'], output['plot'], params.copy())

@@ -152,10 +157,18 @@ async def ipcc_workflow(user_input: str) -> State:
 
     # Set error messages if needed
     if not errors['have_relevant_table']:
-        state['error'] = "Sorry, I couldn't find any relevant table in our database to answer your question."
+        state['error'] = (
+            "Sorry, I couldn't find any relevant table in our database to answer your question.\n"
+            "Try asking about a different climate indicator like temperature or precipitation."
+        )
    elif not errors['have_sql_query']:
-        state['error'] = "Sorry, I couldn't generate a relevant SQL query to answer your question."
+        state['error'] = (
+            "Sorry, I couldn't generate a relevant SQL query to answer your question.\n"
+            "Try rephrasing your question to focus on a specific location, a year, or a month."
+        )
    elif not errors['have_dataframe']:
-        state['error'] = "Sorry, there is no data in our tables that can answer your question."
-
+        state['error'] = (
+            "Sorry, there is no data in our tables that can answer your question.\n"
+            "Try asking about a more common location, or a different year."
+        )
     return state
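Both workflow files make the same change: parameter extractions that previously ran one await at a time are now issued together with asyncio.gather, so total latency is roughly that of the slowest extraction rather than the sum. A self-contained sketch with stand-in coroutines:

    import asyncio

    async def find_param_stub(name: str) -> dict:
        await asyncio.sleep(0.1)  # stands in for an LLM extraction call
        return {name: f"<{name}>"}

    async def main():
        # Sequential: ~0.3 s for three params. Gathered: ~0.1 s.
        results = await asyncio.gather(*[find_param_stub(n) for n in ("location", "year", "month")])
        params = {}
        for param in results:
            if param:
                params.update(param)
        print(params)  # {'location': '<location>', 'year': '<year>', 'month': '<month>'}

    asyncio.run(main())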
climateqa/engine/vectorstore.py

@@ -1,11 +1,11 @@
-#
-# More info at https://docs.pinecone.io/docs/langchain
-# And https://python.langchain.com/docs/integrations/vectorstores/pinecone
+# Azure AI Search: https://python.langchain.com/docs/integrations/vectorstores/azuresearch
 import os
-from pinecone import Pinecone
-from langchain_community.vectorstores import Pinecone as PineconeVectorstore
 
-#
+# Azure AI Search imports
+from langchain_community.vectorstores.azuresearch import AzureSearch
+
+
+# Load environment variables
 try:
     from dotenv import load_dotenv
     load_dotenv()

@@ -13,44 +13,136 @@ except:
     pass
 
 
-# [get_pinecone_vectorstore implementation: removed lines not recoverable from the rendered diff]
-
-# def get_pinecone_retriever(vectorstore,k = 10,namespace = "vectors",sources = ["IPBES","IPCC"]):
-
-#     assert isinstance(sources,list)
-
-#     # Check if all elements in the list are either IPCC or IPBES
-#     filter = {
-#         "source": { "$in":sources},
-#     }
-
-#     retriever = vectorstore.as_retriever(search_kwargs={
-#         "k": k,
-#         "namespace":"vectors",
-#         "filter":filter
-#     })
-
-#     return retriever
+class AzureSearchWrapper:
+    """
+    Wrapper class for Azure AI Search vectorstore to handle filter conversion.
+
+    This wrapper automatically converts dictionary-style filters to Azure Search OData filter format,
+    ensuring seamless compatibility when switching from other providers.
+    """
+
+    def __init__(self, azure_search_vectorstore):
+        self.vectorstore = azure_search_vectorstore
+
+    def __getattr__(self, name):
+        """Delegate all other attributes to the wrapped vectorstore."""
+        return getattr(self.vectorstore, name)
+
+    def _convert_dict_filter_to_odata(self, filter_dict):
+        """
+        Convert dictionary-style filters to Azure Search OData filter format.
+
+        Args:
+            filter_dict (dict): Dictionary-style filter
+
+        Returns:
+            str: OData filter string
+        """
+        if not filter_dict:
+            return None
+
+        conditions = []
+
+        for key, value in filter_dict.items():
+            if key.endswith('_exclude'):
+                # Handle exclusion filters (e.g., report_type_exclude)
+                base_key = key.replace('_exclude', '')
+                if isinstance(value, list):
+                    if len(value) == 1:
+                        conditions.append(f"{base_key} ne '{value[0]}'")
+                    else:
+                        exclude_conditions = [f"{base_key} ne '{v}'" for v in value]
+                        conditions.append(f"({' and '.join(exclude_conditions)})")
+                else:
+                    conditions.append(f"{base_key} ne '{value}'")
+            elif isinstance(value, list):
+                # Handle list values (equivalent to $in operator)
+                if len(value) == 1:
+                    conditions.append(f"{key} eq '{value[0]}'")
+                else:
+                    list_conditions = [f"{key} eq '{v}'" for v in value]
+                    conditions.append(f"({' or '.join(list_conditions)})")
+            else:
+                # Handle single values
+                conditions.append(f"{key} eq '{value}'")
+
+        return " and ".join(conditions) if conditions else None
+
+    def similarity_search_with_score(self, query, k=4, filter=None, **kwargs):
+        """Override similarity_search_with_score to convert filters."""
+        if filter is not None:
+            filter = self._convert_dict_filter_to_odata(filter)
+
+        return self.vectorstore.hybrid_search_with_score(
+            query=query, k=k, filters=filter, **kwargs
+        )
+
+    def similarity_search(self, query, k=4, filter=None, **kwargs):
+        """Override similarity_search to convert filters."""
+        if filter is not None:
+            filter = self._convert_dict_filter_to_odata(filter)
+
+        return self.vectorstore.similarity_search(
+            query=query, k=k, filter=filter, **kwargs
+        )
+
+    def similarity_search_by_vector(self, embedding, k=4, filter=None, **kwargs):
+        """Override similarity_search_by_vector to convert filters."""
+        if filter is not None:
+            filter = self._convert_dict_filter_to_odata(filter)
+
+        return self.vectorstore.similarity_search_by_vector(
+            embedding=embedding, k=k, filter=filter, **kwargs
+        )
+
+    def as_retriever(self, search_type="similarity", search_kwargs=None, **kwargs):
+        """Override as_retriever to handle filter conversion in search_kwargs."""
+        if search_kwargs and "filter" in search_kwargs:
+            # Convert the filter in search_kwargs
+            search_kwargs = search_kwargs.copy()  # Don't modify the original
+            if search_kwargs["filter"] is not None:
+                search_kwargs["filter"] = self._convert_dict_filter_to_odata(search_kwargs["filter"])
+
+        return self.vectorstore.as_retriever(
+            search_type=search_type, search_kwargs=search_kwargs, **kwargs
+        )
+
+
+def get_azure_search_vectorstore(embeddings, text_key="content", index_name=None):
+    """
+    Create an Azure AI Search vectorstore instance.
+
+    Args:
+        embeddings: The embeddings function to use
+        text_key: The key for text content in the payload (default: "content")
+        index_name: The name of the Azure Search index
+
+    Returns:
+        AzureSearchWrapper: A wrapped Azure AI Search vectorstore instance with filter compatibility
+    """
+    # Get Azure AI Search configuration from environment variables
+    azure_search_endpoint = os.getenv("AI_SEARCH_INDEX_ENDPOINT")
+    azure_search_key = os.getenv("AI_SEARCH_KEY")
+
+    if not azure_search_endpoint:
+        raise ValueError("AI_SEARCH_INDEX_ENDPOINT environment variable is required")
+
+    if not azure_search_key:
+        raise ValueError("AI_SEARCH_KEY environment variable is required")
+
+    if not index_name:
+        raise ValueError("index_name must be provided for Azure Search")
+
+    # Create Azure Search vectorstore
+    vectorstore = AzureSearch(
+        azure_search_endpoint=azure_search_endpoint,
+        azure_search_key=azure_search_key,
+        index_name=index_name,
+        embedding_function=embeddings.embed_query,
+        content_key=text_key,
+    )
+
+    # Wrap the vectorstore to handle filter conversion
+    return AzureSearchWrapper(vectorstore)
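The conversion rules above can be exercised without a live index, since _convert_dict_filter_to_odata never touches the wrapped store. A small demo (field values are made up):

    from climateqa.engine.vectorstore import AzureSearchWrapper

    w = AzureSearchWrapper(azure_search_vectorstore=None)  # conversion alone needs no real store
    print(w._convert_dict_filter_to_odata({"source": "OWID"}))
    # source eq 'OWID'
    print(w._convert_dict_filter_to_odata({"short_name": ["AR6", "SR15"], "report_type_exclude": ["SPM"]}))
    # (short_name eq 'AR6' or short_name eq 'SR15') and report_type ne 'SPM'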
climateqa/utils.py

@@ -25,7 +25,7 @@ def remove_duplicates_keep_highest_score(documents):
     unique_docs = {}
 
     for doc in documents:
-        doc_id = doc.metadata.get('
+        doc_id = doc.metadata.get('id')
         if doc_id in unique_docs:
             if doc.metadata['reranking_score'] > unique_docs[doc_id].metadata['reranking_score']:
                 unique_docs[doc_id] = doc
front/tabs/tab_ipcc.py

@@ -68,6 +68,8 @@ def show_filter_by_scenario(table_names, index_state, dataframes):
         return gr.update(visible=False)
 
 def filter_by_scenario(dataframes, figures, table_names, index_state, scenario):
+    if len(dataframes) == 0:
+        return None, None
     df = dataframes[index_state]
     if not table_names[index_state].startswith("Map"):
         return df, figures[index_state](df)
requirements.txt

@@ -1,6 +1,9 @@
 gradio==5.0.2
 azure-storage-file-share==12.11.1
 azure-storage-blob==12.23.0
+# Azure AI Search support
+azure-search-documents>=11.4.0
+azure-core>=1.29.0
 python-dotenv==1.0.0
 langchain==0.2.1
 langchain_openai==0.1.7
sandbox/20241104 - CQA - StepByStep CQA.ipynb

The diff for this file is too large to render. See raw diff.
style.css

@@ -661,7 +661,6 @@ a {
 
 #sql-query textarea{
     min-height: 200px !important;
-
 }
 
 #sql-query span{