Spaces:
Sleeping
Sleeping
update politique specific database
Browse files- app.py +21 -1
- spinoza_project/source/frontend/utils.py +16 -0
app.py
CHANGED
@@ -15,6 +15,7 @@ from spinoza_project.source.frontend.utils import (
|
|
15 |
make_html_source,
|
16 |
make_html_presse_source,
|
17 |
make_html_afp_source,
|
|
|
18 |
parse_output_llm_with_sources,
|
19 |
init_env,
|
20 |
)
|
@@ -53,9 +54,10 @@ bdd_afp = get_vectorstore_api("afp")
|
|
53 |
qdrants = {
|
54 |
tab: pickle_to_document_store(
|
55 |
hf_hub_download(
|
56 |
-
repo_id="
|
57 |
filename=f"database_{tab}.pickle",
|
58 |
repo_type="dataset",
|
|
|
59 |
)
|
60 |
)
|
61 |
for tab in config["prompt_naming"]
|
@@ -243,6 +245,24 @@ def get_sources(
|
|
243 |
]
|
244 |
)
|
245 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
246 |
else:
|
247 |
sources = qdrants[
|
248 |
config["source_mapping"][tab]
|
|
|
15 |
make_html_source,
|
16 |
make_html_presse_source,
|
17 |
make_html_afp_source,
|
18 |
+
make_html_politique_source,
|
19 |
parse_output_llm_with_sources,
|
20 |
init_env,
|
21 |
)
|
|
|
54 |
qdrants = {
|
55 |
tab: pickle_to_document_store(
|
56 |
hf_hub_download(
|
57 |
+
repo_id="TestSpinoza/spinoza-database",
|
58 |
filename=f"database_{tab}.pickle",
|
59 |
repo_type="dataset",
|
60 |
+
force_download=True,
|
61 |
)
|
62 |
)
|
63 |
for tab in config["prompt_naming"]
|
|
|
245 |
]
|
246 |
)
|
247 |
|
248 |
+
elif tab == "Documents Stratégiques":
|
249 |
+
sources = qdrants[
|
250 |
+
config["source_mapping"][tab]
|
251 |
+
].similarity_search_with_relevance_scores(
|
252 |
+
config["query_preprompt"]
|
253 |
+
+ question.replace("<p>", "").replace("</p>\n", ""),
|
254 |
+
k=k,
|
255 |
+
)
|
256 |
+
sources = [
|
257 |
+
(doc, score) for doc, score in sources if score >= min_similarity
|
258 |
+
]
|
259 |
+
formated.extend(
|
260 |
+
[
|
261 |
+
make_html_politique_source(source[0], j, source[1], config)
|
262 |
+
for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
|
263 |
+
]
|
264 |
+
)
|
265 |
+
|
266 |
else:
|
267 |
sources = qdrants[
|
268 |
config["source_mapping"][tab]
|
spinoza_project/source/frontend/utils.py
CHANGED
@@ -107,6 +107,22 @@ def make_html_afp_source(source, i, score):
|
|
107 |
"""
|
108 |
|
109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
def make_html_source(source, i, score, config):
|
111 |
meta = source.metadata
|
112 |
return f"""
|
|
|
107 |
"""
|
108 |
|
109 |
|
110 |
+
def make_html_politique_source(source, i, score, config):
    """Render one retrieved "Politique" document as an HTML card.

    Args:
        source: Retrieved document exposing ``metadata`` (a mapping with the
            keys ``file_name``, ``content_page_number`` and
            ``file_source_type``) and ``page_content`` (the passage text).
        i: Document index, used both in the card's ``id`` attribute
            (``doc{i}``) and in its heading.
        score: Relevance score; shown as ``100 * score`` rounded to one
            decimal place.
        config: Mapping that provides ``passage_preprompt``, the text that
            is removed from the passage before it is displayed.

    Returns:
        The HTML snippet for the card, as a string.
    """
    info = source.metadata

    # Turn the file name into a readable title: drop ".pdf" and use spaces
    # instead of "_" / "-" separators.
    title = info['file_name'].replace(".pdf", "")
    title = title.replace("_", " ").replace("-", " ")

    # NOTE(review): the "+ 1" suggests the stored page number is 0-based;
    # confirm against the indexing pipeline.
    page = info['content_page_number'] + 1

    # The passage pre-prompt is not meant to be shown, so strip it out.
    passage = source.page_content.replace(config["passage_preprompt"], "")

    origin = info['file_source_type']
    relevance = round(100*score,1)

    return f"""
<div class="card" id="doc{i}">
    <div class="card-content">
        <h2>Doc {i} - {title} - Page {page}</h2>
        <p>{passage}</p>
    </div>
    <div class="card-footer">
        <span>Politique - {origin}</span>
        <span>Relevance Score : {relevance}%</span>
    </div>
</div>
"""
|
124 |
+
|
125 |
+
|
126 |
def make_html_source(source, i, score, config):
|
127 |
meta = source.metadata
|
128 |
return f"""
|