hackaton-spinoza

Sleeping

App Files Community

momenaca committed on Sep 30, 2024

Commit

9b4020a

1 Parent(s): c780ad4

update app main file by performing a major cleanup and simplification

Browse files

Files changed (5) hide show

app.py +95 -422
assets/style.css +3 -36
assets/utils_javascript.py +28 -49
spinoza_project/prompt_Spinoza.yaml +19 -0
spinoza_project/source/frontend/gradio_utils.py +273 -0

app.py CHANGED Viewed

@@ -1,27 +1,29 @@
 import gradio as gr
 import time
-import yaml
-from langchain.prompts.chat import ChatPromptTemplate
-from huggingface_hub import hf_hub_download
 from spinoza_project.source.backend.llm_utils import (
-    get_llm,
     get_llm_api,
-    get_vectorstore,
     get_vectorstore_api,
 )
-from spinoza_project.source.backend.document_store import pickle_to_document_store
-from spinoza_project.source.backend.get_prompts import get_qa_prompts
 from spinoza_project.source.frontend.utils import (
-    make_html_source,
-    make_html_presse_source,
-    make_html_afp_source,
-    make_html_politique_source,
-    parse_output_llm_with_sources,
     init_env,
 )
-from spinoza_project.source.backend.prompt_utils import (
-    to_chat_instruction,
-    SpecialTokens,
 )
 from assets.utils_javascript import (
@@ -33,172 +35,65 @@ from assets.utils_javascript import (
 )
 init_env()
-with open("./spinoza_project/config.yaml") as f:
-    config = yaml.full_load(f)
-prompts = {}
-for source in config["prompt_naming"]:
-    with open(f"./spinoza_project/prompt_{source}.yaml") as f:
-        prompts[source] = yaml.full_load(f)
 ## Building LLM
 print("Building LLM")
-model = "gpt35turbo"
 llm = get_llm_api()
-## Loading_tools
 print("Loading Databases")
 bdd_presse = get_vectorstore_api("presse")
 bdd_afp = get_vectorstore_api("afp")
-qdrants = {
-    tab: pickle_to_document_store(
-        hf_hub_download(
-            repo_id="SpinozaProject/spinoza-database",
-            filename=f"database_{tab}.pickle",
-            repo_type="dataset",
-        )
-    )
-    for tab in config["prompt_naming"]
-    if tab != "Presse" and tab != "AFP"
-}
-## Load Prompts
-print("Loading Prompts")
-chat_qa_prompts, chat_reformulation_prompts, chat_summarize_memory_prompts = {}, {}, {}
-for source, prompt in prompts.items():
-    chat_qa_prompt, chat_reformulation_prompt = get_qa_prompts(config, prompt)
-    chat_qa_prompts[source] = chat_qa_prompt
-    chat_reformulation_prompts[source] = chat_reformulation_prompt
-with open("./assets/style.css", "r") as f:
-    css = f.read()
-special_tokens = SpecialTokens(config)
-synthesis_template = """You are a factual journalist that summarize the secialized awnsers from thechnical sources.
-Based on the folowing question:
-{question}
-And the following expert answer:
-{answers}
-- When using legal answers, keep tracking of the name of the articles.
-- When using ADEME answers, name the sources that are mainly used.
-- List the different elements mentionned, and highlight the agreement points between the sources, as well as the contradictions or differences.
-- Contradictions don't lie in whether or not a subject is dealt with, but more in the opinion given or the way the subject is dealt with.
-- Generate the answer as markdown, with an aerated layout, and headlines in bold
-- When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.",
-- Do not use the sentence 'Doc i says ...' to say where information came from.",
-- If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k]",
-- Start by highlighting contradictions, then do a general summary and finally get into the details that might be interesting for article writing. Where relevant, quote them.
-- Awnser in French / Répond en Français
-"""
-synthesis_prompt = to_chat_instruction(synthesis_template, special_tokens)
-synthesis_prompt_template = ChatPromptTemplate.from_messages([synthesis_prompt])
-def zip_longest_fill(*args, fillvalue=None):
-    # zip_longest('ABCD', 'xy', fillvalue='-') --> Ax By C- D-
-    iterators = [iter(it) for it in args]
-    num_active = len(iterators)
-    if not num_active:
-        return
-    cond = True
-    fillvalues = [None] * len(iterators)
-    while cond:
-        values = []
-        for i, it in enumerate(iterators):
-            try:
-                value = next(it)
-            except StopIteration:
-                value = fillvalues[i]
-            values.append(value)
-        new_cond = False
-        for i, elt in enumerate(values):
-            if elt != fillvalues[i]:
-                new_cond = True
-        cond = new_cond
-        fillvalues = values.copy()
-        yield tuple(values)
-def format_question(question):
-    return f"{question}"  # ###
-def parse_question(question):
-    x = question.replace("<p>", "").replace("</p>\n", "")
-    if "### " in x:
-        return x.split("### ")[1]
-    return x
-def reformulate(question, tab, config=config):
-    if tab in list(config["tabs"].keys()):
-        return llm.stream(
-            chat_reformulation_prompts[config["source_mapping"][tab]],
-            {"question": parse_question(question)},
-        )
-    else:
-        return iter([None] * 5)
-def reformulate_single_question(question, tab, config=config):
-    for elt in reformulate(question, tab, config=config):
-        time.sleep(0.02)
-        yield elt
-def reformulate_questions(question, config=config):
     for elt in zip_longest_fill(
-        *[reformulate(question, tab, config=config) for tab in config["tabs"]]
     ):
         time.sleep(0.02)
         yield elt
-def add_question(question):
-    return question
-def answer(question, source, tab, config=config):
-    if tab in list(config["tabs"].keys()):
-        if len(source) < 10:
-            return iter(["Aucune source trouvée, veuillez reformuler votre question"])
-        else:
-            return llm.stream(
-                chat_qa_prompts[config["source_mapping"][tab]],
-                {
-                    "question": parse_question(question),
-                    "sources": source.replace("<p>", "").replace("</p>\n", ""),
-                },
-            )
-    else:
-        return iter([None] * 5)
-def answer_single_question(source, question, tab, config=config):
-    for elt in answer(question, source, tab, config=config):
-        time.sleep(0.02)
-        yield elt
-def answer_questions(*questions_sources, config=config):
     questions = [elt for elt in questions_sources[: len(questions_sources) // 2]]
     sources = [elt for elt in questions_sources[len(questions_sources) // 2 :]]
     for elt in zip_longest_fill(
         *[
-            answer(question, source, tab, config=config)
             for question, source, tab in zip(questions, sources, config["tabs"])
         ]
     ):
@@ -209,105 +104,13 @@ def answer_questions(*questions_sources, config=config):
         ]
-def get_sources(
-    questions, qdrants=qdrants, bdd_presse=bdd_presse, bdd_afp=bdd_afp, config=config
 ):
-    k = config["num_document_retrieved"]
-    min_similarity = config["min_similarity"]
-    text, formated = [], []
-    for i, (question, tab) in enumerate(zip(questions, list(config["tabs"].keys()))):
-        if tab == "Presse":
-            sources = bdd_presse.similarity_search_with_relevance_scores(
-                question.replace("<p>", "").replace("</p>\n", ""), k=k
-            )
-            sources = [
-                (doc, score) for doc, score in sources if score >= min_similarity
-            ]
-            formated.extend(
-                [
-                    make_html_presse_source(source[0], j, source[1])
-                    for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
-                ]
-            )
-        elif tab == "AFP":
-            sources = bdd_afp.similarity_search_with_relevance_scores(
-                question.replace("<p>", "").replace("</p>\n", ""), k=k
-            )
-            sources = [
-                (doc, score) for doc, score in sources if score >= min_similarity
-            ]
-            formated.extend(
-                [
-                    make_html_afp_source(source[0], j, source[1])
-                    for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
-                ]
-            )
-        elif tab == "Documents Stratégiques":
-            sources = qdrants[
-                config["source_mapping"][tab]
-            ].similarity_search_with_relevance_scores(
-                config["query_preprompt"]
-                + question.replace("<p>", "").replace("</p>\n", ""),
-                k=k,
-            )
-            sources = [
-                (doc, score) for doc, score in sources if score >= min_similarity
-            ]
-            formated.extend(
-                [
-                    make_html_politique_source(source[0], j, source[1], config)
-                    for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
-                ]
-            )
-        else:
-            sources = qdrants[
-                config["source_mapping"][tab]
-            ].similarity_search_with_relevance_scores(
-                config["query_preprompt"]
-                + question.replace("<p>", "").replace("</p>\n", ""),
-                k=k,
-            )
-            sources = [
-                (doc, score) for doc, score in sources if score >= min_similarity
-            ]
-            formated.extend(
-                [
-                    make_html_source(source[0], j, source[1], config)
-                    for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
-                ]
-            )
-        text.extend(
-            [
-                "\n\n".join(
-                    [
-                        f"Doc {str(j)} with source type {source[0].metadata.get('file_source_type')}:\n"
-                        + source[0].page_content
-                        for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
-                    ]
-                )
-            ]
-        )
-    formated = "".join(formated)
-    return formated, text
-def retrieve_sources(
-    *questions, qdrants=qdrants, bdd_presse=bdd_presse, bdd_afp=bdd_afp, config=config
-):
-    formated_sources, text_sources = get_sources(
-        questions, qdrants, bdd_presse, bdd_afp, config
-    )
-    return (formated_sources, *text_sources)
-def get_synthesis(question, *answers, config=config):
     answer = []
     for i, tab in enumerate(config["tabs"]):
         if len(str(answers[i])) >= 100:
@@ -329,47 +132,6 @@ def get_synthesis(question, *answers, config=config):
             yield [(question, parse_output_llm_with_sources(elt))]
-theme = gr.themes.Base(
-    primary_hue="blue",
-    secondary_hue="red",
-    font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
-)
-with open("./assets/style.css", "r") as f:
-    css = f.read()
-with open("./assets/source_information.md", "r") as f:
-    source_information = f.read()
-def start_agents():
-    gr.Info(message="The agents and Spinoza are loading...", duration=3)
-    return [
-        (None, "I am waiting until all the agents are done to generate an answer...")
-    ]
-def end_agents():
-    gr.Info(
-        message="The agents and Spinoza have finished answering your question",
-        duration=3,
-    )
-def next_call():
-    return
-init_prompt = """
-Hello, I am Spinoza, a conversational assistant designed to help you in your journalistic journey. I will answer your questions based **on the provided sources**.
-⚠️ Limitations
-*Please note that this chatbot is in an early stage, it is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
-What do you want to learn ?
-"""
 with gr.Blocks(
     title=f"🔍 Spinoza",
     css=css,
@@ -388,132 +150,43 @@ with gr.Blocks(
         with gr.Row(elem_id="chatbot-row"):
             with gr.Column(scale=2, elem_id="center-panel"):
                 with gr.Group(elem_id="chatbot-group"):
-                    with gr.Accordion(
-                        "Science agent",
-                        open=False,
-                        elem_id="accordion-science",
-                        elem_classes="accordion",
-                    ):
-                        chatbots[list(config["tabs"].keys())[0]] = gr.Chatbot(
-                            show_copy_button=True,
-                            show_share_button=False,
-                            show_label=False,
-                            elem_id="chatbot-science",
-                            layout="panel",
-                            avatar_images=(
-                                "./assets/logos/help.png",
-                                None,
-                            ),
-                        )
-                    with gr.Accordion(
-                        "Law agent",
-                        open=False,
-                        elem_id="accordion-legal",
-                        elem_classes="accordion",
-                    ):
-                        chatbots[list(config["tabs"].keys())[1]] = gr.Chatbot(
-                            show_copy_button=True,
-                            show_share_button=False,
-                            show_label=False,
-                            elem_id="chatbot-legal",
-                            layout="panel",
-                            avatar_images=(
-                                "./assets/logos/help.png",
-                                None,
-                            ),
-                        )
-                    with gr.Accordion(
-                        "Politics agent",
-                        open=False,
-                        elem_id="accordion-politique",
-                        elem_classes="accordion",
-                    ):
-                        chatbots[list(config["tabs"].keys())[2]] = gr.Chatbot(
-                            show_copy_button=True,
-                            show_share_button=False,
-                            show_label=False,
-                            elem_id="chatbot-politique",
-                            layout="panel",
-                            avatar_images=(
-                                "./assets/logos/help.png",
-                                None,
-                            ),
-                        )
-                    with gr.Accordion(
-                        "ADEME agent",
-                        open=False,
-                        elem_id="accordion-ademe",
-                        elem_classes="accordion",
-                    ):
-                        chatbots[list(config["tabs"].keys())[3]] = gr.Chatbot(
-                            show_copy_button=True,
-                            show_share_button=False,
-                            show_label=False,
-                            elem_id="chatbot-ademe",
-                            layout="panel",
-                            avatar_images=(
-                                "./assets/logos/help.png",
-                                None,
-                            ),
-                        )
-                    with gr.Accordion(
-                        "Press agent",
-                        open=False,
-                        elem_id="accordion-presse",
-                        elem_classes="accordion",
-                    ):
-                        chatbots[list(config["tabs"].keys())[4]] = gr.Chatbot(
-                            show_copy_button=True,
-                            show_share_button=False,
-                            show_label=False,
-                            elem_id="chatbot-presse",
-                            layout="panel",
-                            avatar_images=(
-                                "./assets/logos/help.png",
-                                None,
-                            ),
-                        )
-                    with gr.Accordion(
-                        "AFP agent",
-                        open=False,
-                        elem_id="accordion-afp",
-                        elem_classes="accordion",
-                    ):
-                        chatbots[list(config["tabs"].keys())[5]] = gr.Chatbot(
-                            show_copy_button=True,
-                            show_share_button=False,
-                            show_label=False,
-                            elem_id="chatbot-afp",
-                            layout="panel",
-                            avatar_images=(
-                                "./assets/logos/help.png",
-                                None,
-                            ),
-                        )
-                    with gr.Accordion(
-                        "Spinoza",
-                        open=True,
-                        elem_id="accordion-spinoza",
-                        elem_classes="accordion",
-                    ):
-                        chatbots["spinoza"] = gr.Chatbot(
-                            value=[(None, init_prompt)],
-                            show_copy_button=True,
-                            show_share_button=False,
-                            show_label=False,
-                            elem_id="chatbot-spinoza",
-                            layout="panel",
-                            avatar_images=(
-                                "./assets/logos/help.png",
-                                "./assets/logos/spinoza.png",
-                            ),
-                        )
                 with gr.Row(elem_id="input-message"):
                     ask = gr.Textbox(
@@ -542,7 +215,7 @@ with gr.Blocks(
                 gr.Markdown("For any issue contact **[email protected]**.")
     ask.submit(
-        start_agents, inputs=[], outputs=[chatbots["spinoza"]], js=accordion_trigger()
     ).then(
         fn=reformulate_questions,
         inputs=[ask],
@@ -564,7 +237,7 @@ with gr.Blocks(
         fn=get_synthesis,
         inputs=[agent_questions[list(config["tabs"].keys())[1]]]
         + [chatbots[tab] for tab in config["tabs"]],
-        outputs=[chatbots["spinoza"]],
     ).then(
         fn=next_call, inputs=[], outputs=[], js=accordion_trigger_spinoza_end()
     ).then(

 import gradio as gr
 import time
 from spinoza_project.source.backend.llm_utils import (
     get_llm_api,
     get_vectorstore_api,
 )
 from spinoza_project.source.frontend.utils import (
     init_env,
+    parse_output_llm_with_sources,
 )
+from spinoza_project.source.frontend.gradio_utils import (
+    get_sources,
+    set_prompts,
+    get_config,
+    get_prompts,
+    get_assets,
+    get_theme,
+    get_init_prompt,
+    get_synthesis_prompt,
+    get_qdrants,
+    start_agents,
+    end_agents,
+    next_call,
+    zip_longest_fill,
+    reformulate,
+    answer,
 )
 from assets.utils_javascript import (
 )
 init_env()
+config = get_config()
+## Loading Prompts
+print("Loading Prompts")
+prompts = get_prompts(config)
+chat_qa_prompts, chat_reformulation_prompts = set_prompts(prompts, config)
+synthesis_prompt_template = get_synthesis_prompt(config)
 ## Building LLM
 print("Building LLM")
 llm = get_llm_api()
+## Loading BDDs
 print("Loading Databases")
 bdd_presse = get_vectorstore_api("presse")
 bdd_afp = get_vectorstore_api("afp")
+qdrants = get_qdrants(config)
+## Loading Assets
+css, source_information = get_assets()
+theme = get_theme()
+init_prompt = get_init_prompt()
+def reformulate_questions(
+    question,
+    llm=llm,
+    chat_reformulation_prompts=chat_reformulation_prompts,
+    config=config,
+):
     for elt in zip_longest_fill(
+        *[
+            reformulate(llm, chat_reformulation_prompts, question, tab, config=config)
+            for tab in config["tabs"]
+        ]
     ):
         time.sleep(0.02)
         yield elt
+def retrieve_sources(
+    *questions, qdrants=qdrants, bdd_presse=bdd_presse, bdd_afp=bdd_afp, config=config
+):
+    formated_sources, text_sources = get_sources(
+        questions, qdrants, bdd_presse, bdd_afp, config
+    )
+    return (formated_sources, *text_sources)
+def answer_questions(
+    *questions_sources, llm=llm, chat_qa_prompts=chat_qa_prompts, config=config
+):
     questions = [elt for elt in questions_sources[: len(questions_sources) // 2]]
     sources = [elt for elt in questions_sources[len(questions_sources) // 2 :]]
     for elt in zip_longest_fill(
         *[
+            answer(llm, chat_qa_prompts, question, source, tab, config)
             for question, source, tab in zip(questions, sources, config["tabs"])
         ]
     ):
         ]
+def get_synthesis(
+    question,
+    *answers,
+    llm=llm,
+    synthesis_prompt_template=synthesis_prompt_template,
+    config=config,
 ):
     answer = []
     for i, tab in enumerate(config["tabs"]):
         if len(str(answers[i])) >= 100:
             yield [(question, parse_output_llm_with_sources(elt))]
 with gr.Blocks(
     title=f"🔍 Spinoza",
     css=css,
         with gr.Row(elem_id="chatbot-row"):
             with gr.Column(scale=2, elem_id="center-panel"):
                 with gr.Group(elem_id="chatbot-group"):
+                    for tab in list(config["tabs"].keys()) + ["Spinoza"]:
+                        if tab == "Spinoza":
+                            agent_name = f"Spinoza"
+                            elem_id = f"accordion-{tab}"
+                            elem_classes = "accordion accordion-agent spinoza-agent"
+                        else:
+                            agent_name = f"Agent {config['source_mapping'][tab]}"
+                            elem_id = f"accordion-{config['source_mapping'][tab]}"
+                            elem_classes = "accordion accordion-agent"
+                        with gr.Accordion(
+                            agent_name,
+                            open=True if agent_name == "Spinoza" else False,
+                            elem_id=elem_id,
+                            elem_classes=elem_classes,
+                        ):
+                            # chatbot_key = agent_name.lower().replace(" ", "_")
+                            chatbots[tab] = gr.Chatbot(
+                                value=(
+                                    [(None, init_prompt)]
+                                    if agent_name == "Spinoza"
+                                    else None
+                                ),
+                                show_copy_button=True,
+                                show_share_button=False,
+                                show_label=False,
+                                elem_id=f"chatbot-{agent_name.lower().replace(' ', '-')}",
+                                layout="panel",
+                                avatar_images=(
+                                    "./assets/logos/help.png",
+                                    (
+                                        "./assets/logos/spinoza.png"
+                                        if agent_name == "Spinoza"
+                                        else None
+                                    ),
+                                ),
+                            )
                 with gr.Row(elem_id="input-message"):
                     ask = gr.Textbox(
                 gr.Markdown("For any issue contact **[email protected]**.")
     ask.submit(
+        start_agents, inputs=[], outputs=[chatbots["Spinoza"]], js=accordion_trigger()
     ).then(
         fn=reformulate_questions,
         inputs=[ask],
         fn=get_synthesis,
         inputs=[agent_questions[list(config["tabs"].keys())[1]]]
         + [chatbots[tab] for tab in config["tabs"]],
+        outputs=[chatbots["Spinoza"]],
     ).then(
         fn=next_call, inputs=[], outputs=[], js=accordion_trigger_spinoza_end()
     ).then(

assets/style.css CHANGED Viewed

@@ -118,53 +118,20 @@ a {
         height: calc(-100px + 100vh) !important;
     }
-    #accordion-spinoza {
         height: 15cm;
     }
-    #accordion-spinoza>open>span:nth-child(1) {
         color: #000000;
         font-size: large;
         font-weight: bold;
     }
-    #accordion-spinoza>button:nth-child(2)>span:nth-child(1) {
-        color: #000000;
-        font-size: large;
-        font-weight: bold;
-    }
-    #accordion-science>button:nth-child(2)>span:nth-child(1) {
-        color: #9ca1a5e7;
-        font-weight: bold;
-    }
-    #accordion-presse>button:nth-child(2)>span:nth-child(1) {
-        color: #9ca1a5e7;
-        font-weight: bold;
-    }
-    #accordion-legal>button:nth-child(2)>span:nth-child(1) {
         color: #9ca1a5e7;
         font-weight: bold;
     }
-    #accordion-politique>button:nth-child(2)>span:nth-child(1) {
-        color: #9ca1a5e7;
-        font-weight: bold;
-    }
-    #accordion-ademe>button:nth-child(2)>span:nth-child(1) {
-        color: #9ca1a5e7;
-        font-weight: bold;
-    }
-    #accordion-afp>button:nth-child(2)>span:nth-child(1) {
-        color: #9ca1a5e7;
-        font-weight: bold;
-    }
 }
 textarea.scroll-hide {

         height: calc(-100px + 100vh) !important;
     }
+    .accordion-agent.spinoza-agent {
         height: 15cm;
     }
+    .accordion-agent.spinoza-agent > button > span {
         color: #000000;
         font-size: large;
         font-weight: bold;
     }
+    .accordion-agent > button > span {
         color: #9ca1a5e7;
         font-weight: bold;
     }
 }
 textarea.scroll-hide {

assets/utils_javascript.py CHANGED Viewed

@@ -15,37 +15,19 @@ def update_footer():
 def accordion_trigger():
     return """
     function accordion_trigger() {
-        input_textbox = document.getElementById("input-textbox")
         input_textbox.addEventListener('keyup', function (e) {
             if (e.key === 'Enter' || e.keyCode === 13) {
-                var accordion_science = document.getElementById("accordion-science")
-                var accordion_presse = document.getElementById("accordion-presse")
-                var accordion_politique = document.getElementById("accordion-politique")
-                var accordion_legal = document.getElementById("accordion-legal")
-                var accordion_ademe= document.getElementById("accordion-ademe")
-                var accordion_afp= document.getElementById("accordion-afp")
-                var accordion_spinoza = document.getElementById("accordion-spinoza")
-                document.querySelectorAll(".loader").forEach(el => el.remove());
-                document.querySelectorAll(".loader-helper").forEach(el => el.remove());
-                accordion_science.children[1].children[0].textContent = "Science agent";
-                accordion_presse.children[1].children[0].textContent = "Press agent";
-                accordion_politique.children[1].children[0].textContent = "Politics agent";
-                accordion_legal.children[1].children[0].textContent = "Law agent";
-                accordion_ademe.children[1].children[0].textContent = "ADEME agent";
-                accordion_afp.children[1].children[0].textContent = "AFP agent";
-                accordion_spinoza.children[1].children[0].textContent = "Spinoza";
-                accordion_science.children[1].children[0].innerHTML += "<span class='loader-helper'> - </span>";
-                accordion_science.children[1].children[0].innerHTML += "<span class='loader'>loading</span>";
-                accordion_presse.children[1].children[0].innerHTML += "<span class='loader-helper'> - </span>";
-                accordion_presse.children[1].children[0].innerHTML += "<span class='loader'>loading</span>";
-                accordion_politique.children[1].children[0].innerHTML += "<span class='loader-helper'> - </span>";
-                accordion_politique.children[1].children[0].innerHTML += "<span class='loader'>loading</span>";
-                accordion_legal.children[1].children[0].innerHTML += "<span class='loader-helper'> - </span>";
-                accordion_legal.children[1].children[0].innerHTML += "<span class='loader'>loading</span>";
-                accordion_ademe.children[1].children[0].innerHTML += "<span class='loader-helper'> - </span>";
-                accordion_ademe.children[1].children[0].innerHTML += "<span class='loader'>loading</span>";
-                accordion_afp.children[1].children[0].innerHTML += "<span class='loader-helper'> - </span>";
-                accordion_afp.children[1].children[0].innerHTML += "<span class='loader'>loading</span>";
             }
         });
     }
@@ -55,18 +37,15 @@ def accordion_trigger():
 def accordion_trigger_end():
     return """
     function accordion_trigger_end() {
-        var accordion_science = document.getElementById("accordion-science")
-        var accordion_presse = document.getElementById("accordion-presse")
-        var accordion_politique = document.getElementById("accordion-politique")
-        var accordion_legal = document.getElementById("accordion-legal")
-        var accordion_ademe = document.getElementById("accordion-ademe")
-        var accordion_afp= document.getElementById("accordion-afp")
-        accordion_science.children[1].children[0].textContent = "Science agent - ready";
-        accordion_presse.children[1].children[0].textContent = "Press agent - ready";
-        accordion_politique.children[1].children[0].textContent = "Politics agent - ready";
-        accordion_legal.children[1].children[0].textContent = "Law agent - ready";
-        accordion_ademe.children[1].children[0].textContent = "ADEME agent - ready";
-        accordion_afp.children[1].children[0].textContent = "AFP agent - ready";
     }
     """
@@ -74,12 +53,11 @@ def accordion_trigger_end():
 def accordion_trigger_spinoza():
     return """
     function accordion_trigger_spinoza() {
-        var accordion_spinoza = document.getElementById("accordion-spinoza")
-        document.querySelectorAll(".loader").forEach(el => el.remove());
-        document.querySelectorAll(".loader-helper").forEach(el => el.remove());
-        accordion_spinoza.children[1].children[0].textContent = "Spinoza";
-        accordion_spinoza.children[1].children[0].innerHTML += "<span class='loader-helper'> - </span>";
-        accordion_spinoza.children[1].children[0].innerHTML += "<span class='loader'>generating</span>";
     }
     """
@@ -87,7 +65,8 @@ def accordion_trigger_spinoza():
 def accordion_trigger_spinoza_end():
     return """
     function accordion_trigger_spinoza_end() {
-        var accordion_spinoza = document.getElementById("accordion-spinoza")
-        accordion_spinoza.children[1].children[0].textContent = "Spinoza - ready";
     }
     """

 def accordion_trigger():
     return """
     function accordion_trigger() {
+        var input_textbox = document.getElementById("input-textbox");
         input_textbox.addEventListener('keyup', function (e) {
             if (e.key === 'Enter' || e.keyCode === 13) {
+                document.querySelectorAll(".loader, .loader-helper").forEach(el => el.remove());
+                var accordions = document.querySelectorAll('.accordion-agent');
+                accordions.forEach(function (accordion) {
+                    var agentName = "Agent " + accordion.id.split('-')[1];
+                    var buttonSpan = accordion.querySelector('button > span');
+                    if (!accordion.classList.contains('spinoza-agent')) {
+                        buttonSpan.textContent = agentName;
+                        buttonSpan.innerHTML += "<span class='loader-helper'> - </span><span class='loader'>loading</span>";
+                    }
+                });
             }
         });
     }
 def accordion_trigger_end():
+    """Return the JavaScript source of the ``accordion_trigger_end`` function.
+
+    The returned script selects every ``.accordion-agent`` element and, for
+    each one that does not carry the ``spinoza-agent`` class, sets the text of
+    its first ``button > span`` descendant to ``Agent <name> - ready``, where
+    ``<name>`` is the second ``-``-separated part of the element id.
+    """
     return """
     function accordion_trigger_end() {
+        var accordions = document.querySelectorAll('.accordion-agent');
+        accordions.forEach(function (accordion) {
+            if (!accordion.classList.contains('spinoza-agent')) {
+                var agentName = "Agent " + accordion.id.split('-')[1];
+                var buttonSpan = accordion.querySelector('button > span');
+                buttonSpan.textContent = agentName + " - ready";
+            }
+        });
     }
     """
 def accordion_trigger_spinoza():
+    """Return the JavaScript source of the ``accordion_trigger_spinoza`` function.
+
+    The returned script removes every ``.loader`` and ``.loader-helper``
+    element from the document, then resets the ``button > span`` label of the
+    first ``.spinoza-agent`` element to ``Spinoza`` and appends a
+    ``loader-helper`` span and a ``loader`` span whose text is ``generating``.
+    """
     return """
     function accordion_trigger_spinoza() {
+        var accordion_spinoza = document.querySelector('.spinoza-agent');
+        document.querySelectorAll(".loader, .loader-helper").forEach(el => el.remove());
+        var buttonSpan = accordion_spinoza.querySelector('button > span');
+        buttonSpan.textContent = "Spinoza";
+        buttonSpan.innerHTML += "<span class='loader-helper'> - </span><span class='loader'>generating</span>";
     }
     """
 def accordion_trigger_spinoza_end():
+    """Return the JavaScript source of the ``accordion_trigger_spinoza_end`` function.
+
+    The returned script sets the ``button > span`` label of the first
+    ``.spinoza-agent`` element to ``Spinoza - ready``.
+    """
     return """
     function accordion_trigger_spinoza_end() {
+        var accordion_spinoza = document.querySelector('.spinoza-agent');
+        var buttonSpan = accordion_spinoza.querySelector('button > span');
+        buttonSpan.textContent = "Spinoza - ready";
     }
     """

spinoza_project/prompt_Spinoza.yaml ADDED Viewed

	@@ -0,0 +1,19 @@

+prompt:
+  [
+    "You are a factual journalist who summarizes the specialized answers from technical sources.",
+    "Based on the following question:",
+    "{question}",
+    "",
+    "And the following expert answer:",
+    "{answers}",
+    "",
+    "- When using legal answers, keep tracking of the name of the articles.",
+    "- When using ADEME answers, name the sources that are mainly used.",
+    "- List the different elements mentioned, and highlight the agreement points between the sources, as well as the contradictions or differences.",
+    "- Contradictions don't lie in whether or not a subject is dealt with, but more in the opinion given or the way the subject is dealt with.",
+    "- Generate the answer as markdown, with an aerated layout, and headlines in bold",
+    "- When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.",
+    "- If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k]",
+    "- Start by highlighting contradictions, then do a general summary and finally get into the details that might be interesting for article writing. Where relevant, quote them.",
+    "- Answer in French / Réponds en français"
+  ]

spinoza_project/source/frontend/gradio_utils.py ADDED Viewed

	@@ -0,0 +1,273 @@

+import gradio as gr
+import yaml
+from langchain.prompts.chat import ChatPromptTemplate
+from huggingface_hub import hf_hub_download
+from spinoza_project.source.frontend.utils import (
+    make_html_source,
+    make_html_presse_source,
+    make_html_afp_source,
+    make_html_politique_source,
+)
+from spinoza_project.source.backend.prompt_utils import (
+    to_chat_instruction,
+    SpecialTokens,
+)
+from spinoza_project.source.backend.get_prompts import get_qa_prompts
+from spinoza_project.source.backend.document_store import pickle_to_document_store
+def get_config():
+    with open("./spinoza_project/config.yaml") as f:
+        return yaml.full_load(f)
+def get_prompts(config):
+    prompts = {}
+    for source in config["prompt_naming"]:
+        with open(f"./spinoza_project/prompt_{source}.yaml") as f:
+            prompts[source] = yaml.full_load(f)
+    return prompts
+def set_prompts(prompts, config):
+    chat_qa_prompts, chat_reformulation_prompts = ({}, {})
+    for source, prompt in prompts.items():
+        chat_qa_prompt, chat_reformulation_prompt = get_qa_prompts(config, prompt)
+        chat_qa_prompts[source] = chat_qa_prompt
+        chat_reformulation_prompts[source] = chat_reformulation_prompt
+    return chat_qa_prompts, chat_reformulation_prompts
+def get_assets():
+    with open("./assets/style.css", "r") as f:
+        css = f.read()
+    with open("./assets/source_information.md", "r") as f:
+        source_information = f.read()
+    return css, source_information
+def get_qdrants(config):
+    qdrants = {
+        tab: pickle_to_document_store(
+            hf_hub_download(
+                repo_id="SpinozaProject/spinoza-database",
+                filename=f"database_{tab}.pickle",
+                repo_type="dataset",
+            )
+        )
+        for tab in config["prompt_naming"]
+        if tab != "Presse" and tab != "AFP"
+    }
+    return qdrants
+def get_theme():
+    return gr.themes.Base(
+        primary_hue="blue",
+        secondary_hue="red",
+        font=[
+            gr.themes.GoogleFont("Poppins"),
+            "ui-sans-serif",
+            "system-ui",
+            "sans-serif",
+        ],
+    )
+def get_init_prompt():
+    return """
+    Hello, I am Spinoza, a conversational assistant designed to help you in your journalistic journey. I will answer your questions based **on the provided sources**.
+    ⚠️ Limitations
+    *Please note that this chatbot is in an early stage, it is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
+    What do you want to learn ?
+"""
+def get_synthesis_prompt(config):
+    special_tokens = SpecialTokens(config)
+    with open(f"./spinoza_project/prompt_Spinoza.yaml", "r") as f:
+        synthesis_template = f.read()
+    synthesis_prompt = to_chat_instruction(synthesis_template, special_tokens)
+    synthesis_prompt_template = ChatPromptTemplate.from_messages([synthesis_prompt])
+    return synthesis_prompt_template
+def zip_longest_fill(*args, fillvalue=None):
+    # zip_longest('ABCD', 'xy', fillvalue='-') --> Ax By C- D-
+    iterators = [iter(it) for it in args]
+    num_active = len(iterators)
+    if not num_active:
+        return
+    cond = True
+    fillvalues = [None] * len(iterators)
+    while cond:
+        values = []
+        for i, it in enumerate(iterators):
+            try:
+                value = next(it)
+            except StopIteration:
+                value = fillvalues[i]
+            values.append(value)
+        new_cond = False
+        for i, elt in enumerate(values):
+            if elt != fillvalues[i]:
+                new_cond = True
+        cond = new_cond
+        fillvalues = values.copy()
+        yield tuple(values)
+def start_agents():
+    gr.Info(message="The agents and Spinoza are loading...", duration=3)
+    return [
+        (None, "I am waiting until all the agents are done to generate an answer...")
+    ]
+def end_agents():
+    gr.Info(
+        message="The agents and Spinoza have finished answering your question",
+        duration=3,
+    )
+def next_call():
+    return
+def format_question(question):
+    return f"{question}"
+def parse_question(question):
+    x = question.replace("<p>", "").replace("</p>\n", "")
+    if "### " in x:
+        return x.split("### ")[1]
+    return x
+def reformulate(llm, chat_reformulation_prompts, question, tab, config):
+    if tab in list(config["tabs"].keys()):
+        return llm.stream(
+            chat_reformulation_prompts[config["source_mapping"][tab]],
+            {"question": parse_question(question)},
+        )
+    else:
+        return iter([None] * 5)
+def add_question(question):
+    return question
+def answer(llm, chat_qa_prompts, question, source, tab, config):
+    if tab in list(config["tabs"].keys()):
+        if len(source) < 10:
+            return iter(["Aucune source trouvée, veuillez reformuler votre question"])
+        else:
+            return llm.stream(
+                chat_qa_prompts[config["source_mapping"][tab]],
+                {
+                    "question": parse_question(question),
+                    "sources": source.replace("<p>", "").replace("</p>\n", ""),
+                },
+            )
+    else:
+        return iter([None] * 5)
+def get_sources(questions, qdrants, bdd_presse, bdd_afp, config):
+    k = config["num_document_retrieved"]
+    min_similarity = config["min_similarity"]
+    text, formated = [], []
+    for i, (question, tab) in enumerate(zip(questions, list(config["tabs"].keys()))):
+        if tab == "Presse":
+            sources = bdd_presse.similarity_search_with_relevance_scores(
+                question.replace("<p>", "").replace("</p>\n", ""), k=k
+            )
+            sources = [
+                (doc, score) for doc, score in sources if score >= min_similarity
+            ]
+            formated.extend(
+                [
+                    make_html_presse_source(source[0], j, source[1])
+                    for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
+                ]
+            )
+        elif tab == "AFP":
+            sources = bdd_afp.similarity_search_with_relevance_scores(
+                question.replace("<p>", "").replace("</p>\n", ""), k=k
+            )
+            sources = [
+                (doc, score) for doc, score in sources if score >= min_similarity
+            ]
+            formated.extend(
+                [
+                    make_html_afp_source(source[0], j, source[1])
+                    for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
+                ]
+            )
+        elif tab == "Documents Stratégiques":
+            sources = qdrants[
+                config["source_mapping"][tab]
+            ].similarity_search_with_relevance_scores(
+                config["query_preprompt"]
+                + question.replace("<p>", "").replace("</p>\n", ""),
+                k=k,
+            )
+            sources = [
+                (doc, score) for doc, score in sources if score >= min_similarity
+            ]
+            formated.extend(
+                [
+                    make_html_politique_source(source[0], j, source[1], config)
+                    for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
+                ]
+            )
+        else:
+            sources = qdrants[
+                config["source_mapping"][tab]
+            ].similarity_search_with_relevance_scores(
+                config["query_preprompt"]
+                + question.replace("<p>", "").replace("</p>\n", ""),
+                k=k,
+            )
+            sources = [
+                (doc, score) for doc, score in sources if score >= min_similarity
+            ]
+            formated.extend(
+                [
+                    make_html_source(source[0], j, source[1], config)
+                    for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
+                ]
+            )
+        text.extend(
+            [
+                "\n\n".join(
+                    [
+                        f"Doc {str(j)} with source type {source[0].metadata.get('file_source_type')}:\n"
+                        + source[0].page_content
+                        for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
+                    ]
+                )
+            ]
+        )
+    formated = "".join(formated)
+    return formated, text