BHO committed on
Commit
ae7c5cd
·
1 Parent(s): 7d463f0

Delete App.py

Files changed (1)
  1. App.py +0 -416
App.py DELETED
@@ -1,416 +0,0 @@
- import gradio as gr
- from haystack.document_stores import FAISSDocumentStore
- from haystack.nodes import EmbeddingRetriever
- import openai
- import pandas as pd
- import os
- from utils import (
-     make_pairs,
-     set_openai_api_key,
-     create_user_id,
-     to_completion,
- )
-
- from datetime import datetime
-
- # from azure.storage.fileshare import ShareServiceClient
-
- # Load environment variables (e.g. OPENAI_API_KEY) from a local .env file if python-dotenv is available
- try:
-     from dotenv import load_dotenv
-
-     load_dotenv()
- except ImportError:
-     pass
-
- theme = gr.themes.Soft(
-     primary_hue="sky",
-     font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
- )
-
- init_prompt = "TKOQA, an AI Assistant for Tikehau. "
- sources_prompt = (
-     "When relevant, use facts and numbers from the following documents in your answer. "
- )
-
-
- def get_reformulation_prompt(query: str) -> str:
-     return f"""Reformulate the following user message to be a short standalone question in English, in the context of the Universal Registration Document of Tikehau.
- ---
- query: what is the AUM of Tikehau in 2022?
- standalone question: What is the AUM of Tikehau in 2022?
- language: English
- ---
- query: what is T2?
- standalone question: What is the transition energy fund at Tikehau?
- language: English
- ---
- query: what is the business of Tikehau?
- standalone question: What are the main business units of Tikehau?
- language: English
- ---
- query: {query}
- standalone question:"""
-
-
- system_template = {
-     "role": "system",
-     "content": init_prompt,
- }
-
- # openai.api_type = "azure"
- # The API key is expected in the environment (e.g. set via the .env file loaded above)
- openai.api_key = os.environ["OPENAI_API_KEY"]
-
- # BHO
- # openai.api_base = os.environ["ressource_endpoint"]
- # openai.api_version = "2022-12-01"
-
- ds = FAISSDocumentStore.load(index_path="./tko_urd.faiss", config_path="./tko_urd.json")
-
- retriever = EmbeddingRetriever(
-     document_store=ds,
-     embedding_model="sentence-transformers/multi-qa-mpnet-base-dot-v1",
-     model_format="sentence_transformers",
-     progress_bar=False,
- )
-
- # retrieve_giec = EmbeddingRetriever(
- #     document_store=FAISSDocumentStore.load(
- #         index_path="./documents/climate_gpt_v2_only_giec.faiss",
- #         config_path="./documents/climate_gpt_v2_only_giec.json",
- #     ),
- #     embedding_model="sentence-transformers/multi-qa-mpnet-base-dot-v1",
- #     model_format="sentence_transformers",
- # )
-
- # BHO
- # For Azure connection in secrets in HuggingFace
- # credential = {
- #     "account_key": os.environ["account_key"],
- #     "account_name": os.environ["account_name"],
- # }
-
- # BHO
- # account_url = os.environ["account_url"]
- # file_share_name = "climategpt"
- # service = ShareServiceClient(account_url=account_url, credential=credential)
- # share_client = service.get_share_client(file_share_name)
- user_id = create_user_id(10)
-
-
- def filter_sources(df, k_summary=3, k_total=10, source="ipcc"):
-     assert source in ["ipcc", "ipbes", "all"]
-
-     # Filter by source
-     if source == "ipcc":
-         df = df.loc[df["source"] == "IPCC"]
-     elif source == "ipbes":
-         df = df.loc[df["source"] == "IPBES"]
-     else:
-         pass
-
-     # Prepare summaries
-     df_summaries = df
-     # Separate summaries and full reports
-     # df_summaries = df.loc[df["report_type"].isin(["SPM", "TS"])]
-     # df_full = df.loc[~df["report_type"].isin(["SPM", "TS"])]
-
-     # Find passages from summaries dataset
-     passages_summaries = df_summaries.head(k_summary)
-
-     # Find passages from full reports dataset
-     # passages_fullreports = df_full.head(k_total - len(passages_summaries))
-
-     # Concatenate passages
-     # passages = pd.concat([passages_summaries, passages_fullreports], axis=0, ignore_index=True)
-     passages = passages_summaries
-     return passages
-
-
- def retrieve_with_summaries(
-     query, retriever, k_summary=3, k_total=10, source="ipcc", max_k=100, threshold=0.555, as_dict=True
- ):
-     assert max_k > k_total
-     docs = retriever.retrieve(query, top_k=max_k)
-     docs = [{**x.meta, "score": x.score, "content": x.content} for x in docs if x.score > threshold]
-     if len(docs) == 0:
-         return []
-     res = pd.DataFrame(docs)
-     passages_df = filter_sources(res, k_summary, k_total, source)
-     if as_dict:
-         contents = passages_df["content"].tolist()
-         meta = passages_df.drop(columns=["content"]).to_dict(orient="records")
-         passages = []
-         for i in range(len(contents)):
-             passages.append({"content": contents[i], "meta": meta[i]})
-         return passages
-     else:
-         return passages_df
-
-
- def make_html_source(source, i):
-     meta = source["meta"]
-     return f"""
- <div class="card">
-     <div class="card-content">
-         <h2>Doc {i} - {meta['file_name']} - Page {meta['page_number']}</h2>
-         <p>{source['content']}</p>
-     </div>
- </div>
- """
-
-
- def chat(
-     user_id: str,
-     query: str,
-     history: list = [system_template],
-     report_type: str = "All available",
-     threshold: float = 0.555,
- ) -> tuple:
-     """Retrieve relevant documents from the document store, then query the completion model.
-
-     Args:
-         query (str): user message.
-         history (list, optional): history of the conversation. Defaults to [system_template].
-         report_type (str, optional): "IPCC", "IPBES", or "All available". Defaults to "All available".
-         threshold (float, optional): similarity threshold; don't increase above 0.568. Defaults to 0.555.
-
-     Yields:
-         tuple: chat in Gradio format, chat in OpenAI format, sources used.
-     """
-
-     if report_type not in ["IPCC", "IPBES"]:
-         report_type = "all"
-     print(f"Searching in {report_type} reports")
-
-     reformulated_query = openai.Completion.create(
-         engine="text-davinci-003",
-         prompt=get_reformulation_prompt(query),
-         temperature=0,
-         max_tokens=128,
-         stop=["\n---\n", "<|im_end|>"],
-     )
-
-     reformulated_query = reformulated_query["choices"][0]["text"]
-     reformulated_query, language = reformulated_query.split("\n")
-     language = language.split(":")[1].strip()
-
-     sources = retrieve_with_summaries(
-         reformulated_query,
-         retriever,
-         k_total=10,
-         k_summary=3,
-         as_dict=True,
-         source=report_type.lower(),
-         threshold=threshold,
-     )
-     response_retriever = {
-         "language": language,
-         "reformulated_query": reformulated_query,
-         "query": query,
-         "sources": sources,
-     }
-
-     # docs = [d for d in retriever.retrieve(query=reformulated_query, top_k=10) if d.score > threshold]
-     messages = history + [{"role": "user", "content": query}]
-
-     if len(sources) > 0:
-         docs_string = []
-         docs_html = []
-         for i, d in enumerate(sources, 1):
-             # docs_string.append(f"📃 Doc {i}: {d['meta']['short_name']} page {d['meta']['page_number']}\n{d['content']}")
-             docs_string.append(f"📃 Doc {i}: {d['meta']['file_name']} page {d['meta']['page_number']}\n{d['content']}")
-             docs_html.append(make_html_source(d, i))
-         docs_string = "\n\n".join([f"Query used for retrieval:\n{reformulated_query}"] + docs_string)
-         docs_html = "\n\n".join([f"Query used for retrieval:\n{reformulated_query}"] + docs_html)
-         messages.append({"role": "system", "content": f"{sources_prompt}\n\n{docs_string}\n\nAnswer in {language}:"})
-
-         response = openai.Completion.create(
-             # engine="climateGPT",
-             engine="text-davinci-003",
-             prompt=to_completion(messages),
-             temperature=0,  # deterministic
-             stream=True,
-             max_tokens=1024,
-         )
-
-         complete_response = ""
-         messages.pop()
-
-         messages.append({"role": "assistant", "content": complete_response})
-         timestamp = str(datetime.now().timestamp())
-         file = user_id[0] + timestamp + ".json"
-         logs = {
-             "user_id": user_id[0],
-             "prompt": query,
-             "retrieved": sources,
-             "report_type": report_type,
-             "prompt_eng": messages[0],
-             "answer": messages[-1]["content"],
-             "time": timestamp,
-         }
-         # log_on_azure(file, logs, share_client)
-         print(logs)
-
-         for chunk in response:
-             if (chunk_message := chunk["choices"][0].get("text")) and chunk_message != "<|im_end|>":
-                 complete_response += chunk_message
-                 messages[-1]["content"] = complete_response
-                 gradio_format = make_pairs([a["content"] for a in messages[1:]])
-                 yield gradio_format, messages, docs_html
-
-     else:
-         docs_string = "⚠️ No relevant passages found in the URDs"
-         complete_response = "**⚠️ No relevant passages found in the URDs**"
-         messages.append({"role": "assistant", "content": complete_response})
-         gradio_format = make_pairs([a["content"] for a in messages[1:]])
-         yield gradio_format, messages, docs_string
-
-
- def save_feedback(feed: str, user_id):
-     if len(feed) > 1:
-         timestamp = str(datetime.now().timestamp())
-         file = user_id[0] + timestamp + ".json"
-         logs = {
-             "user_id": user_id[0],
-             "feedback": feed,
-             "time": timestamp,
-         }
-         # log_on_azure(file, logs, share_client)
-         print(logs)
-         return "Feedback submitted, thank you!"
-
-
- def reset_textbox():
-     return gr.update(value="")
-
-
- # def log_on_azure(file, logs, share_client):
- #     file_client = share_client.get_file_client(file)
- #     file_client.upload_file(str(logs))
-
-
- with gr.Blocks(title="TKO URD Q&A", css="style.css", theme=theme) as demo:
-     user_id_state = gr.State([user_id])
-
-     # Gradio
-     gr.Markdown("<h1><center>Tikehau Capital Q&A</center></h1>")
-
-     with gr.Row():
-         with gr.Column(scale=2):
-             chatbot = gr.Chatbot(elem_id="chatbot", label="Tikehau Capital Q&A chatbot", show_label=False)
-             state = gr.State([system_template])
-
-             with gr.Row():
-                 ask = gr.Textbox(
-                     show_label=True,
-                     placeholder="Ask your Tikehau-related question here and press Enter",
-                 ).style(container=False)
-                 # ask_examples_hidden = gr.Textbox(elem_id="hidden-message")
-
-             # examples_questions = gr.Examples(
-             #     [
-             #         "What is the AUM of Tikehau in 2022?",
-             #     ],
-             #     [ask_examples_hidden],
-             #     examples_per_page=15,
-             # )
-
-         with gr.Column(scale=1, variant="panel"):
-             gr.Markdown("### Sources")
-             sources_textbox = gr.Markdown(show_label=False)
-
-     # dropdown_sources = gr.inputs.Dropdown(
-     #     ["IPCC", "IPBES", "ALL"],
-     #     default="ALL",
-     #     label="Select reports",
-     # )
-     dropdown_sources = gr.State(["All"])
-
-     ask.submit(
-         fn=chat,
-         inputs=[
-             user_id_state,
-             ask,
-             state,
-             dropdown_sources,
-         ],
-         outputs=[chatbot, state, sources_textbox],
-     )
-     ask.submit(reset_textbox, [], [ask])
-
-     # ask_examples_hidden.change(
-     #     fn=chat,
-     #     inputs=[
-     #         user_id_state,
-     #         ask_examples_hidden,
-     #         state,
-     #         dropdown_sources,
-     #     ],
-     #     outputs=[chatbot, state, sources_textbox],
-     # )
-
-     with gr.Row():
-         with gr.Column(scale=1):
-             gr.Markdown(
-                 """
-                 <div class="warning-box">
-                 Version 0.1-beta - This tool is under active development
-                 </div>
-                 """
-             )
-
-         with gr.Column(scale=1):
-             gr.Markdown("*Source: Tikehau Universal Registration Documents*")
-
-     gr.Markdown("## How to use TKO URD Q&A")
-     with gr.Row():
-         with gr.Column(scale=1):
-             gr.Markdown(
-                 """
-                 ### 💪 Getting started
-                 - In the chatbot section, simply type your Tikehau-related question; answers are provided with references to the relevant URDs.
-                 """
-             )
-         with gr.Column(scale=1):
-             gr.Markdown(
-                 """
-                 ### ⚠️ Limitations
-                 <div class="warning-box">
-                 <ul>
-                 <li>Please note that, like any AI, the model may occasionally generate an inaccurate or imprecise answer.</li>
-                 </ul>
-                 </div>
-                 """
-             )
-
-     gr.Markdown("## 🙏 Feedback and feature requests")
-     gr.Markdown(
-         """
-         ### Beta test
-         - Feedback welcome.
-         """
-     )
-
-     gr.Markdown(
-         """
-         ## 🛢️ Carbon Footprint
-
-         Carbon emissions were measured during the development and inference process using [CodeCarbon](https://github.com/mlco2/codecarbon).
-
-         | Phase | Description | Emissions | Source |
-         | --- | --- | --- | --- |
-         | Inference | API call to turbo-GPT | ~0.38 gCO2e / call | https://medium.com/@chrispointon/the-carbon-footprint-of-chatgpt-e1bc14e4cc2a |
-
-         Carbon emissions are **relatively low but not negligible** compared to other usages: one question asked to ClimateQ&A is around 0.482 gCO2e, equivalent to 2.2 m by car (https://datagir.ademe.fr/apps/impact-co2/),
-         or around 2 to 4 times more than a typical Google search.
-         """
-     )
-
- demo.queue(concurrency_count=16)
-
- demo.launch()
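
App.py imports four helpers from `utils`, which this commit does not touch (note that `set_openai_api_key` is imported but never called). For reference, here is a minimal sketch of what `create_user_id`, `make_pairs`, and `to_completion` could plausibly look like, inferred only from their call sites above; the bodies are assumptions, not the actual `utils` implementation.

```python
# Hypothetical sketches of the utils helpers; signatures are constrained by
# the call sites in App.py, the bodies are guesses.
import random
import string


def create_user_id(length: int) -> str:
    # App.py wraps the result in a one-element list and uses it as a log-file prefix.
    return "".join(random.choices(string.ascii_lowercase + string.digits, k=length))


def make_pairs(contents: list) -> list:
    # Gradio's Chatbot expects [(user, assistant), ...] pairs; App.py passes the
    # message contents (minus the system prompt) in alternating user/assistant order.
    return [
        (contents[i], contents[i + 1] if i + 1 < len(contents) else None)
        for i in range(0, len(contents), 2)
    ]


def to_completion(messages: list) -> str:
    # Flattens chat-style messages into one prompt string for the completions
    # endpoint (text-davinci-003 has no chat format). The ChatML-style markers
    # are a guess based on the "<|im_end|>" stop token used in App.py.
    prompt = "\n".join(
        f"<|im_start|>{m['role']}\n{m['content']}\n<|im_end|>" for m in messages
    )
    return prompt + "\n<|im_start|>assistant\n"
```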
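The chat flow also relies on a fixed completion format for the reformulation step: the few-shot prompt ends with `standalone question:`, so the model is expected to complete with the question followed by a `language: ...` line, which `reformulated_query.split("\n")` then unpacks. A self-contained illustration of that contract, using a made-up completion string:

```python
# Illustration of the completion format the parsing code in chat() relies on
# (sample_completion is made up for illustration).
sample_completion = " What is the AUM of Tikehau in 2022?\nlanguage: English"

reformulated_query, language = sample_completion.split("\n")
language = language.split(":")[1].strip()

assert reformulated_query.strip() == "What is the AUM of Tikehau in 2022?"
assert language == "English"
# Note: if the model ever emits extra newlines, the two-way unpacking raises
# ValueError; split("\n", 1) would be a more defensive choice.
```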