File size: 4,596 Bytes
e436366
 
 
b1191e1
e436366
 
 
 
 
 
 
 
b1191e1
e436366
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
from functools import cache
import os
import time
import gradio as gr
from langchain.llms import OpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains.question_answering import load_qa_chain
import tiktoken


@cache
def tiktoken_len_builder(model_name):
    """Build a token-counting function for *model_name*.

    Returns a callable mapping a string to its token count under the
    tiktoken encoding for the model. Cached so each model's tokenizer
    is constructed only once per process.
    """
    encoding = tiktoken.encoding_for_model(model_name)

    def count_tokens(text):
        # disallowed_special=() makes special-token text count as plain text
        # instead of raising.
        return len(encoding.encode(text, disallowed_special=()))

    return count_tokens


def split_documents(docs, length_function, chunk_size=400):
    """Split *docs* into chunks of at most *chunk_size* units.

    Chunk length is measured by *length_function* (e.g. a token
    counter); consecutive chunks overlap by 20 units for context.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_overlap=20,
        chunk_size=chunk_size,
        length_function=length_function,
    )
    return splitter.split_documents(docs)


def summarize_docs(llm, docs):
    """Summarize *docs* with a map-reduce summarization chain.

    Uses the caller-supplied *llm*. The previous version shadowed the
    parameter with a new ``OpenAI(...)`` built from module-level names
    (``temperature``/``openai_api_key`` are Gradio components here, not
    values), which raised as soon as the function was called.
    """
    chain = load_summarize_chain(llm, chain_type="map_reduce")
    return chain.run(docs)

class MdnaQA:
    """Question answering over MD&A document chunks.

    Embeds *docs* into an in-memory Chroma vector store and answers
    questions by stuffing the most similar chunks into a QA chain.
    """

    def __init__(self, llm, docs):
        self.docs = docs
        # Vector index over the chunks, used to retrieve relevant context.
        self.docsearch = Chroma.from_documents(docs, OpenAIEmbeddings())
        self.chain = load_qa_chain(llm, chain_type="stuff")

    def ask(self, question):
        """Return the LLM's answer to *question* grounded in the docs."""
        relevant = self.docsearch.similarity_search(question)
        return self.chain.run(input_documents=relevant, question=question)


# Load the MD&A text and split it into token-bounded chunks.
loader = TextLoader('2023-05-12_2023_q1_goog_mdna.txt')
token_len = tiktoken_len_builder("text-davinci-003")
docs = split_documents(loader.load(), token_len)
# Total tokens across all chunks — estimates the input/embedding spend.
tokens_sum = sum(token_len(d.page_content) for d in docs)

title = "Alphabet's Q1 2023 10-Q MD&A"

# Gradio UI: one tab summarizes the MD&A, the other runs interactive QA.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f'# {title}')
    gr.Markdown('Video tutorial ')
    gr.Markdown('Link to a blog post the video tutorial')
    gr.Markdown("You can get an API key [from OpenAI](https://platform.openai.com/account/api-keys)")
    openai_api_key = gr.Text(
        value=os.getenv("OPENAI_API_KEY"),
        type="password",
        label="OpenAI API key",
    )
    temperature = gr.Slider(
        0, 2, value=0, step=0.1, label="Temperature",
        info="adjusts a model's output from predictable to random",
    )
    # The pre-split document chunks, shared with every event handler.
    mdna = gr.State(docs)
    tokens_total = gr.Textbox(
        label="Total input tokens", value=tokens_sum,
        info='how many tokens will be spent on input / embeddings',
    )
    with gr.Tabs(visible=True) as tabs:
        with gr.TabItem("Summary"):
            # NOTE(review): gr.Button has no `info` parameter; the hint moved
            # into a Markdown line so it still reaches the user.
            gr.Markdown('On click you spend tokens on input, instructions and output')
            summarize = gr.Button("Summarize MD&A", variant='primary')
            summary = gr.TextArea(label='Summary')

            def summarize_mdna(docs, api_key, temp):
                """Summarize the MD&A chunks with a map-reduce chain.

                Previously returned the placeholder string 'HaHa'.
                """
                llm = OpenAI(temperature=temp, openai_api_key=api_key)
                chain = load_summarize_chain(llm, chain_type="map_reduce")
                return chain.run(docs)

            summarize.click(
                summarize_mdna,
                inputs=[mdna, openai_api_key, temperature],
                outputs=[summary],
            )
        with gr.TabItem("QA with MD&A"):
            start_qa = gr.Button("Start QA with MD&A", variant='primary')
            chatbot = gr.Chatbot(label="QA with MD&A", visible=False)
            question = gr.Textbox(
                label="Your question", interactive=True, visible=False
            )
            # Holds the MdnaQA helper once the session starts.
            qa_chat = gr.State()
            send = gr.Button("Ask question", variant='primary', visible=False)

            def start_chat(docs, api_key, temp):
                """Build the QA helper and reveal the chat widgets.

                Previously instantiated the undefined ``MDNAQAMock`` and was
                wired with only one input for its two parameters, so the
                button always errored.
                """
                llm = OpenAI(temperature=temp, openai_api_key=api_key)
                qa = MdnaQA(llm, docs)
                # gr.update is component-agnostic (chatbot/question/send are
                # different component types).
                return (
                    qa,
                    gr.update(visible=True),
                    gr.update(visible=True),
                    gr.update(visible=True),
                )

            start_qa.click(
                start_chat,
                [mdna, openai_api_key, temperature],
                [qa_chat, chatbot, question, send],
            )

            def respond(qa_chat, question, chat_history):
                """Answer one question and append it to the chat history."""
                answer = qa_chat.ask(question)
                chat_history.append((question, answer))
                # Removed the artificial time.sleep(3) left over from mocking.
                return "", chat_history

            send.click(respond, [qa_chat, question, chatbot], [question, chatbot])
            question.submit(
                respond, [qa_chat, question, chatbot], [question, chatbot]
            )


demo.launch()