File size: 3,667 Bytes
5c46efb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10f7f3f
5c46efb
 
 
 
 
 
33ca1a1
10f7f3f
ba771c2
 
10f7f3f
5d2ee3e
33ca1a1
 
 
 
 
 
 
 
 
10f7f3f
 
 
33e91c3
5c46efb
10f7f3f
 
5c46efb
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import argparse
from dataclasses import asdict
import json
import os
import streamlit as st
from datasets import load_dataset

from data_driven_characters.character import get_character_definition
from data_driven_characters.corpus import (
    get_corpus_summaries,
    load_docs,
)

from data_driven_characters.chatbots import (
    SummaryChatBot,
    RetrievalChatBot,
    SummaryRetrievalChatBot,
)
from data_driven_characters.interfaces import CommandLine, Streamlit


# Root directory under which create_chatbot() builds its per-corpus output folders.
OUTPUT_ROOT = "output"






def create_chatbot(corpus, character_name, chatbot_type, retrieval_docs, summary_type):
    """Build a character chatbot from a text corpus.

    The corpus is loaded and summarized, a character definition is derived
    from those summaries, and the requested chatbot flavor is instantiated.

    Args:
        corpus: Path to the corpus text file.
        character_name: Name of the character to build a definition for.
        chatbot_type: One of "summary", "retrieval" or "summary_retrieval".
        retrieval_docs: "raw" to retrieve over small chunks of the corpus,
            "summarized" to retrieve over the corpus summaries.
        summary_type: Summarization strategy forwarded to
            get_corpus_summaries(); it is also part of the output path.

    Returns:
        The constructed chatbot instance.

    Raises:
        ValueError: If retrieval_docs or chatbot_type is not a known value.

    Side effects:
        Creates directories under OUTPUT_ROOT and prints the character
        definition to stdout as JSON.
    """
    # Output layout: <OUTPUT_ROOT>/<corpus file stem>/summarytype_<summary_type>/
    corpus_name = os.path.splitext(os.path.basename(corpus))[0]
    output_dir = f"{OUTPUT_ROOT}/{corpus_name}/summarytype_{summary_type}"
    # NOTE: in this app the corpus is fixed (Dov Tzamir) and is passed in by main().
    os.makedirs(output_dir, exist_ok=True)
    # summaries_dir is only handed over as cache_dir; it is not created here
    # (presumably get_corpus_summaries creates it -- TODO confirm).
    summaries_dir = f"{output_dir}/summaries"
    character_definitions_dir = f"{output_dir}/character_definitions"
    os.makedirs(character_definitions_dir, exist_ok=True)

    # Load the corpus in large chunks for summarization.
    docs = load_docs(corpus_path=corpus, chunk_size=2048, chunk_overlap=64)

    # Generate the summaries, using summaries_dir as the cache location.
    corpus_summaries = get_corpus_summaries(
        docs=docs, summary_type=summary_type, cache_dir=summaries_dir
    )

    # Derive the character definition from the summaries.
    character_definition = get_character_definition(
        name=character_name,
        corpus_summaries=corpus_summaries,
        cache_dir=character_definitions_dir,
    )
    print(json.dumps(asdict(character_definition), indent=4))

    # Build the retrieval documents. This is validated for every chatbot
    # type, including "summary", which does not use the documents.
    if retrieval_docs == "raw":
        # Re-load the corpus with much smaller chunks for retrieval.
        documents = [
            doc.page_content
            for doc in load_docs(corpus_path=corpus, chunk_size=256, chunk_overlap=16)
        ]
    elif retrieval_docs == "summarized":
        documents = corpus_summaries
    else:
        raise ValueError(f"Unknown retrieval docs type: {retrieval_docs}")

    # Instantiate the requested chatbot flavor.
    if chatbot_type == "summary":
        chatbot = SummaryChatBot(character_definition=character_definition)
    elif chatbot_type == "retrieval":
        chatbot = RetrievalChatBot(
            character_definition=character_definition,
            documents=documents,
        )
    elif chatbot_type == "summary_retrieval":
        chatbot = SummaryRetrievalChatBot(
            character_definition=character_definition,
            documents=documents,
        )
    else:
        raise ValueError(f"Unknown chatbot type: {chatbot_type}")
    return chatbot


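# Launch with: python -m streamlit run chat_dov.py -- --corpus data/tzamir.txt --character_name Dov --chatbot_type retrieval --retrieval_docs raw --interface streamlit
# NOTE: main() below does not parse these flags; it passes the corpus, character name,
# chatbot type and retrieval-docs values to create_chatbot() as hard-coded arguments.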

def main():
    """Render the Streamlit chat page for the Dov Tzamir avatar.

    Draws the page header and photo, builds the chatbot (wrapped in
    st.cache_resource) with hard-coded parameters, prints the usage
    instructions and hands control to the Streamlit interface.

    If the OPENAI_API_KEY environment variable is not set, an error is shown
    on the page and the script run is stopped before the chatbot is built.
    """
    # Fixed parameters for Dov Tzamir; the files are already processed,
    # except the index, which is kept in memory.
    st.header("Converse com o avatar do Dov Tzamir")  # previously st.title
    st.subheader("Baseado no texto do livro Fragmentos de Memória do Tito")  # previously st.write
    st.write(" ")
    st.image("data/Dov_foto_peq.jpg")

    # Check for the API key before building the chatbot, so a missing key is
    # reported on the page instead of surfacing later as a raw KeyError.
    if "OPENAI_API_KEY" not in os.environ:
        st.error("A variável de ambiente OPENAI_API_KEY não está definida.")
        st.stop()

    chatbot = st.cache_resource(create_chatbot)(
        "data/tzamir.txt",  # corpus
        "Dov",  # character_name
        "retrieval",  # chatbot_type
        "raw",  # retrieval_docs
        "map_reduce",  # summary_type
    )

    st.write(" ")
    st.write("Digite o seu diálogo aqui finalizando a linha com ENTER")
    st.write("Voce pode continuar o diálogo, apagando sua perguntanda anterior e  digitando aqui novamente")

    app = Streamlit(chatbot=chatbot)
    app.run()


# Script entry point: build the page when this file is executed as the main module.
if __name__ == "__main__":
    main()