File size: 2,341 Bytes
7df99d8
4043b0f
 
 
 
 
8191a3f
4043b0f
 
 
 
 
 
 
7df99d8
4043b0f
3a27847
 
4043b0f
 
 
 
 
 
 
 
 
7df99d8
4043b0f
 
7df99d8
a5ac299
4c798e0
 
770f7e9
9e99164
4043b0f
 
 
 
 
 
 
 
7da5893
4043b0f
 
 
 
 
 
 
 
7df99d8
4043b0f
 
 
 
 
 
 
 
 
 
f54e5c5
4043b0f
 
 
 
 
 
770f7e9
4043b0f
 
7df99d8
4043b0f
 
 
 
 
 
 
7df99d8
4043b0f
f54e5c5
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# Think Paraguayo

import os
import random
import time
import subprocess

# Fetch the quantized GGUF model at startup. subprocess.run with an
# argument list (shell=False) replaces os.system's shell string, which
# is the injection-prone/dated idiom. check=False preserves the original
# os.system behavior of ignoring a non-zero exit status.
subprocess.run(
    [
        "wget",
        "https://huggingface.co/thinkPy/gua-a_v0.2-dpo_mistral-7b_GGUF/resolve/main/gua-a_v0.2-dpo_mistral-7b_q4_K_M.gguf",
        "-O",
        "model.gguf",
    ],
    check=False,
)

from llama_cpp import Llama
import gradio as gr
from ragatouille import RAGPretrainedModel
from llama_index.core import Document, SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter

# Shared cap: both the llama.cpp context window and max generated tokens.
max_seq_length = 256 

# Prompt template filled as prompt.format(context, question, "") — the
# trailing empty slot seeds the "### Respuesta:" section for the model.
prompt = """Responde a preguntas de forma clara, amable, concisa y solamente en el lenguaje español, sobre el libro Ñande Ypykuéra.
Contexto
-------------------------
{}
-------------------------
### Pregunta:
{}
### Respuesta:
{}"""

# CPU-only llama.cpp instance over the downloaded model.
llm = Llama(model_path="model.gguf",
            n_ctx=max_seq_length,
            n_threads=2)

# Pre-built ColBERT index location (HF Spaces layout).
DOC_PATH = "/home/user/app/index" 

# Spanish ColBERTv2 retriever, attached to the on-disk index; the dummy
# "init" query warms the searcher up so the first user query is fast.
RAG = RAGPretrainedModel.from_pretrained("AdrienB134/ColBERTv2.0-spanish-mmarcoES")
RAG = RAG.from_index(DOC_PATH, n_gpu=None)
RAG.search("init", None, k=1)

def reformat_rag(results_rag):
  """Pull the passage text out of a list of retriever hits.

  Returns one string per hit, or [""] when the retriever yielded
  nothing (None), so callers can always " \\n ".join the result.
  """
  if results_rag is None:
    return [""]
  return [hit["content"] for hit in results_rag]

def chat_stream_completion(message, history):
    """Stream an answer to *message*, grounded in the top RAG passage.

    *history* is supplied by gr.ChatInterface and is not used here.
    Yields the accumulated partial answer once per streamed chunk.
    """
    # Retrieve the single best passage and inline it into the template.
    passages = reformat_rag(RAG.search(message, None, k=1))
    full_prompt = prompt.format(" \n ".join(passages), message, "")
    print(full_prompt)

    # Near-deterministic decoding, streamed token by token.
    stream = llm.create_completion(
        prompt=full_prompt,
        temperature=0.01,
        max_tokens=max_seq_length,
        stream=True,
    )

    answer = ""
    for chunk in stream:
        piece = chunk['choices'][0]["text"]
        if piece:
            answer += piece
        yield answer


def launcher():
    """Build and launch the Gradio UI: static cover image + RAG chat."""
    # BUG FIX: the original called gr.Blocks(css=css), but no `css`
    # variable is defined anywhere in this file, so launcher() raised
    # NameError before the UI could start. The argument is dropped.
    with gr.Blocks() as demo:
        gr.Markdown("# Think Paraguayo")
        gr.Markdown("## Conoce la cultura guaraní!!")

        with gr.Row(variant='panel'):
            with gr.Column(scale=1):
                gr.Image(value="think_paraguayo.jpeg", type="filepath", label="Imagen Estática")

            with gr.Column(scale=1):
                # Chat widget wired to the streaming completion generator;
                # .queue() enables generator-based streaming responses.
                chatbot = gr.ChatInterface(
                    fn=chat_stream_completion,
                    retry_btn = None,
                    stop_btn = None,
                    undo_btn = None
                ).queue()

    demo.launch()

if __name__ == "__main__":
    launcher()