# Think Paraguayo
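# Gradio chat app about Guaraní culture: a fine-tuned Mistral-7B (GGUF, run
# with llama.cpp) answers in Spanish, grounded by ColBERTv2 retrieval over a
# local index. The UI text and prompt are intentionally in Spanish.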

import os

from llama_cpp import Llama
import gradio as gr
from ragatouille import RAGPretrainedModel

# Download the quantized GGUF weights from the Hugging Face Hub, skipping the
# fetch when the file already exists so a restart doesn't re-download it.
if not os.path.exists("model.gguf"):
    os.system("wget https://huggingface.co/thinkPy/gua-a_v0.1-ft_mistral-7b_GGUF/resolve/main/gua-a_v0.1-ft_mistral-7b_q4_K_M.gguf -O model.gguf")

max_seq_length = 256  # context window; also reused below as the per-reply token cap

prompt = """Eres gua-a un modelo de lenguaje entrenado para responder preguntas sobre la cultura guaraní, debes responder de forma clara, amable, concisa y solamente en el lenguaje español, si encuentras las respuesta en este prompt puedes copiarla.
Contexto
--------------------------
{}
--------------------------
### Pregunta:
{}
### Respuesta:
{}"""

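# Load the quantized model with llama.cpp; a short context window and two
# threads keep the footprint small enough for a CPU-only machine.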
llm = Llama(model_path="model.gguf",
            n_ctx=max_seq_length,
            n_threads=2)

DOC_PATH = "/home/user/app/index"
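# The ColBERT index at DOC_PATH is assumed to have been built ahead of time,
# e.g. along these lines (illustrative sketch; "guarani_docs.txt" is a
# placeholder file name, not part of this repo):
#   RAG = RAGPretrainedModel.from_pretrained("AdrienB134/ColBERTv2.0-spanish-mmarcoES")
#   RAG.index(collection=[open("guarani_docs.txt").read()],
#             index_name="index", split_documents=True)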

# Spanish ColBERTv2 retriever, reloaded from the prebuilt index; the dummy
# "init" query warms up the searcher before the first real request.
RAG = RAGPretrainedModel.from_pretrained("AdrienB134/ColBERTv2.0-spanish-mmarcoES")
RAG = RAG.from_index(DOC_PATH, n_gpu=None)
RAG.search("init", None, k=1)

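# Pull the raw passage text out of RAGatouille search hits; fall back to an
# empty context when retrieval returns nothing.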
def reformat_rag(results_rag):
    if results_rag is not None:
        return [result["content"] for result in results_rag]
    else:
        return [""]

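# Chat handler: retrieve context, build the prompt, and stream the completion.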
def chat_stream_completion(message, history):
    # Retrieve the single most relevant indexed passage for the question.
    context = reformat_rag(RAG.search(message, None, k=1))
    context = " \n ".join(context)

    # Fill the template: context, question, and an empty answer slot.
    full_prompt = prompt.format(context, message, "")
    print(full_prompt)

    response = llm.create_completion(
        prompt=full_prompt,
        temperature=0.1,
        max_tokens=max_seq_length,
        stream=True,
    )

    # Accumulate streamed tokens and yield the growing answer so the chat
    # window updates as the model generates.
    message_repl = ""
    for chunk in response:
        if len(chunk["choices"][0]["text"]) != 0:
            message_repl += chunk["choices"][0]["text"]
        yield message_repl

css = """
    h1 {
        font-size: 32px;
        text-align: center;
    }
    h2 {
        text-align: center;
    }
    img {
        height: 750px;  /* Reducing the image height */
    }
    """

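# Two-column Gradio UI: cover image on the left, streaming chat on the right.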
def launcher():
    with gr.Blocks(css=css) as demo:
        gr.Markdown("# Think Paraguayo")
        gr.Markdown("## Conoce la cultura guaraní.")

        with gr.Row(variant='panel'):
            with gr.Column(scale=1):
                gr.Image(value="think_paraguayo.jpeg", type="filepath", label="")

            with gr.Column(scale=1):
                
                # Chat UI bound to the streaming handler; hide the retry,
                # stop, and undo buttons.
                chatbot = gr.ChatInterface(
                    fn=chat_stream_completion,
                    retry_btn=None,
                    stop_btn=None,
                    undo_btn=None,
                ).queue()

    demo.launch()

if __name__ == "__main__":
    launcher()