File size: 7,253 Bytes
3145713
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fe620e6
3145713
fe620e6
3145713
 
 
 
fe620e6
3145713
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import streamlit as st
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TextIteratorStreamer,
    AutoConfig
)
from huggingface_hub import login
from threading import Thread
import PyPDF2
import pandas as pd
import torch
import time
import os

# 'peft' is a hard requirement: stop at import time with install instructions if it is absent.
try:
    from peft import PeftModel, PeftConfig
except ImportError:
    _peft_hint = (
        "The 'peft' library is required but not installed. "
        "Please install it using: `pip install peft`"
    )
    raise ImportError(_peft_hint)

# Hugging Face access token, read from the environment; the app refuses to start without it.
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN is None or HF_TOKEN == "":
    raise ValueError("Missing Hugging Face Token. Please set the HF_TOKEN environment variable.")

# Base checkpoint used underneath the LoRA / QLoRA adapters.
# Bases tried earlier: neuralmind/bert-base-portuguese-cased,
# pierreguillou/gpt2-small-portuguese, mistralai/Mistral-7B-Instruct-v0.2.
BASE_MODEL_NAME = "unicamp-dl/ptt5-base-portuguese-vocab"

# Maps the label shown in the sidebar to the Hugging Face repository to load.
# Fine-tuned repos tried earlier: amiguel/mistral-angolan-laborlaw-bert-base-pt,
# amiguel/mistral-angolan-laborlaw-gpt2, amiguel/mistral-angolan-laborlaw.
# NOTE(review): the adapter repositories are named after SmolLM2-360M while
# BASE_MODEL_NAME points at a PTT5 checkpoint -- confirm the adapters were
# trained on this base before relying on the adapter options.
MODEL_OPTIONS = {
    "Full Fine-Tuned": "amiguel/mistral-angolan-laborlaw-ptt5",
    "LoRA Adapter": "amiguel/SmolLM2-360M-concise-reasoning-lora",
    "QLoRA Adapter": "amiguel/SmolLM2-360M-concise-reasoning-qlora",
}


# Page configuration and visible heading
st.set_page_config(
    page_title="Assistente LGT | Angola",
    page_icon="πŸš€",
    layout="centered",
)
st.title("πŸš€ Assistente LGT | Angola πŸš€")

# Avatar images rendered beside user and assistant chat messages
_AVATAR_HOST = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis"
USER_AVATAR = f"{_AVATAR_HOST}/9904d9a0d445ab0488cf7395cb863cce7621d897/USER_AVATAR.png"
BOT_AVATAR = f"{_AVATAR_HOST}/991f4c6e4e1dc7a8e24876ca5aae5228bcdb4dba/Ataliba_Avatar.jpg"

# Sidebar: model picker followed by the document uploader
with st.sidebar:
    st.header("Model Selection πŸ€–")
    model_type = st.selectbox(
        "Choose Model Type",
        list(MODEL_OPTIONS),
        index=0,
    )
    # Repository id that corresponds to the chosen label
    selected_model = MODEL_OPTIONS[model_type]

    st.header("Upload Documents πŸ“‚")
    uploaded_file = st.file_uploader(
        "Choose a PDF or XLSX file",
        type=["pdf", "xlsx"],
        label_visibility="collapsed",
    )

# Chat history is kept in Streamlit session state; start with an empty list the first time.
if "messages" not in st.session_state:
    st.session_state["messages"] = []

# File processing
@st.cache_data
def process_file(uploaded_file):
    """Extract the text content of an uploaded PDF or XLSX file.

    Args:
        uploaded_file: The value produced by ``st.file_uploader``: an object
            exposing a ``type`` MIME string, or ``None`` when nothing has
            been uploaded.

    Returns:
        str: For a PDF, the text of every page joined by newlines. For an
        XLSX, the sheet read by ``pd.read_excel`` rendered as a Markdown
        table. An empty string when there is no file, the MIME type is
        neither of those two, or extraction fails (the error is reported in
        the UI through ``st.error``).
    """
    if uploaded_file is None:
        return ""
    try:
        if uploaded_file.type == "application/pdf":
            reader = PyPDF2.PdfReader(uploaded_file)
            # `or ""` guards against pages for which extract_text() yields nothing.
            return "\n".join(page.extract_text() or "" for page in reader.pages)
        if uploaded_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
            df = pd.read_excel(uploaded_file)
            return df.to_markdown()
    except Exception as e:
        st.error(f"πŸ“„ Error processing file: {str(e)}")
    # Unsupported MIME type or a failed extraction: always hand back a string.
    return ""

# 🧠 Load model and tokenizer
@st.cache_resource
def load_model(model_type, selected_model):
    try:
        login(token=HF_TOKEN)
        device = "cuda" if torch.cuda.is_available() else "cpu"
        dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float32

        tokenizer = AutoTokenizer.from_pretrained(selected_model, token=HF_TOKEN)

        if model_type == "Full Fine-Tuned":
            model = AutoModelForCausalLM.from_pretrained(
                selected_model,
                device_map="auto",
                torch_dtype=dtype,
                token=HF_TOKEN
            )
        else:
            base_model = AutoModelForCausalLM.from_pretrained(
                BASE_MODEL_NAME,
                device_map="auto",
                torch_dtype=dtype,
                token=HF_TOKEN
            )
            model = PeftModel.from_pretrained(
                base_model,
                selected_model,
                is_trainable=False,
                torch_dtype=dtype,
                token=HF_TOKEN
            )
        return model, tokenizer
    except Exception as e:
        st.error(f"πŸ€– Model loading failed: {str(e)}")
        return None, None

# Generate response
def generate_with_streaming(prompt, file_context, model, tokenizer):
    """Start generation on a background thread and return its text streamer.

    The document context and the user's question are merged into a single
    Portuguese prompt, tokenized, moved to the model's device and handed to
    ``model.generate`` running in a separate thread.

    Args:
        prompt: The user's question.
        file_context: Text placed in the prompt as context for the question.
        model: Object providing ``generate`` and ``device``.
        tokenizer: Tokenizer used to encode the prompt and decode the stream.

    Returns:
        TextIteratorStreamer: Iterable that yields generated text; it is
        configured to skip the prompt and special tokens.
    """
    question_prompt = f"Analisa este contexto:\n{file_context}\n\nPergunta: {prompt}\nResposta:"

    encoded = tokenizer(question_prompt, return_tensors="pt")
    on_device = {}
    for key, tensor in encoded.items():
        on_device[key] = tensor.to(model.device)

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_args = dict(
        input_ids=on_device["input_ids"],
        attention_mask=on_device["attention_mask"],
        max_new_tokens=1024,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.1,
        do_sample=True,
        use_cache=True,
        streamer=streamer,
    )

    # Generation runs in the background so the caller can consume the streamer immediately.
    worker = Thread(target=model.generate, kwargs=generation_args)
    worker.start()
    return streamer

# 🧾 Display chat history
for msg in st.session_state.messages:
    avatar = USER_AVATAR if msg["role"] == "user" else BOT_AVATAR
    with st.chat_message(msg["role"], avatar=avatar):
        st.markdown(msg["content"])

# Main interaction: handle one chat turn for each submitted question
if prompt := st.chat_input("Pergunta sobre a LGT?"):
    # Show the user's message and record it in the history
    with st.chat_message("user", avatar=USER_AVATAR):
        st.markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Load the model when the session has none yet or the sidebar selection changed
    if "model" not in st.session_state or st.session_state.get("model_type") != model_type:
        with st.spinner("πŸ”„ A carregar modelo..."):
            model, tokenizer = load_model(model_type, selected_model)
            if model is None:
                # load_model already reported the failure in the UI
                st.stop()
            st.session_state.model = model
            st.session_state.tokenizer = tokenizer
            st.session_state.model_type = model_type
    else:
        model = st.session_state.model
        tokenizer = st.session_state.tokenizer

    # Prepare context; fall back to a placeholder when no text was extracted
    file_context = process_file(uploaded_file) or "Sem contexto adicional disponΓ­vel."

    # Generate assistant response
    with st.chat_message("assistant", avatar=BOT_AVATAR):
        response_box = st.empty()
        full_response = ""
        try:
            start_time = time.time()
            streamer = generate_with_streaming(prompt, file_context, model, tokenizer)

            for chunk in streamer:
                # Append chunks verbatim: they carry their own spaces and
                # newlines, which Markdown needs for paragraphs and lists.
                full_response += chunk
                # Model output is rendered as plain Markdown (no raw HTML),
                # the same way as the final render below.
                response_box.markdown(full_response + "β–Œ")

            # Token and speed metrics
            end_time = time.time()
            elapsed = end_time - start_time
            input_tokens = len(tokenizer(prompt)["input_ids"])
            output_tokens = len(tokenizer(full_response)["input_ids"])
            # Guard against a zero-length interval on coarse clocks
            speed = output_tokens / elapsed if elapsed > 0 else 0.0

            # Cost estimate: USD per one million tokens, then converted to AOA.
            # NOTE(review): these rates are hard-coded -- confirm they are current.
            usd_per_million_input = 5
            usd_per_million_output = 15
            aoa_per_usd = 1160
            cost_usd = (
                (input_tokens / 1e6) * usd_per_million_input
                + (output_tokens / 1e6) * usd_per_million_output
            )
            cost_aoa = cost_usd * aoa_per_usd

            st.caption(
                f"πŸ”‘ Input Tokens: {input_tokens} | Output Tokens: {output_tokens} | "
                f"πŸ•’ Speed: {speed:.1f}t/s | πŸ’° USD: ${cost_usd:.4f} | πŸ‡¦πŸ‡΄ AOA: {cost_aoa:.2f}"
            )

            # Final render without the streaming cursor, then persist the reply
            response_box.markdown(full_response.strip())
            st.session_state.messages.append({"role": "assistant", "content": full_response.strip()})

        except Exception as e:
            st.error(f"⚑ Erro ao gerar resposta: {str(e)}")