import os
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
from huggingface_hub import login
from threading import Thread
import PyPDF2
import pandas as pd
import torch
import time

# Check if 'peft' is installed
try:
    from peft import PeftModel, PeftConfig
except ImportError:
    raise ImportError(
        "The 'peft' library is required but not installed. "
        "Please install it using: `pip install peft`"
    )

# 🔐 Hugging Face token (read from the environment; never hardcode secrets)
HF_TOKEN = os.environ.get("HF_TOKEN", "")

# Set page configuration
st.set_page_config(
    page_title="Assistente LGT | Angola",
    page_icon="🚀",
    layout="centered"
)

# Model base and options
BASE_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
MODEL_OPTIONS = {
    "Full Fine-Tuned": "amiguel/mistral-angolan-laborlaw",
    "LoRA Adapter": "amiguel/SmolLM2-360M-concise-reasoning-lora",
    "QLoRA Adapter": "amiguel/SmolLM2-360M-concise-reasoning-qlora"
}
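# NOTE: the adapter options above are SmolLM2-360M adapters, while BASE_MODEL_NAME
# is Mistral-7B. Applying them on top of this base will likely fail with a
# shape/config mismatch; pair each adapter with the base model it was trained on.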

st.title("🚀 Assistente LGT | Angola 🚀")

USER_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/9904d9a0d445ab0488cf7395cb863cce7621d897/USER_AVATAR.png"
BOT_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/991f4c6e4e1dc7a8e24876ca5aae5228bcdb4dba/Ataliba_Avatar.jpg"

# Sidebar
with st.sidebar:
    st.header("Model Selection πŸ€–")
    model_type = st.selectbox("Choose Model Type", list(MODEL_OPTIONS.keys()), index=0)
    selected_model = MODEL_OPTIONS[model_type]

    st.header("Upload Documents πŸ“‚")
    uploaded_file = st.file_uploader(
        "Choose a PDF or XLSX file",
        type=["pdf", "xlsx"],
        label_visibility="collapsed"
    )

# Session state
if "messages" not in st.session_state:
    st.session_state.messages = []

# File processor
@st.cache_data
def process_file(uploaded_file):
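    """Extract text from an uploaded PDF or XLSX file.

    Cached by st.cache_data, so the same upload is only parsed once per
    session. Returns an empty string when there is no file or parsing fails.
    """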
    if uploaded_file is None:
        return ""
    
    try:
        if uploaded_file.type == "application/pdf":
            pdf_reader = PyPDF2.PdfReader(uploaded_file)
            # extract_text() can return None for image-only pages
            return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
        elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
            df = pd.read_excel(uploaded_file)
            return df.to_markdown()
        return ""
    except Exception as e:
        st.error(f"📄 Error processing file: {str(e)}")
        return ""

# Model loader
@st.cache_resource
def load_model(model_type, selected_model):
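    """Load the tokenizer and the selected model variant.

    st.cache_resource keeps one instance per (model_type, selected_model)
    pair for the lifetime of the process. Returns (model, tokenizer) on
    success, or None on failure.
    """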
    try:
        login(token=HF_TOKEN)

        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME, token=HF_TOKEN)

        if model_type == "Full Fine-Tuned":
            model = AutoModelForCausalLM.from_pretrained(
                selected_model,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                token=HF_TOKEN
            )
        else:
            base_model = AutoModelForCausalLM.from_pretrained(
                BASE_MODEL_NAME,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                token=HF_TOKEN
            )
            model = PeftModel.from_pretrained(
                base_model,
                selected_model,
                torch_dtype=torch.bfloat16,
                is_trainable=False,
                token=HF_TOKEN
            )
        return model, tokenizer

    except Exception as e:
        st.error(f"πŸ€– Model loading failed: {str(e)}")
        return None

# Generation function
def generate_with_kv_cache(prompt, file_context, model, tokenizer, use_cache=True):
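    """Stream a completion for the prompt, reusing the KV cache.

    With use_cache=True, generate() keeps the attention key/value states of
    already-processed tokens (the KV cache), so each decoding step only
    computes attention for the newest token instead of re-encoding the
    whole sequence.
    """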
    full_prompt = f"Analyze this context:\n{file_context}\n\nQuestion: {prompt}\nAnswer:"
    
    streamer = TextIteratorStreamer(
        tokenizer, 
        skip_prompt=True, 
        skip_special_tokens=True
    )
    
    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
    
    generation_kwargs = {
        "input_ids": inputs["input_ids"],
        "attention_mask": inputs["attention_mask"],
        "max_new_tokens": 1024,
        "temperature": 0.7,
        "top_p": 0.9,
        "repetition_penalty": 1.1,
        "do_sample": True,
        "use_cache": use_cache,
        "streamer": streamer
    }
    
    # Run generation in a background thread so the main thread can consume
    # the streamer as tokens are produced.
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    return streamer

# Display chat history
for message in st.session_state.messages:
    avatar = USER_AVATAR if message["role"] == "user" else BOT_AVATAR
    with st.chat_message(message["role"], avatar=avatar):
        st.markdown(message["content"])

# Prompt interaction
if prompt := st.chat_input("Ask your question about Angola's labor law (LGT)..."):

    # Load model if necessary
    if "model" not in st.session_state or st.session_state.get("model_type") != model_type:
        model_data = load_model(model_type, selected_model)
        if model_data is None:
            st.error("Failed to load model.")
            st.stop()

        st.session_state.model, st.session_state.tokenizer = model_data
        st.session_state.model_type = model_type

    model = st.session_state.model
    tokenizer = st.session_state.tokenizer

    with st.chat_message("user", avatar=USER_AVATAR):
        st.markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    file_context = process_file(uploaded_file)

    if model and tokenizer:
        try:
            with st.chat_message("assistant", avatar=BOT_AVATAR):
                start_time = time.time()
                streamer = generate_with_kv_cache(prompt, file_context, model, tokenizer, use_cache=True)

                response_container = st.empty()
                full_response = ""

                for chunk in streamer:
                    # Strip stray reasoning tags but keep each chunk's own
                    # whitespace; re-spacing chunks would garble the text.
                    cleaned_chunk = chunk.replace("<think>", "").replace("</think>", "")
                    full_response += cleaned_chunk
                    response_container.markdown(full_response + "▌")

                end_time = time.time()
                input_tokens = len(tokenizer(prompt)["input_ids"])
                output_tokens = len(tokenizer(full_response)["input_ids"])
                speed = output_tokens / max(end_time - start_time, 1e-6)

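                # Illustrative cost estimate only: assumes example rates of
                # $5 per 1M input tokens and $15 per 1M output tokens, and an
                # exchange rate of 1160 AOA per USD.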
                input_cost = (input_tokens / 1_000_000) * 5
                output_cost = (output_tokens / 1_000_000) * 15
                total_cost_usd = input_cost + output_cost
                total_cost_aoa = total_cost_usd * 1160

                st.caption(
                    f"πŸ”‘ Input Tokens: {input_tokens} | Output Tokens: {output_tokens} | "
                    f"πŸ•’ Speed: {speed:.1f}t/s | πŸ’° Cost (USD): ${total_cost_usd:.4f} | "
                    f"πŸ’΅ Cost (AOA): {total_cost_aoa:.4f}"
                )

                response_container.markdown(full_response)
                st.session_state.messages.append({"role": "assistant", "content": full_response})

        except Exception as e:
            st.error(f"⚑ Generation error: {str(e)}")
    else:
        st.error("πŸ€– Model not loaded!")