import os
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
from huggingface_hub import login
from threading import Thread
import PyPDF2
import pandas as pd
import torch
import time

# Check if 'peft' is installed
try:
    from peft import PeftModel, PeftConfig
except ImportError:
    raise ImportError(
        "The 'peft' library is required but not installed. "
        "Please install it using: `pip install peft`"
    )

# 🔐 Hugging Face token (read from the environment; never hardcode secrets)
HF_TOKEN = os.environ.get("HF_TOKEN", "")

# Set page configuration
st.set_page_config(
    page_title="Assistente LGT | Angola",
    page_icon="🚀",
    layout="centered"
)

# Model base and options
BASE_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
MODEL_OPTIONS = {
    "Full Fine-Tuned": "amiguel/mistral-angolan-laborlaw",
    "LoRA Adapter": "amiguel/SmolLM2-360M-concise-reasoning-lora",
    "QLoRA Adapter": "amiguel/SmolLM2-360M-concise-reasoning-qlora"
}
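# NOTE: the adapter options above are SmolLM2-360M adapters, while BASE_MODEL_NAME
# is Mistral-7B. Applying them on top of this base will likely fail with a
# shape/config mismatch; pair each adapter with the base model it was trained on.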

st.title("🚀 Assistente LGT | Angola 🚀")

USER_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/9904d9a0d445ab0488cf7395cb863cce7621d897/USER_AVATAR.png"
BOT_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/991f4c6e4e1dc7a8e24876ca5aae5228bcdb4dba/Ataliba_Avatar.jpg"

# Sidebar
with st.sidebar:
    st.header("Model Selection πŸ€–")
    model_type = st.selectbox("Choose Model Type", list(MODEL_OPTIONS.keys()), index=0)
    selected_model = MODEL_OPTIONS[model_type]

    st.header("Upload Documents πŸ“‚")
    uploaded_file = st.file_uploader(
        "Choose a PDF or XLSX file",
        type=["pdf", "xlsx"],
        label_visibility="collapsed"
    )

# Session state
if "messages" not in st.session_state:
    st.session_state.messages = []

# File processor
@st.cache_data
def process_file(uploaded_file):
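    """Extract text from an uploaded PDF or XLSX file.

    Cached by st.cache_data, so the same upload is only parsed once per
    session. Returns an empty string when there is no file or parsing fails.
    """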
    if uploaded_file is None:
        return ""
    
    try:
        if uploaded_file.type == "application/pdf":
            pdf_reader = PyPDF2.PdfReader(uploaded_file)
            # extract_text() can return None for image-only pages
            return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
        elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
            df = pd.read_excel(uploaded_file)
            return df.to_markdown()
        return ""
    except Exception as e:
        st.error(f"📄 Error processing file: {str(e)}")
        return ""

# Model loader
@st.cache_resource
def load_model(model_type, selected_model):
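    """Load the tokenizer and the selected model variant.

    st.cache_resource keeps one instance per (model_type, selected_model)
    pair for the lifetime of the process. Returns (model, tokenizer) on
    success, or None on failure.
    """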
    try:
        login(token=HF_TOKEN)

        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME, token=HF_TOKEN)

        if model_type == "Full Fine-Tuned":
            model = AutoModelForCausalLM.from_pretrained(
                selected_model,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                token=HF_TOKEN
            )
        else:
            base_model = AutoModelForCausalLM.from_pretrained(
                BASE_MODEL_NAME,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                token=HF_TOKEN
            )
            model = PeftModel.from_pretrained(
                base_model,
                selected_model,
                torch_dtype=torch.bfloat16,
                is_trainable=False,
                token=HF_TOKEN
            )
        return model, tokenizer

    except Exception as e:
        st.error(f"πŸ€– Model loading failed: {str(e)}")
        return None

# Generation function
def generate_with_kv_cache(prompt, file_context, model, tokenizer, use_cache=True):
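    """Stream a completion for the prompt, reusing the KV cache.

    With use_cache=True, generate() keeps the attention key/value states of
    already-processed tokens (the KV cache), so each decoding step only
    computes attention for the newest token instead of re-encoding the
    whole sequence.
    """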
    full_prompt = f"Analyze this context:\n{file_context}\n\nQuestion: {prompt}\nAnswer:"
    
    streamer = TextIteratorStreamer(
        tokenizer, 
        skip_prompt=True, 
        skip_special_tokens=True
    )
    
    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
    
    generation_kwargs = {
        "input_ids": inputs["input_ids"],
        "attention_mask": inputs["attention_mask"],
        "max_new_tokens": 1024,
        "temperature": 0.7,
        "top_p": 0.9,
        "repetition_penalty": 1.1,
        "do_sample": True,
        "use_cache": use_cache,
        "streamer": streamer
    }
    
    # Run generation in a background thread so the main thread can consume
    # the streamer as tokens are produced.
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    return streamer

# Display chat history
for message in st.session_state.messages:
    avatar = USER_AVATAR if message["role"] == "user" else BOT_AVATAR
    with st.chat_message(message["role"], avatar=avatar):
        st.markdown(message["content"])

# Prompt interaction
if prompt := st.chat_input("Ask your question about Angola's labor law (LGT)..."):

    # Load model if necessary
    if "model" not in st.session_state or st.session_state.get("model_type") != model_type:
        model_data = load_model(model_type, selected_model)
        if model_data is None:
            st.error("Failed to load model.")
            st.stop()

        st.session_state.model, st.session_state.tokenizer = model_data
        st.session_state.model_type = model_type

    model = st.session_state.model
    tokenizer = st.session_state.tokenizer

    with st.chat_message("user", avatar=USER_AVATAR):
        st.markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    file_context = process_file(uploaded_file)

    if model and tokenizer:
        try:
            with st.chat_message("assistant", avatar=BOT_AVATAR):
                start_time = time.time()
                streamer = generate_with_kv_cache(prompt, file_context, model, tokenizer, use_cache=True)

                response_container = st.empty()
                full_response = ""

                for chunk in streamer:
                    # Strip stray reasoning tags but keep each chunk's own
                    # whitespace; re-spacing chunks would garble the text.
                    cleaned_chunk = chunk.replace("<think>", "").replace("</think>", "")
                    full_response += cleaned_chunk
                    response_container.markdown(full_response + "▌")

                end_time = time.time()
                input_tokens = len(tokenizer(prompt)["input_ids"])
                output_tokens = len(tokenizer(full_response)["input_ids"])
                speed = output_tokens / max(end_time - start_time, 1e-6)

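                # Illustrative cost estimate only: assumes example rates of
                # $5 per 1M input tokens and $15 per 1M output tokens, and an
                # exchange rate of 1160 AOA per USD.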
                input_cost = (input_tokens / 1_000_000) * 5
                output_cost = (output_tokens / 1_000_000) * 15
                total_cost_usd = input_cost + output_cost
                total_cost_aoa = total_cost_usd * 1160

                st.caption(
                    f"πŸ”‘ Input Tokens: {input_tokens} | Output Tokens: {output_tokens} | "
                    f"πŸ•’ Speed: {speed:.1f}t/s | πŸ’° Cost (USD): ${total_cost_usd:.4f} | "
                    f"πŸ’΅ Cost (AOA): {total_cost_aoa:.4f}"
                )

                response_container.markdown(full_response)
                st.session_state.messages.append({"role": "assistant", "content": full_response})

        except Exception as e:
            st.error(f"⚑ Generation error: {str(e)}")
    else:
        st.error("πŸ€– Model not loaded!")