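"""Streamlit app for evaluating a set of financial LLMs.

The user picks a model from `model_map`; the app builds a FinGPT-persona
prompt and answers it either locally via transformers or remotely through
the Hugging Face Inference API.
"""
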
import streamlit as st
import torch
import requests
import os
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login

# Load the Hugging Face token from the environment (stored as a secret named "Allie")
HF_TOKEN = os.getenv("Allie")
if HF_TOKEN:
    login(HF_TOKEN)

# All available models
model_map = {
    "FinGPT": {"id": "OpenFinAL/GPT2_FINGPT_QA", "local": True},
    "InvestLM": {"id": "yixuantt/InvestLM-mistral-AWQ", "local": False},
    "FinLLaMA": {"id": "us4/fin-llama3.1-8b", "local": False},
    "FinanceConnect": {"id": "ceadar-ie/FinanceConnect-13B", "local": True},
    "Sujet-Finance": {"id": "sujet-ai/Sujet-Finance-8B-v0.1", "local": True}
}

# Load local model
@st.cache_resource
def load_local_model(model_id):
    tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float32,
        device_map="auto" if torch.cuda.is_available() else None,
        token=HF_TOKEN
    )
    return model, tokenizer

# Build discursive prompt
def build_prompt(user_question):
    return (
        "You are a helpful and knowledgeable financial assistant named FinGPT. "
        "You explain financial terms and concepts clearly, with examples when useful.\n\n"
        f"User: {user_question.strip()}\n"
        "FinGPT:"
    )

# Clean up repeated parts
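# e.g. (hypothetical strings): clean_output("User: What is beta?\nFinGPT: Beta measures volatility.")
# -> "Beta measures volatility."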
def clean_output(output_text):
    parts = output_text.split("FinGPT:")
    return parts[-1].strip() if len(parts) > 1 else output_text.strip()

# Local inference
def query_local_model(model_id, prompt):
    model, tokenizer = load_local_model(model_id)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        repetition_penalty=1.2,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id
    )
    raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return clean_output(raw_output)

# Remote inference
def query_remote_model(model_id, prompt):
    headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
    payload = {"inputs": prompt, "parameters": {"max_new_tokens": 200}}
    response = requests.post(
        f"https://api-inference.huggingface.co/models/{model_id}",
        headers=headers,
        json=payload,
        timeout=120  # don't hang indefinitely while a cold model spins up
    )
    if response.status_code == 200:
        result = response.json()
        text = result[0]["generated_text"] if isinstance(result, list) else result.get("generated_text", "No output")
        # The API may echo the prompt back, so strip it the same way as the local path
        return clean_output(text)
    else:
        raise RuntimeError(f"API Error: {response.status_code}: {response.text}")

# Unified query handler
def query_model(model_entry, user_question):
    prompt = build_prompt(user_question)
    if model_entry["local"]:
        return query_local_model(model_entry["id"], prompt)
    else:
        return query_remote_model(model_entry["id"], prompt)

# Streamlit UI
st.set_page_config(page_title="Financial LLM Interface", layout="centered")
st.title("💼 Financial LLM Evaluation Interface")

model_choice = st.selectbox("Select a Financial Model", list(model_map.keys()))
user_question = st.text_area("Enter your financial question:", "What is CAP in finance?")

if st.button("Get Response"):
    with st.spinner("Generating discursive response..."):
        try:
            model_entry = model_map[model_choice]
            answer = query_model(model_entry, user_question)
            st.markdown("### 🧠 Response:")
            st.text_area("💬 Response from FinGPT:", value=answer, height=200, disabled=True)

        except Exception as e:
            st.error(f"❌ Error: {e}")
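
# A minimal way to run this locally (assuming the file is saved as app.py and
# the "Allie" env var / Space secret holds a valid Hugging Face token):
#
#   pip install streamlit torch transformers huggingface_hub requests
#   streamlit run app.py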