import streamlit as st
import torch
import requests
import os
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login
# Load the Hugging Face token from the Space secret / environment variable named "Allie"
HF_TOKEN = os.getenv("Allie", None)
if HF_TOKEN:
    login(HF_TOKEN)
# All available models
model_map = {
"FinGPT": {"id": "OpenFinAL/GPT2_FINGPT_QA", "local": True},
"InvestLM": {"id": "yixuantt/InvestLM-mistral-AWQ", "local": False},
"FinLLaMA": {"id": "us4/fin-llama3.1-8b", "local": False},
"FinanceConnect": {"id": "ceadar-ie/FinanceConnect-13B", "local": True},
"Sujet-Finance": {"id": "sujet-ai/Sujet-Finance-8B-v0.1", "local": True}
}
# Load local model
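# st.cache_resource keeps the loaded model and tokenizer in memory across Streamlit
# reruns, so the weights are downloaded and initialized only once per session.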
@st.cache_resource
def load_local_model(model_id):
    tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=HF_TOKEN)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float32,
        device_map="auto" if torch.cuda.is_available() else None,
        use_auth_token=HF_TOKEN
    )
    return model, tokenizer
# Build discursive prompt
def build_prompt(user_question):
    return (
        "You are a helpful and knowledgeable financial assistant named FinGPT. "
        "You explain financial terms and concepts clearly, with examples when useful.\n\n"
        f"User: {user_question.strip()}\n"
        "FinGPT:"
    )
# Clean up repeated parts
def clean_output(output_text):
    parts = output_text.split("FinGPT:")
    return parts[-1].strip() if len(parts) > 1 else output_text.strip()
# Local inference
def query_local_model(model_id, prompt):
    model, tokenizer = load_local_model(model_id)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        repetition_penalty=1.2,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id
    )
    raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return clean_output(raw_output)
# Remote inference
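# Models flagged "local": False are sent to the hosted Hugging Face Inference API
# instead of being loaded into this Space's own memory.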
def query_remote_model(model_id, prompt):
    headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
    payload = {"inputs": prompt, "parameters": {"max_new_tokens": 200}}
    response = requests.post(
        f"https://api-inference.huggingface.co/models/{model_id}",
        headers=headers,
        json=payload
    )
    if response.status_code == 200:
        result = response.json()
        return result[0]["generated_text"] if isinstance(result, list) else result.get("generated_text", "No output")
    else:
        raise RuntimeError(f"API Error: {response.status_code} — {response.text}")
# Unified query handler
def query_model(model_entry, user_question):
    prompt = build_prompt(user_question)
    if model_entry["local"]:
        return query_local_model(model_entry["id"], prompt)
    else:
        return query_remote_model(model_entry["id"], prompt)
# Streamlit UI
st.set_page_config(page_title="Financial LLM Interface", layout="centered")
st.title("💼 Financial LLM Evaluation Interface")
model_choice = st.selectbox("Select a Financial Model", list(model_map.keys()))
user_question = st.text_area("Enter your financial question:", "What is CAP in finance?")
if st.button("Get Response"):
    with st.spinner("Generating discursive response..."):
        try:
            model_entry = model_map[model_choice]
            answer = query_model(model_entry, user_question)
            st.markdown("### 🧠 Response:")
            st.text_area("💬 Response from FinGPT:", value=answer, height=200, disabled=True)
        except Exception as e:
            st.error(f"❌ Error: {e}")
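# To launch the interface locally (assuming this file is saved as app.py and the
# "Allie" environment variable holds a valid Hugging Face token):
#   streamlit run app.py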