Spaces:

Jforeverss
/

finchat222

Runtime error

File size: 5,840 Bytes

import streamlit as st
import pdfplumber
import torch
from transformers import RobertaTokenizer, RobertaModel
import nltk
import openai
from torch import nn
import torch.nn.functional as F
from nltk.tokenize import sent_tokenize
import os

# Print the entries of the current working directory to stdout at start-up
# (presumably a debugging aid for checking which files were deployed).
print(os.listdir(os.curdir))

# Fetch NLTK's 'punkt' resource, which the sentence tokenizer imported at the
# top of this file relies on.
nltk.download('punkt')

class Bert_model(nn.Module):
    """RoBERTa encoder followed by a small two-way classification head.

    The encoder is initialised from the 'deepset/roberta-base-squad2'
    checkpoint. The head takes the hidden state at sequence position 0,
    projects it, applies dropout and maps the result to 2 logits.
    """

    def __init__(self, hidden_size, dropout_rate):
        """Build the encoder and the classification layers.

        Args:
            hidden_size: Feature size fed into the head; used as both the
                input and output width of the projection layer.
            dropout_rate: Dropout probability applied after the projection.
        """
        super().__init__()
        self.hidden_size = hidden_size
        # Attribute names and registration order are kept stable because the
        # keys of a saved state dict are derived from them.
        self.bert = RobertaModel.from_pretrained('deepset/roberta-base-squad2')
        self.cls_prj = nn.Linear(hidden_size, hidden_size, bias=True)
        self.cls_dropout = nn.Dropout(dropout_rate)
        self.cls_final = nn.Linear(hidden_size, 2, bias=True)

    def forward(self, input_ids, attention_mask):
        """Compute the 2-way logits for a batch of token sequences.

        Args:
            input_ids: Token ids passed to the encoder as ``input_ids``.
            attention_mask: Mask passed to the encoder as ``attention_mask``.

        Returns:
            The output of the final linear layer, with 2 values per batch
            item.
        """
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Only the hidden state of the first sequence position feeds the head.
        first_token_state = encoded.last_hidden_state[:, 0, :]
        projected = self.cls_dropout(self.cls_prj(first_token_state))
        return self.cls_final(projected)

# Checkpoint file with the fine-tuned classifier weights, resolved relative to
# the current working directory.
model_path = "model.pt"

# Use the first GPU when one is available and fall back to the CPU otherwise;
# a hard-coded "cuda:0" makes start-up fail on hosts without a GPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# map_location remaps tensors that were saved from a GPU onto `device`, so the
# checkpoint can also be deserialised on CPU-only hosts.
state_dict = torch.load(model_path, map_location=device)

model = Bert_model(hidden_size=768, dropout_rate=0.1)
# The model is wrapped in DataParallel *before* the weights are loaded, so the
# checkpoint keys are matched against the wrapped module (presumably the
# checkpoint was saved from a DataParallel model -- confirm against training).
model = nn.DataParallel(model)
model.load_state_dict(state_dict)
model = model.to(device)
# Inference only: switch dropout to evaluation behaviour.
model.eval()

tokenizer = RobertaTokenizer.from_pretrained('deepset/roberta-base-squad2')


def preprocess_pdf(pdf_path, tokenizer, skip_pages=2):
    """Extract the text of a PDF and tokenize it into fixed-length tensors.

    Args:
        pdf_path: Location of the PDF, passed straight to ``pdfplumber.open``.
        tokenizer: Tokenizer object exposing ``encode_plus``.
        skip_pages: Number of leading pages to ignore. Defaults to 2, the
            value that used to be hard-coded.

    Returns:
        A tuple ``(input_ids, attention_mask, text)``: two tensors with a
        leading batch dimension of 1 built from the tokenizer output, and the
        extracted text of the remaining pages joined with single spaces.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() may yield None for a page without extractable text
        # (e.g. a scanned image); substitute "" so the join cannot raise
        # TypeError.
        text = " ".join(
            page.extract_text() or "" for page in pdf.pages[skip_pages:]
        )

    tokenized_text = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=512,
        padding='max_length',
        # Without truncation, max_length only affects padding and a long
        # document would produce more than 512 tokens.
        truncation=True,
        return_attention_mask=True,
    )
    input_ids = torch.tensor([tokenized_text['input_ids']])
    attention_mask = torch.tensor([tokenized_text['attention_mask']])
    return input_ids, attention_mask, text

def translate_text(text, target_language):
    """Ask the chat model to translate English text into another language.

    Args:
        text: English text to translate; it is embedded in double quotes in
            the user prompt.
        target_language: Name of the target language, inserted into the
            user prompt.

    Returns:
        The message content of the first choice in the completion response.
    """
    system_message = {
        "role": "system",
        "content": "You are a helpful assistant that translates English text to other languages.",
    }
    user_message = {
        "role": "user",
        "content": f'Translate the following English text to {target_language}: "{text}"',
    }
    completion = openai.ChatCompletion.create(
        model="gpt-4-1106-preview",
        messages=[system_message, user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message['content']

def explain_term(term):
    """Ask the chat model to explain a term.

    Args:
        term: The term to explain; appended to the user prompt.

    Returns:
        The message content of the first choice in the completion response.
    """
    prompt = [
        {"role": "system", "content": "You are a helpful assistant that provides definitions."},
        {"role": "user", "content": f"Explain the term: {term}"},
    ]
    reply = openai.ChatCompletion.create(
        model="gpt-4-1106-preview",
        messages=prompt,
    )
    return reply['choices'][0]['message']['content']

st.title('FinChat')

api_key = st.text_input("Enter your OpenAI API key:", type="password")

if api_key:
    openai.api_key = api_key

    # Streamlit re-executes this script on every widget interaction. Remember
    # the key that already passed validation for this session so the probe
    # request below is not re-sent (and billed) on each rerun. Failures are
    # deliberately not cached, so a transient error is retried on the next run.
    if st.session_state.get("validated_api_key") == api_key:
        st.success("API key is valid!")
    else:
        try:
            # Minimal chat request used only to check that the key is accepted.
            openai.ChatCompletion.create(
                model="gpt-4-1106-preview",
                messages=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": "Hello"},
                ],
            )
        except Exception as e:
            # Top-level UI boundary: report any failure to the user instead of
            # crashing the app.
            st.error(f"Failed to validate API key: {e}")
        else:
            st.session_state["validated_api_key"] = api_key
            st.success("API key is valid!")
else:
    st.warning("Please enter your OpenAI API key.")

uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

language = st.selectbox('Select your language', ['English', 'French','Chinese','Korean','Spanish','German','Japanese'])

if uploaded_file is not None:
    # Persist the upload to disk so it can be opened by path.
    with open("temp.pdf", "wb") as f:
        f.write(uploaded_file.getbuffer())
    # Only the extracted text is used below; the document-level tensors that
    # preprocess_pdf also returns are not needed here.
    _, _, text = preprocess_pdf("temp.pdf", tokenizer)
    st.write('File successfully uploaded and processed')

    question = st.text_input("Enter your question:")

    if question:
        sentences = sent_tokenize(text)
        predictions = []

        # Score every sentence of the document against the question with the
        # classifier, one (question, sentence) pair at a time.
        for sentence in sentences:
            inputs = tokenizer.encode_plus(question, sentence, return_tensors='pt', padding='max_length', truncation=True, max_length=512)
            input_ids = inputs['input_ids'].to(device)
            attention_mask = inputs['attention_mask'].to(device)

            with torch.no_grad():
                outputs = model(input_ids, attention_mask)
                probabilities = F.softmax(outputs, dim=1)
                prediction = probabilities.argmax(dim=1).item()
                predictions.append((sentence, prediction, probabilities[0].tolist()))

        # Rank by the probability of class index 1 rather than by the 0/1
        # predicted label: sorting on the label alone discards the confidence
        # and leaves the order inside each class arbitrary. Class-1 sentences
        # still come first, as with the previous descending sort on the label.
        predictions.sort(key=lambda item: item[2][1], reverse=True)
        # Number of best-ranked sentences handed to the chat model as context.
        max_context_sentences = 13
        top_sentences = predictions[:max_context_sentences]

        chat_history = "\n".join([entry[0] for entry in top_sentences])

        response = openai.ChatCompletion.create(
            model="gpt-4-1106-preview",
            messages=[
                {"role": "system", "content": "You are a helpful generator which read the short paragraphs and answer the question."},
                {"role": "user", "content": chat_history},
                {"role": "user", "content": question},
            ]
        )

        # The answer is produced in English and translated only when another
        # language is selected.
        if language != 'English':
            response_content = translate_text(response.choices[0].message['content'], language)
        else:
            response_content = response.choices[0].message['content']

        st.text("Answer: " + response_content)

# Free-text box for looking up a single term.
term = st.text_input("Enter a term you want to define:")

if term:
    # The explanation is requested first and translated afterwards, but only
    # when a language other than English is selected.
    explanation = explain_term(term)
    definition = explanation if language == 'English' else translate_text(explanation, language)
    st.text("Definition: " + definition)