Spaces:

Jforeverss
/

finchat222

Runtime error

App Files Files

Jforeverss committed on Oct 24, 2023

Commit

9991ce4

1 Parent(s): f74b624

Create demo123

Browse files

Files changed (1) hide show

demo123 +173 -0

demo123 ADDED Viewed

	@@ -0,0 +1,173 @@

+# Install Streamlit and pyngrok
+!pip install -q streamlit
+!pip install -q pyngrok
+!pip install -q pdfplumber
+!pip install -q transformers
+!pip install -q tabula-py
+!pip install -q openai
+# Write the Streamlit app script
+# Write the Streamlit app script
+%%writefile app.py
+import os
+
+import nltk
+import openai
+import pdfplumber
+import streamlit as st
+import torch
+import torch.nn.functional as F
+from nltk.tokenize import sent_tokenize
+from torch import nn
+from transformers import RobertaTokenizer, RobertaModel
+# Download the 'punkt' package
+nltk.download('punkt')
+openai.api_key = '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'
+# Define your model architecture
+class Bert_model(nn.Module):
+    def __init__(self, hidden_size, dropout_rate):
+        super(Bert_model, self).__init__()
+        self.hidden_size = hidden_size
+        self.bert = RobertaModel.from_pretrained('deepset/roberta-base-squad2')
+        self.cls_prj = nn.Linear(hidden_size, hidden_size, bias=True)
+        self.cls_dropout = nn.Dropout(dropout_rate)
+        self.cls_final = nn.Linear(hidden_size, 2, bias=True)
+    def forward(self, input_ids, attention_mask):
+        bert_outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
+        bert_sequence_output = bert_outputs.last_hidden_state
+        bert_pooled_output = bert_sequence_output[:, 0, :]
+        pooled_output = self.cls_prj(bert_pooled_output)
+        pooled_output = self.cls_dropout(pooled_output)
+        logits = self.cls_final(pooled_output)
+        return logits
+# Load the model
+model_path = "/content/model.pt"  # Replace with your actual model path
+state_dict = torch.load(model_path)
+device = "cuda"  # or "cpu" if GPU is not available
+# Instantiate the model architecture
+model = Bert_model(hidden_size=768, dropout_rate=0.1)  # Adjust the hidden size to match the saved model
+model = nn.DataParallel(model)
+model.load_state_dict(state_dict)
+model = model.to(device)
+model.eval()
+# Load the tokenizer
+tokenizer = RobertaTokenizer.from_pretrained('deepset/roberta-base-squad2')
+# Function to preprocess PDF text
+def preprocess_pdf(pdf_path, tokenizer):
+    with pdfplumber.open(pdf_path) as pdf:
+        text = " ".join([page.extract_text() for page in pdf.pages[2:]])
+        tokenized_text = tokenizer.encode_plus(
+            text,
+            add_special_tokens=True,
+            max_length=512,
+            padding='max_length',
+            return_attention_mask=True
+        )
+        input_ids = torch.tensor([tokenized_text['input_ids']])
+        attention_mask = torch.tensor([tokenized_text['attention_mask']])
+        return input_ids, attention_mask, text
+def translate_text(text, target_language):
+    response = openai.ChatCompletion.create(
+        model="gpt-4",
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant that translates English text to other languages."},
+            {"role": "user", "content": f'Translate the following English text to {target_language}: "{text}"'},
+        ],
+    )
+    return response.choices[0].message['content']
+def explain_term(term):
+    response = openai.ChatCompletion.create(
+        model="gpt-4",
+        messages=[
+            {
+                "role": "system",
+                "content": "You are a helpful assistant that provides definitions."
+            },
+            {
+                "role": "user",
+                "content": f"Explain the term: {term}"
+            },
+        ],
+    )
+    return response['choices'][0]['message']['content']
+# Streamlit code to upload file
+st.title('FinQA (Financial Question-Answering)')
+uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
+# Select language
+language = st.selectbox('Select your language', ['English', 'French','Chinese','Korean','Spanish','German','Japanese'])
+if uploaded_file is not None:
+    with open("temp.pdf", "wb") as f:
+        f.write(uploaded_file.getbuffer())
+    input_ids, attention_mask, text = preprocess_pdf("temp.pdf", tokenizer)
+    st.write('File successfully uploaded and processed')
+    # Ask a question
+    question = st.text_input("Enter your question:")
+    if question:
+        sentences = sent_tokenize(text)
+        predictions = []
+        for sentence in sentences:
+            inputs = tokenizer.encode_plus(question, sentence, return_tensors='pt', padding='max_length', truncation=True, max_length=512)
+            input_ids = inputs['input_ids'].to(device)
+            attention_mask = inputs['attention_mask'].to(device)
+            with torch.no_grad():
+                outputs = model(input_ids, attention_mask)
+                probabilities = F.softmax(outputs, dim=1)
+                max_value, max_index = torch.max(probabilities, dim=1)
+                prediction = max_index.item()
+                predictions.append((sentence, prediction, probabilities[0].tolist()))
+        predictions.sort(key=lambda pair: pair[1], reverse=True)
+        top_5_sentences = predictions[:13]
+        #st.write("Top 5 Relevant Sentences:")
+        #for sentence, prediction, probabilities in top_5_sentences:
+            #st.write(f"Sentence: {sentence}, Prediction: {prediction}, Probability: {probabilities[prediction]}")
+        # Prepare the chat history with the top 3 sentences
+        chat_history = "\n".join([sentence[0] for sentence in top_5_sentences])
+        # Ask the question using OpenAI API
+        openai.api_key = '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'  # Replace with your actual OpenAI API key
+        response = openai.ChatCompletion.create(
+            model="gpt-4",
+            messages=[
+                {"role": "system", "content": "You are a helpful generator which read the short paragraphs and answer the question."},
+                {"role": "user", "content": chat_history},
+                {"role": "user", "content": question},
+            ]
+        )
+        if language != 'English':
+            response_content = translate_text(response.choices[0].message['content'], language)
+        else:
+            response_content = response.choices[0].message['content']
+        st.text("Answer: " + response_content)
+term = st.text_input("Enter a term you want to define:")
+if term:
+    # Define the term using OpenAI API
+    definition = explain_term(term)
+    if language != 'English':
+        definition = translate_text(definition, language)
+    st.text("Definition: " + definition)