Ilvir committed on
Commit
f4c31e1
·
1 Parent(s): 09dba1b

Upload 3 files

Browse files
Files changed (3) hide show
  1. pages/answers.py +35 -0
  2. pages/gpt.py +35 -0
  3. pages/imdb.py +19 -0
pages/answers.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
3
+
4
# Hugging Face checkpoint used for extractive question answering.
model_name = "timpal0l/mdeberta-v3-base-squad2"

# Tokenizer and model are both loaded from the same checkpoint at import time.
# NOTE(review): this runs on every execution of the script; if the page is
# re-run on each interaction, caching the loaded objects may be worthwhile.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
7
+
8
def get_answer(context, question):
    """Run the question-answering pipeline and return its answer.

    Args:
        context: Text passed to the pipeline under the ``'context'`` key.
        question: Text passed to the pipeline under the ``'question'`` key.

    Returns:
        The value stored under the ``'answer'`` key of the pipeline result.
    """
    qa_pipeline = pipeline('question-answering', model=model, tokenizer=tokenizer)
    prediction = qa_pipeline({'question': question, 'context': context})
    return prediction['answer']
14
+
15
def main():
    """Render the question-answering page.

    Shows a context box and a question box. When the "Get Answer" button is
    pressed, the page either warns about missing input or displays the answer
    returned by ``get_answer``.
    """
    st.title("Question Answering App")
    st.markdown("Enter the context and question, then click on 'Get Answer' to retrieve the answer.")

    # The hint text is a placeholder, not the initial value: as an initial
    # value it would be submitted as real input, and the emptiness check below
    # could never trigger for an untouched form.
    context = st.text_area("Context", placeholder="Enter the context here...")
    question = st.text_input("Question", placeholder="Enter the question here...")

    if not st.button("Get Answer"):
        return

    # Whitespace-only input counts as missing.
    if not context.strip() or not question.strip():
        st.warning("Please enter the context and question.")
        return

    answer = get_answer(context, question)
    st.success(f"Answer: {answer}")


if __name__ == "__main__":
    main()
pages/gpt.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
3
+
4
# Hub checkpoint that provides the architecture for the fine-tuned weights
# loaded below; the tokenizer is taken from the same checkpoint.
_BASE_CHECKPOINT = 'sberbank-ai/rugpt3small_based_on_gpt2'

model = GPT2LMHeadModel.from_pretrained(
    _BASE_CHECKPOINT,
    output_attentions=False,
    output_hidden_states=False,
)
# Attach the saved weights to the model. ``map_location`` is an argument of
# ``torch.load`` (``load_state_dict`` does not accept it); it remaps the stored
# tensors onto the CPU.
model.load_state_dict(torch.load('models/model.pt', map_location=torch.device('cpu')))

# The script passes ``tokenizer`` to ``generate_text`` but never created it.
# NOTE(review): assumes the saved weights keep the base checkpoint's
# vocabulary - confirm.
tokenizer = GPT2Tokenizer.from_pretrained(_BASE_CHECKPOINT)
11
+
12
+
13
def generate_text(model, tokenizer, prompt, length, num_samples, temperature):
    """Generate text continuations of *prompt*.

    Args:
        model: Object exposing ``generate(...)``.
        tokenizer: Object exposing ``encode(...)`` and ``decode(...)``.
        prompt: Text to encode and feed to the model.
        length: Value passed to ``generate`` as ``max_length``.
        num_samples: Number of sequences requested from ``generate``.
        temperature: Sampling temperature passed to ``generate``.

    Returns:
        A list with one decoded string per sequence returned by ``generate``.
    """
    input_ids = tokenizer.encode(prompt, return_tensors='pt')
    output_sequences = model.generate(
        input_ids=input_ids,
        max_length=length,
        num_return_sequences=num_samples,
        temperature=temperature,
        # Sampling has to be enabled explicitly: without it ``temperature``
        # has no effect and requesting several sequences is rejected.
        do_sample=True,
    )
    return [
        tokenizer.decode(sequence, clean_up_tokenization_spaces=True)
        for sequence in output_sequences
    ]
28
+
29
+
30
# Generation controls. The button handler below reads these four names, which
# the script never defined, so pressing the button raised NameError.
# NOTE(review): the ranges and defaults are reviewer-chosen - adjust as needed.
prompt = st.text_area('Начало текста')
length = st.slider('Максимальная длина', min_value=10, max_value=200, value=50)
num_samples = st.slider('Количество текстов', min_value=1, max_value=5, value=1)
temperature = st.slider('Температура', min_value=0.1, max_value=2.0, value=1.0, step=0.1)

if st.button('Сгенерировать текст'):
    if not prompt.strip():
        # Do not call the model with an empty prompt.
        st.warning('Введите начало текста.')
    else:
        generated_texts = generate_text(model, tokenizer, prompt, length, num_samples, temperature)
        # Number the outputs from 1 for display.
        for number, text in enumerate(generated_texts, start=1):
            st.write(f'Текст {number}:')
            st.write(text)
35
+
pages/imdb.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import pandas as pd
4
+
5
+ import matplotlib.pyplot as plt
6
+
7
+ import re
8
+ import string
9
+ from collections import Counter
10
+ from nltk.corpus import stopwords
11
+ stop_words = set(stopwords.words('english'))
12
+
13
+ from sklearn.model_selection import train_test_split
14
+
15
+ import torch
16
+ from torch.utils.data import DataLoader, TensorDataset
17
+ import torch.nn as nn
18
+ import torchutils as tu
19
+ from torchmetrics.classification import BinaryAccuracy