final / app.py
Ahmed235's picture
Update app.py
a0a0944 verified
raw
history blame
2.34 kB
from pptx import Presentation
import re
import gradio as gr
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import torch
torch.device("cpu")
import torch.nn.functional as F
from transformers import pipeline
# Hugging Face Hub checkpoints used by this app.
CLASSIFIER_CHECKPOINT = "Ahmed235/roberta_classification"
SUMMARIZER_CHECKPOINT = "Falconsai/text_summarization"

# The tokenizer and the sequence-classification model come from the same
# checkpoint so the token ids line up with the model's vocabulary.
tokenizer = AutoTokenizer.from_pretrained(CLASSIFIER_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(CLASSIFIER_CHECKPOINT)

# Summarization pipeline, built once at import time and reused per request.
summarizer = pipeline("summarization", model=SUMMARIZER_CHECKPOINT)
def extract_text_from_pptx(file_path):
    """Return all shape text found in a PowerPoint file as one string.

    Slides are visited in presentation order and, within each slide, shapes
    in the order the slide lists them. Only shapes that expose a ``text``
    attribute contribute; their texts are joined with newlines.

    Args:
        file_path: Path of the .pptx file, passed straight to ``Presentation``.

    Returns:
        The newline-joined shape texts (an empty string when nothing matched).
    """
    deck = Presentation(file_path)
    fragments = [
        shape.text
        for slide in deck.slides
        for shape in slide.shapes
        if hasattr(shape, "text")
    ]
    return "\n".join(fragments)
def predict_pptx_content(file_path):
    """Classify and summarize the text content of a PowerPoint file.

    The text is pulled out with ``extract_text_from_pptx``, every run of
    whitespace is collapsed to a single space, and the result is fed both to
    the module-level classifier (``tokenizer`` / ``model``) and to the
    module-level ``summarizer`` pipeline.

    Args:
        file_path: Path of the .pptx file to analyze.

    Returns:
        A dict with three string values:
        ``"Predicted Label"`` - the label name of the highest-scoring class,
        ``"Evaluation"`` - a sentence reporting that class's softmax probability,
        ``"Summary"`` - the generated summary text.
    """
    raw_text = extract_text_from_pptx(file_path)
    cleaned_text = re.sub(r'\s+', ' ', raw_text)

    # Encode the text for the classifier; truncation=True lets the tokenizer
    # cut the input instead of passing an over-long sequence to the model.
    encoded = tokenizer(
        cleaned_text,
        truncation=True,
        padding=True,
        return_tensors="pt",
    )

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        logits = model(**encoded).logits

    class_probs = F.softmax(logits, dim=1)
    # A single text is encoded, so the batch holds one row and .item() is valid.
    best_id = logits.argmax(dim=1).item()
    best_label = model.config.id2label[best_id]
    best_prob = class_probs[0][best_id].item()

    # Deterministic (non-sampling) summary bounded to 30-80 generated tokens.
    summary_outputs = summarizer(
        cleaned_text,
        max_length=80,
        min_length=30,
        do_sample=False,
    )
    summary_text = summary_outputs[0]['summary_text']

    return {
        "Predicted Label": best_label,
        "Evaluation": f"Evaluate the topic according to {best_label} is: {best_prob}",
        "Summary": summary_text,
    }
# Define the Gradio interface
def _prediction_as_outputs(file_path):
    """Run the analysis and unpack its result for the three output boxes.

    ``predict_pptx_content`` returns a dict keyed by display name, but Gradio
    assigns a function's return values to multiple output components by
    position. Handing it the dict directly fails with an output-count
    mismatch, so the values are returned as a tuple in the same order as the
    ``outputs`` list below.
    """
    prediction = predict_pptx_content(file_path)
    return (
        prediction["Predicted Label"],
        prediction["Evaluation"],
        prediction["Summary"],
    )


iface = gr.Interface(
    fn=_prediction_as_outputs,
    inputs=gr.File(type="filepath", label="Upload PowerPoint (.pptx) file"),
    # One labelled textbox per value returned by _prediction_as_outputs.
    outputs=[
        gr.Textbox(label="Predicted Label"),
        gr.Textbox(label="Evaluation"),
        gr.Textbox(label="Summary"),
    ],
    # live=False runs the analysis only on submit; True would re-run it
    # automatically every time the uploaded file changes.
    live=False,
    title="<h1 style='color: lightgreen; text-align: center;'>PPTX Analyzer</h1>",
)

# Deploy the Gradio interface
iface.launch(share=True)