Spaces:

Ahmed235
/

final

Sleeping

App Files Files Community

final / app.py

Ahmed235

Update app.py

a0a0944 verified over 1 year ago

raw

history blame

2.34 kB

	# Standard library
	import re

	# Third-party: UI, tensor ops, .pptx parsing, and Hugging Face models
	import gradio as gr
	import torch
	import torch.nn.functional as F
	from pptx import Presentation
	from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

	# Hugging Face Hub checkpoint ids: one for classification, one for summarization.
	CLASSIFIER_CHECKPOINT = "Ahmed235/roberta_classification"
	SUMMARIZER_CHECKPOINT = "Falconsai/text_summarization"

	# The tokenizer and the sequence-classification model come from the same
	# checkpoint and are loaded once, when the module is first executed.
	tokenizer = AutoTokenizer.from_pretrained(CLASSIFIER_CHECKPOINT)
	model = AutoModelForSequenceClassification.from_pretrained(CLASSIFIER_CHECKPOINT)

	# Summarization pipeline backed by its own checkpoint.
	summarizer = pipeline(task="summarization", model=SUMMARIZER_CHECKPOINT)

	def extract_text_from_pptx(file_path):
	    """Return the text of every text-bearing shape in a PowerPoint file.

	    Slides are visited in the order ``presentation.slides`` yields them and,
	    within each slide, shapes in the order ``slide.shapes`` yields them.
	    Shapes that expose no ``text`` attribute are skipped.

	    Args:
	        file_path: Value handed straight to ``Presentation`` to open the deck.

	    Returns:
	        The collected shape texts joined with newline characters; an empty
	        string when no shape contributes any text.
	    """
	    presentation = Presentation(file_path)
	    return "\n".join(
	        shape.text
	        for slide in presentation.slides
	        for shape in slide.shapes
	        if hasattr(shape, "text")
	    )

	def predict_pptx_content(file_path):
	    """Classify and summarize the text found in a PowerPoint file.

	    The text is extracted with ``extract_text_from_pptx``, whitespace runs are
	    collapsed to single spaces, and the result is fed both to the
	    module-level classification ``model`` (via ``tokenizer``) and to the
	    module-level ``summarizer`` pipeline.

	    Args:
	        file_path: Path of the .pptx file to analyze.

	    Returns:
	        A dict with three entries:
	        ``"Predicted Label"`` (label name from ``model.config.id2label``),
	        ``"Evaluation"`` (sentence containing the softmax probability of
	        that label) and ``"Summary"`` (text produced by the summarizer).

	    Raises:
	        ValueError: If no file was supplied, or the presentation contains
	            no text to analyze.
	    """
	    # python-pptx treats a missing path as "open the built-in default
	    # template", so an absent upload must be rejected explicitly instead of
	    # being analyzed as if it were a real, empty deck.
	    if not file_path:
	        raise ValueError("No PowerPoint file was provided.")

	    extracted_text = extract_text_from_pptx(file_path)
	    cleaned_text = re.sub(r'\s+', ' ', extracted_text)

	    # Without any text the classifier and summarizer would only produce
	    # meaningless output, so fail early with a clear message. The text
	    # itself is passed on unmodified so results for valid decks are unchanged.
	    if not cleaned_text.strip():
	        raise ValueError("The presentation does not contain any text to analyze.")

	    # Tokenize and encode the cleaned text (truncated to the model's limit)
	    input_encoding = tokenizer(cleaned_text, truncation=True, padding=True, return_tensors="pt")

	    # Inference only: no gradients are needed
	    with torch.no_grad():
	        logits = model(**input_encoding).logits

	    probabilities = F.softmax(logits, dim=1)

	    # A single text was encoded, so row 0 holds the only prediction
	    predicted_label_id = torch.argmax(logits, dim=1).item()
	    predicted_label = model.config.id2label[predicted_label_id]
	    predicted_probability = probabilities[0][predicted_label_id].item()

	    # Summarize the cleaned text deterministically (no sampling)
	    summary = summarizer(cleaned_text, max_length=80, min_length=30, do_sample=False)[0]['summary_text']

	    return {
	        "Predicted Label": predicted_label,
	        "Evaluation": f"Evaluate the topic according to {predicted_label} is: {predicted_probability}",
	        "Summary": summary,
	    }

	def _analyze_for_interface(file_path):
	    """Adapt ``predict_pptx_content`` to the interface's three text outputs.

	    ``predict_pptx_content`` returns one dict, while the interface below
	    declares three output components and therefore needs one value per
	    component, in order. The values are picked by key so the mapping to
	    the outputs stays explicit.
	    """
	    result = predict_pptx_content(file_path)
	    return result["Predicted Label"], result["Evaluation"], result["Summary"]


	# Define the Gradio interface
	iface = gr.Interface(
	    fn=_analyze_for_interface,
	    inputs=gr.File(type="filepath", label="Upload PowerPoint (.pptx) file"),
	    outputs=["text", "text", "text"],  # Predicted Label, Evaluation, Summary
	    live=False,  # Run only on submit; True would re-run on every input change
	    title="<h1 style='color: lightgreen; text-align: center;'>PPTX Analyzer</h1>",
	)

	# Deploy the Gradio interface
	iface.launch(share=True)