# Source: Hugging Face Space file view of app.py, uploaded by ghostai1.
# Commit 938ef03 (verified): "Update app.py" (2.07 kB).
# 🔄 Text Paraphraser | CPU-only HF Space
import gradio as gr
from transformers import (
AutoTokenizer,
AutoModelForSeq2SeqLM,
pipeline,
)
# 1️⃣ Fetch the checkpoint together with its slow (use_fast=False) tokenizer
MODEL_ID = "Vamsi/T5_Paraphrase_Paws"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=False)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)

# 2️⃣ Wrap the already-loaded model and tokenizer in a text2text pipeline,
#    so the pipeline reuses these objects rather than loading its own.
paraphraser = pipeline(
    task="text2text-generation",
    model=model,
    tokenizer=tokenizer,
    device=-1,  # CPU
)
def paraphrase(text: str, num_variations: int):
    """Generate sampled paraphrases of ``text``.

    Args:
        text: Sentence to rewrite. ``None``, empty, or whitespace-only input
            yields an empty list without invoking the model.
        num_variations: Number of paraphrases to request. Coerced to ``int``
            (a UI widget may hand over a value such as ``3.0``) and clamped
            to a minimum of 1.

    Returns:
        A list of paraphrase strings with surrounding whitespace removed,
        one entry per sequence returned by the pipeline.
    """
    cleaned = (text or "").strip()
    if not cleaned:
        return []

    # The generation call needs an integer sequence count; normalise here so
    # callers do not have to care what numeric type their widget produces.
    count = max(1, int(num_variations))

    outputs = paraphraser(
        "paraphrase: " + cleaned,  # task prefix prepended to every input
        max_length=128,
        num_return_sequences=count,
        do_sample=True,  # sampling (top-k / top-p) gives varied rewrites
        top_k=120,
        top_p=0.95,
    )
    return [out["generated_text"].strip() for out in outputs]
# Upper bound on paraphrases per request. The slider range, the table's
# headers/datatypes and the row padding are all derived from this value so
# they cannot drift apart.
MAX_VARIATIONS = 5

# Gradio UI: a text box and a count slider feed `paraphrase`; the results are
# shown as a single-row, read-only table with one column per variant.
# NOTE(review): the original indentation was lost, so which components sit
# inside the Row below is a reconstruction; confirm the intended layout.
with gr.Blocks(title="🔄 Text Paraphraser") as demo:
    gr.Markdown(
        "# 🔄 Text Paraphraser\n"
        "Enter a sentence and get multiple alternative rewrites—all on CPU."
    )

    with gr.Row():
        input_text = gr.Textbox(
            label="Input Sentence",
            placeholder="Type something to paraphrase…",
            lines=3,
        )
        variations = gr.Slider(
            1, MAX_VARIATIONS, value=3, step=1,
            label="Number of Variations",
        )

    run_btn = gr.Button("Paraphrase 🔁", variant="primary")
    output_df = gr.Dataframe(
        label="Paraphrases",
        headers=[f"Variant #{i}" for i in range(1, MAX_VARIATIONS + 1)],
        datatype=["str"] * MAX_VARIATIONS,
        interactive=False,
        row_count=1,
    )

    def format_for_dataframe(results):
        """Shape a list of paraphrases into one fixed-width table row.

        Args:
            results: Paraphrase strings (possibly empty).

        Returns:
            A one-element list holding a row of exactly ``MAX_VARIATIONS``
            cells: the results, truncated if there are too many and padded
            with empty strings if there are too few.
        """
        # Truncate first so the row can never be wider than the table.
        row = list(results)[:MAX_VARIATIONS]
        row += [""] * (MAX_VARIATIONS - len(row))
        return [row]

    # Button click: paraphrase the input, then lay the results out as a row.
    run_btn.click(
        fn=lambda text, n: format_for_dataframe(paraphrase(text, n)),
        inputs=[input_text, variations],
        outputs=output_df,
    )
# Script entry point: start the Gradio server only when this file is run
# directly, not when it is imported.
if __name__ == "__main__":
    # "0.0.0.0" binds to all network interfaces rather than loopback only.
    demo.launch(server_name="0.0.0.0")