import torch
import pandas as pd
import re
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Set device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load model
model_name = "rajistics/informal_formal_style_transfer"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

# Load dataset
df = pd.read_csv("hf://datasets/thesherrycode/gen-z-slangs-translation/gen_z_slangs_translation.csv")
df = df[["Gen-Z Slang", "Plain English"]].dropna().drop_duplicates()
df.columns = ["slang", "formal"]

# Random samples from dataset
gradio_examples = [["[Gen-Z Example] " + row["slang"]] for _, row in df.sample(3, random_state=1).iterrows()]

# Manual examples
examples = [
    ["hey, can u send me the stuff by tonight?"],
    ["yo sorry i missed the call, was busy"],
    ["lemme know if ur free tmrw to chat abt the thing"],
    ["bro the file's messed up, fix it asap pls"],
    ["i'm out rn, text u later"]
]

# Clean output
def clean_output(output: str):
    return re.sub(r"(?i)make this sentence more formal", "", output).strip()

# Model inference
def make_formal(text):
    if not text.strip():
        return "⚠️ Please enter some text."
    prompt = "[Casual] " + text.strip() + " [Formal]"
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
    outputs = model.generate(**inputs, max_new_tokens=100)
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return clean_output(result)

# Gradio app
iface = gr.Interface(
    fn=make_formal,
    inputs=gr.Textbox(
        lines=3,
        label="🗣️ Your Slang / Casual Text",
        placeholder="e.g., yo can u help me out real quick?"
    ),
    outputs=gr.Textbox(
        label="📄 Formal & Polished Version",
        lines=4,
        interactive=True
    ),
    title="💬 Text Polisher: From Slang to Formal",
    description=(
        "Transform casual, Gen-Z slang, or unpolished English into clear, professional language. 🧠✨\n\n"
        "This demo uses a text generation model to rewrite input sentences with improved formality, great for school, work, or writing more professionally.\n\n"
        "✍️ The output is editable; feel free to tweak it before using/copying!"
    ),
    article=(
        "**Project by Jonathan Friedman**\n\n"
        "📌 Task: Text Generation (Formality Transfer using Sequence-to-Sequence)\n\n"
        "🧠 Model: rajistics/informal_formal_style_transfer\n\n"
        "📚 Dataset: thesherrycode/gen-z-slangs-translation (used to generate real-world Gen-Z slang examples)\n\n"
        "🛠️ Tech Stack: Hugging Face Transformers, Gradio"
    ),
    examples=examples + gradio_examples,
    theme="soft"
)

iface.launch()
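
# --- Optional: quick sanity check of make_formal() outside the UI ---
# A minimal sketch, assuming the model and tokenizer above have loaded successfully;
# handy from a REPL or notebook. Left commented out because iface.launch() above
# blocks when this file is run as a script.
#
#     sample = "yo can u help me out real quick?"
#     print(make_formal(sample))
#
# Gradio can also expose the demo via a temporary public link with iface.launch(share=True).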