File size: 2,787 Bytes
4e8d173
720de2c
 
 
 
4e8d173
 
720de2c
 
 
 
 
4e8d173
 
720de2c
4e8d173
720de2c
4e8d173
 
 
 
720de2c
4e8d173
 
720de2c
4e8d173
720de2c
 
 
 
 
4e8d173
 
720de2c
4e8d173
720de2c
4e8d173
720de2c
4e8d173
720de2c
 
 
 
 
 
 
4e8d173
720de2c
4e8d173
720de2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4e8d173
 
720de2c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79

import torch
import pandas as pd
import re
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Choose the compute device: use CUDA when torch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Load the pretrained tokenizer and seq2seq model named by `model_name`,
# then move the model onto the selected device.
model_name = "rajistics/informal_formal_style_transfer"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
model = model.to(device)

# Load the slang -> plain-English pairs, keep only the two text columns,
# drop incomplete and duplicated rows, and give the columns short names.
df = (
    pd.read_csv("hf://datasets/thesherrycode/gen-z-slangs-translation/gen_z_slangs_translation.csv")
    .loc[:, ["Gen-Z Slang", "Plain English"]]
    .dropna()
    .drop_duplicates()
    .rename(columns={"Gen-Z Slang": "slang", "Plain English": "formal"})
)

# Three reproducibly sampled slang phrases (fixed random_state), each wrapped
# in its own single-element list so it can be used as an example row.
gradio_examples = [
    ["[Gen-Z Example] " + phrase]
    for phrase in df.sample(3, random_state=1)["slang"]
]

# Hand-written casual sentences offered as additional examples.
examples = [
    ["hey, can u send me the stuff by tonight?"],
    ["yo sorry i missed the call, was busy"],
    ["lemme know if ur free tmrw to chat abt the thing"],
    ["bro the file’s messed up, fix it asap pls"],
    ["i'm out rn, text u later"],
]

# Clean output
# Matches the instruction phrase, case-insensitively, wherever it occurs.
_INSTRUCTION_ECHO = re.compile(r"(?i)make this sentence more formal")


def clean_output(output: str) -> str:
    """Remove the instruction phrase from *output* and trim the result.

    Every case-insensitive occurrence of "make this sentence more formal"
    is deleted, after which leading and trailing whitespace is stripped.
    Whitespace left in the middle of the string is not collapsed.
    """
    without_instruction = _INSTRUCTION_ECHO.sub("", output)
    return without_instruction.strip()

# Model inference
def make_formal(text):
    """Rewrite casual text in a more formal register with the loaded model.

    Args:
        text: The user's input. ``None`` and whitespace-only values are
            treated as "no input".

    Returns:
        The decoded model output after ``clean_output`` has been applied,
        or a short warning message when no text was supplied.
    """
    # Normalise once: a missing value (None) is handled like an empty string
    # instead of raising AttributeError on ``.strip()``.
    stripped = (text or "").strip()
    if not stripped:
        return "⚠️ Please enter some text."

    # Wrap the input in the casual/formal markers used as the model prompt.
    prompt = "[Casual] " + stripped + " [Formal]"
    # Tokenize with truncation enabled and move the tensors to the model's device.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
    # Generate at most 100 new tokens.
    outputs = model.generate(**inputs, max_new_tokens=100)
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return clean_output(result)

# Gradio app
# Build the single-function UI: one input textbox, one editable output
# textbox, and the combined manual + dataset-sampled examples.
# The emoji and the em dash in the user-facing strings below were stored as
# mis-decoded UTF-8 (mojibake); they are written here as the intended characters.
iface = gr.Interface(
    fn=make_formal,
    inputs=gr.Textbox(
        lines=3,
        label="🗣️ Your Slang / Casual Text",
        placeholder="e.g., yo can u help me out real quick?"
    ),
    outputs=gr.Textbox(
        label="📄 Formal & Polished Version",
        lines=4,
        # Editable so the user can tweak the result before copying it.
        interactive=True
    ),
    title="💬 Text Polisher: From Slang to Formal",
    description=(
        "Transform casual, Gen-Z slang, or unpolished English into clear, professional language. 🧠✨\n\n"
        "This demo uses a text generation model to rewrite input sentences with improved formality — great for school, work, or writing more professionally.\n\n"
        "✍️ The output is editable — feel free to tweak before using/copying!"
    ),
    # Each line ends with two spaces before the newline (a Markdown hard line
    # break) so every entry is rendered on its own line.
    article=(
        "**Project by Jonathan Friedman**  \n"
        "📌 Task: Text Generation (Formality Transfer using Sequence-to-Sequence)  \n"
        "🧠 Model: rajistics/informal_formal_style_transfer  \n"
        "📚 Dataset: thesherrycode/gen-z-slangs-translation  \n"
        "🛠️ Tech Stack: Hugging Face Transformers, Gradio"
    ),
    examples=examples + gradio_examples,
    theme="soft"
)

iface.launch()