File size: 2,846 Bytes
4e8d173
720de2c
 
 
 
4e8d173
 
720de2c
 
 
 
 
4e8d173
 
720de2c
4e8d173
720de2c
4e8d173
 
 
 
720de2c
4e8d173
 
720de2c
4e8d173
720de2c
 
 
 
 
4e8d173
 
720de2c
4e8d173
720de2c
4e8d173
720de2c
4e8d173
720de2c
 
 
 
 
 
 
4e8d173
720de2c
4e8d173
720de2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c82ead8
 
 
1cc27a5
720de2c
 
 
 
4e8d173
 
720de2c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79

import torch
import pandas as pd
import re
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Pick the compute device: use CUDA when torch reports it, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Load the tokenizer and the seq2seq checkpoint by name, then move the model
# onto the device chosen above.
model_name = "rajistics/informal_formal_style_transfer"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
model = model.to(device)

# Load dataset
# In this file the dataset is only used to build a few optional demo examples,
# so a failed download, a parse error or a missing column falls back to an
# empty frame instead of aborting start-up.
try:
    df = pd.read_csv("hf://datasets/thesherrycode/gen-z-slangs-translation/gen_z_slangs_translation.csv")
    df = df[["Gen-Z Slang", "Plain English"]].dropna().drop_duplicates()
    df.columns = ["slang", "formal"]
except Exception as exc:  # start-up boundary: report the problem and degrade
    print(f"Could not load Gen-Z slang dataset ({exc!r}); continuing without dataset examples.")
    df = pd.DataFrame(columns=["slang", "formal"])

# Random samples from dataset
# The sample size is capped at the number of available rows because
# DataFrame.sample raises ValueError when asked for more rows than exist.
# random_state=1 keeps the chosen examples the same on every start.
gradio_examples = [
    [f"[Gen-Z Example] {slang}"]
    for slang in df.sample(min(3, len(df)), random_state=1)["slang"]
]

# Hand-written casual sentences offered as examples; each sentence is wrapped
# in its own single-item list.
examples = [
    [sentence]
    for sentence in (
        "hey, can u send me the stuff by tonight?",
        "yo sorry i missed the call, was busy",
        "lemme know if ur free tmrw to chat abt the thing",
        "bro the file’s messed up, fix it asap pls",
        "i'm out rn, text u later",
    )
]

# Clean output
def clean_output(output: str):
    """Drop the instruction phrase from generated text and trim whitespace.

    Every case-insensitive occurrence of "make this sentence more formal"
    is removed from ``output``; leading and trailing whitespace is then
    stripped from what remains.

    Args:
        output: Text to clean.

    Returns:
        The cleaned string.
    """
    instruction_pattern = re.compile(r"(?i)make this sentence more formal")
    without_instruction = instruction_pattern.sub("", output)
    return without_instruction.strip()

# Model inference
def make_formal(text):
    """Rewrite casual or slang text in a more formal register.

    Args:
        text: The sentence(s) to rewrite. ``None`` and empty or
            whitespace-only strings are treated as missing input.

    Returns:
        The decoded model output passed through ``clean_output``, or a
        warning message asking for input when ``text`` is missing.
    """
    # Guard against None as well as blank text so a missing value yields the
    # warning below instead of an AttributeError from ``None.strip()``.
    stripped = text.strip() if text is not None else ""
    if not stripped:
        return "⚠️ Please enter some text."

    # NOTE(review): the prompt wraps the text in "[Casual] ... [Formal]" tags;
    # confirm against the model card that this is the input format the
    # checkpoint expects.
    prompt = "[Casual] " + stripped + " [Formal]"

    # Tokenize with truncation enabled and move the tensors to the model's device.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
    outputs = model.generate(**inputs, max_new_tokens=100)

    # Decode the first generated sequence, dropping special tokens.
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return clean_output(result)

# Gradio app
# Single-textbox interface: the user's text goes through make_formal and the
# result is shown in an editable output textbox. The labels, title and
# description use literal emoji and em dash characters.
iface = gr.Interface(
    fn=make_formal,
    inputs=gr.Textbox(
        lines=3,
        label="🗣️ Your Slang / Casual Text",
        placeholder="e.g., yo can u help me out real quick?"
    ),
    outputs=gr.Textbox(
        label="📄 Formal & Polished Version",
        lines=4,
        # Editable so the user can tweak the result before copying it.
        interactive=True
    ),
    title="💬 Text Polisher: From Slang to Formal",
    description=(
        "Transform casual, Gen-Z slang, or unpolished English into clear, professional language. 🧠✨\n\n"
        "This demo uses a text generation model to rewrite input sentences with improved formality — great for school, work, or writing more professionally.\n\n"
        "✍️ The output is editable — feel free to tweak before using/copying!"
    ),
    article=(
        "**Project by Jonathan Friedman**  \n\n"
        "📌 Task: Text Generation (Formality Transfer using Sequence-to-Sequence)\n\n"
        "🧠 Model: rajistics/informal_formal_style_transfer\n\n"
        "📚 Dataset: thesherrycode/gen-z-slangs-translation (used to generate real-world Gen-Z slang examples)\n\n"
        "🛠️ Tech Stack: Hugging Face Transformers, Gradio"
    ),
    # Hand-written examples first, followed by the samples drawn from the dataset.
    examples=examples + gradio_examples,
    theme="soft"
)

iface.launch()