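"""Gradio demo for cleaning text with the textcleanlm/textclean-4B model.

The model is loaded lazily on the first request and generated output is
streamed back to the UI chunk by chunk via TextIteratorStreamer.
"""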
import gradio as gr
import spaces
import torch
from threading import Thread
from transformers import (
    AutoModel,
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    TextIteratorStreamer,
)

model_id = "textcleanlm/textclean-4B"

# Loaded lazily in load_model() so the weights are only fetched on first use
model = None
tokenizer = None


def load_model():
    global model, tokenizer
    if model is None:
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        # Add a padding token if the tokenizer does not define one
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        # The checkpoint's architecture is not known up front, so try the
        # seq2seq, causal-LM, and generic model classes in turn
        for model_class in [AutoModelForSeq2SeqLM, AutoModelForCausalLM, AutoModel]:
            try:
                model = model_class.from_pretrained(
                    model_id,
                    torch_dtype=torch.bfloat16,
                    device_map="auto",
                )
                break
            except Exception:
                continue
        if model is None:
            raise ValueError(f"Could not load model {model_id}")
    return model, tokenizer

@spaces.GPU(duration=60)
def clean_text(text):
    model, tokenizer = load_model()
    inputs = tokenizer(text, return_tensors="pt", max_length=4096, truncation=True)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    # Stream decoded tokens as they are produced; skip_prompt avoids echoing
    # the input back when the model turns out to be a causal LM
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        max_new_tokens=4096,  # cap new tokens, not total length, so long inputs can still generate
        num_beams=1,  # beam search does not support streaming
        do_sample=True,
        temperature=1.0,
        streamer=streamer,
    )

    # Run generation in a separate thread so the streamer can be consumed here
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    # Yield the accumulated text as each new chunk arrives
    generated_text = ""
    for new_text in streamer:
        generated_text += new_text
        yield generated_text
    thread.join()
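
# gr.Interface streams output from generator functions like clean_text,
# so each yield above updates the output textbox in place.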
iface = gr.Interface(
    fn=clean_text,
    inputs=gr.Textbox(
        lines=5,
        placeholder="Enter text to clean...",
        label="Input Text",
    ),
    outputs=gr.Textbox(
        lines=5,
        label="Cleaned Text",
    ),
    title="TextClean-4B Demo",
    description="Simple demo for text cleaning using the textcleanlm/textclean-4B model",
)

if __name__ == "__main__":
    iface.launch()