sumuks (HF Staff) committed
Commit 0804cf7 · verified · 1 Parent(s): 35ad08d

Create app.py

Files changed (1): app.py (+70, -0)
app.py ADDED
@@ -0,0 +1,70 @@
+ import gradio as gr
+ import spaces
+ import torch
+ from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM
+
+ model_id = "textcleanlm/textclean-4B"
+ model = None
+ tokenizer = None
+
+ def load_model():
+     global model, tokenizer
+     if model is None:
+         tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+         # Add padding token if needed
+         if tokenizer.pad_token is None:
+             tokenizer.pad_token = tokenizer.eos_token
+
+         # Try different model classes
+         for model_class in [AutoModelForSeq2SeqLM, AutoModelForCausalLM, AutoModel]:
+             try:
+                 model = model_class.from_pretrained(
+                     model_id,
+                     torch_dtype=torch.bfloat16,
+                     device_map="auto"
+                 )
+                 break
+             except Exception:
+                 continue
+
+         if model is None:
+             raise ValueError(f"Could not load model {model_id}")
+
+     return model, tokenizer
+
+ @spaces.GPU(duration=60)
+ def clean_text(text):
+     model, tokenizer = load_model()
+
+     inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
+     inputs = {k: v.cuda() for k, v in inputs.items()}
+
+     with torch.no_grad():
+         outputs = model.generate(
+             **inputs,
+             max_length=512,
+             num_beams=4,
+             early_stopping=True
+         )
+
+     cleaned_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     return cleaned_text
+
+ iface = gr.Interface(
+     fn=clean_text,
+     inputs=gr.Textbox(
+         lines=5,
+         placeholder="Enter text to clean...",
+         label="Input Text"
+     ),
+     outputs=gr.Textbox(
+         lines=5,
+         label="Cleaned Text"
+     ),
+     title="TextClean-4B Demo",
+     description="Simple demo for text cleaning using textcleanlm/textclean-4B model"
+ )
+
+ if __name__ == "__main__":
+     iface.launch()
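
Because app.py exposes the model through a plain gr.Interface, the resulting Space can also be queried programmatically once it is running. Below is a minimal sketch using gradio_client; the Space id "textcleanlm/textclean-4B-demo" is a placeholder assumption (the actual Space name is not part of this commit), and the "/predict" endpoint is the default one gr.Interface registers for its single function.

from gradio_client import Client

# Hypothetical Space id -- replace with the real repo once the Space is live
client = Client("textcleanlm/textclean-4B-demo")

# Call the default endpoint that gr.Interface exposes for clean_text
cleaned = client.predict(
    "ThIs   t3xt needs  cleaning!!",
    api_name="/predict"
)
print(cleaned)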