habulaj committed on
Commit 1e03fe2 · verified · 1 Parent(s): b6db17e

Update app.py

Files changed (1)
  1. app.py +108 -231
app.py CHANGED
@@ -8,263 +8,167 @@ import time
  import logging
  import os
  import gc
- from typing import Dict, Any, Optional, List, Tuple
- import psutil
- from contextlib import contextmanager
-
- num_cores = psutil.cpu_count(logical=False)
- num_threads = min(4, num_cores)

+ # Configurações de otimização
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
- os.environ["OMP_NUM_THREADS"] = str(num_threads)
- os.environ["MKL_NUM_THREADS"] = str(num_threads)
- os.environ["OPENBLAS_NUM_THREADS"] = str(num_threads)
- os.environ["VECLIB_MAXIMUM_THREADS"] = str(num_threads)
- os.environ["NUMEXPR_NUM_THREADS"] = str(num_threads)
-
- torch.set_num_threads(num_threads)
+ os.environ["OMP_NUM_THREADS"] = "2"
+ os.environ["MKL_NUM_THREADS"] = "2"
+ torch.set_num_threads(2)
  torch.set_num_interop_threads(1)
- torch.backends.mkl.enabled = True
- torch.backends.mkldnn.enabled = True
- torch.backends.quantized.engine = 'qnnpack'
- torch.cuda.empty_cache = lambda: None

  logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
- log = logging.getLogger("news-filter-optimized")
+ log = logging.getLogger("news-filter-gradio")

  device = "cpu"
  torch.set_default_device(device)

- @contextmanager
- def memory_efficient_context():
-     try:
-         gc.collect()
-         yield
-     finally:
-         gc.collect()
-
- class OptimizedTokenizerWrapper:
-     def __init__(self, tokenizer):
-         self.tokenizer = tokenizer
-         self._template_cache = {}
-
-     def apply_chat_template(self, messages, **kwargs):
-         content = messages[0]['content'] if messages else ""
-         key = hash(content[:100])
-
-         if key not in self._template_cache:
-             result = self.tokenizer.apply_chat_template(messages, **kwargs)
-             if len(self._template_cache) > 100:
-                 self._template_cache.clear()
-             self._template_cache[key] = result
-
-         return self._template_cache[key]
-
-     def decode(self, *args, **kwargs):
-         return self.tokenizer.decode(*args, **kwargs)
-
-     def __getattr__(self, name):
-         return getattr(self.tokenizer, name)
-
- print("🚀 Carregando modelo...")
- log.info("🚀 Carregando modelo...")
-
- model_config = {
-     "device_map": device,
-     "torch_dtype": torch.float16,
-     "low_cpu_mem_usage": True,
-     "use_cache": True,
-     "trust_remote_code": True,
-     "attn_implementation": "eager",
- }
+ # Carrega modelo e tokenizer
+ print("🚀 Carregando modelo e tokenizer...")
+ log.info("🚀 Carregando modelo e tokenizer...")

  model = AutoPeftModelForCausalLM.from_pretrained(
-     "habulaj/filterinstruct180",
-     **model_config
+     "habulaj/filterinstruct180",
+     device_map=device,
+     torch_dtype=torch.bfloat16,
+     load_in_4bit=False,
+     low_cpu_mem_usage=True,
+     use_cache=True,
+     trust_remote_code=True
  )

  tokenizer = AutoTokenizer.from_pretrained(
      "habulaj/filterinstruct180",
      use_fast=True,
-     padding_side="left",
-     model_max_length=1024,
-     clean_up_tokenization_spaces=False,
+     padding_side="left"
  )

  if tokenizer.pad_token is None:
      tokenizer.pad_token = tokenizer.eos_token

- tokenizer = OptimizedTokenizerWrapper(tokenizer)
-
  model.eval()
- for param in model.parameters():
-     param.requires_grad = False
-
- try:
-     model = torch.compile(model, mode="reduce-overhead")
-     log.info("✅ Modelo compilado")
- except Exception as e:
-     log.warning(f"⚠️ Torch compile não disponível: {e}")
-
- if hasattr(model, 'fuse_linear_layers'):
-     model.fuse_linear_layers()
-
- log.info("✅ Modelo carregado")
+ log.info("✅ Modelo carregado (eval mode).")

- tokenizer.tokenizer.chat_template = """{% for message in messages %}{% if message['role'] == 'user' %}{% if loop.first %}<|begin_of_text|><|start_header_id|>user<|end_header_id|>
+ tokenizer.chat_template = """{% for message in messages %}
+ {%- if message['role'] == 'user' %}
+ {%- if loop.first %}
+ <|begin_of_text|><|start_header_id|>user<|end_header_id|>

- {{ message['content'] }}<|eot_id|>{% else %}<|start_header_id|>user<|end_header_id|>
+ {{ message['content'] }}<|eot_id|>
+ {%- else %}
+ <|start_header_id|>user<|end_header_id|>

- {{ message['content'] }}<|eot_id|>{% endif %}{% elif message['role'] == 'assistant' %}<|start_header_id|>assistant<|end_header_id|>
+ {{ message['content'] }}<|eot_id|>
+ {%- endif %}
+ {%- elif message['role'] == 'assistant' %}
+ <|start_header_id|>assistant<|end_header_id|>

- {{ message['content'] }}<|eot_id|>{% endif %}{% endfor %}{% if add_generation_prompt %}<|start_header_id|>assistant<|end_header_id|>
+ {{ message['content'] }}<|eot_id|>
+ {%- endif %}
+ {%- endfor %}
+ {%- if add_generation_prompt %}
+ <|start_header_id|>assistant<|end_header_id|>

- {% endif %}"""
+ {%- endif %}"""

  generation_config = GenerationConfig(
-     max_new_tokens=150,
-     temperature=0.8,
-     do_sample=False,
+     max_new_tokens=200,
+     temperature=1.0,
+     min_p=0.1,
+     do_sample=True,
      use_cache=True,
      eos_token_id=tokenizer.eos_token_id,
      pad_token_id=tokenizer.eos_token_id,
-     repetition_penalty=1.1,
-     length_penalty=1.0,
-     num_beams=1,
-     early_stopping=True,
  )

- def extract_json_optimized(text: str) -> str:
-     if not hasattr(extract_json_optimized, 'pattern'):
-         extract_json_optimized.pattern = re.compile(r'\{.*?\}', re.DOTALL)
-
-     match = extract_json_optimized.pattern.search(text)
-     return match.group(0) if match else text
+ def extract_json(text):
+     match = re.search(r'\{.*\}', text, flags=re.DOTALL)
+     if match:
+         return match.group(0)
+     return text

- def preprocess_input_optimized(title: str, content: str) -> List[Dict[str, str]]:
-     max_title_length = 100
-     max_content_length = 500
-
-     title = title[:max_title_length] if len(title) > max_title_length else title
-     content = content[:max_content_length] if len(content) > max_content_length else content
-
-     return [{
-         "role": "user",
-         "content": f"""Analyze the news title and content, and return the filters in JSON format with the defined fields.
+ def analyze_news(title, content):
+     try:
+         log.info(f"🧠 Inferência iniciada para: {title}")
+         start_time = time.time()
+
+         messages = [
+             {
+                 "role": "user",
+                 "content": f"""Analyze the news title and content, and return the filters in JSON format with the defined fields.

  Please respond ONLY with the JSON filter, do NOT add any explanations, system messages, or extra text.

  Title: "{title}"
  Content: "{content}"
  """
-     }]
-
- def analyze_news_optimized(title: str, content: str) -> str:
-     try:
-         with memory_efficient_context():
-             start_time = time.time()
-
-             messages = preprocess_input_optimized(title, content)
-
-             inputs = tokenizer.apply_chat_template(
-                 messages,
-                 tokenize=True,
-                 add_generation_prompt=True,
-                 return_tensors="pt",
-                 padding=False,
-                 truncation=True,
-                 max_length=1024,
-             )
-
-             with torch.no_grad(), torch.inference_mode():
-                 with torch.autocast(device_type='cpu', dtype=torch.float16):
-                     outputs = model.generate(
-                         inputs,
-                         generation_config=generation_config,
-                         num_return_sequences=1,
-                         output_scores=False,
-                         output_hidden_states=False,
-                         output_attentions=False,
-                         return_dict_in_generate=False,
-                         use_cache=True,
-                         do_sample=False,
-                     )
-
-             generated_tokens = outputs[0][inputs.shape[1]:]
-             generated_text = tokenizer.decode(
-                 generated_tokens,
-                 skip_special_tokens=True,
-                 clean_up_tokenization_spaces=False
+             }
+         ]
+
+         inputs = tokenizer.apply_chat_template(
+             messages,
+             tokenize=True,
+             add_generation_prompt=True,
+             return_tensors="pt",
+         )
+
+         with torch.no_grad(), torch.inference_mode():
+             outputs = model.generate(
+                 input_ids=inputs,
+                 generation_config=generation_config,
+                 num_return_sequences=1,
+                 output_scores=False,
+                 return_dict_in_generate=False
              )

-             json_result = extract_json_optimized(generated_text)
-
-             duration = time.time() - start_time
-             log.info(f"✅ Análise concluída em {duration:.2f}s")
-
-             del outputs, inputs, generated_tokens
-
-             try:
-                 parsed_json = json.loads(json_result)
-                 return json.dumps(parsed_json, indent=2, ensure_ascii=False)
-             except json.JSONDecodeError:
-                 return json_result
+         prompt_text = tokenizer.decode(inputs[0], skip_special_tokens=False)
+         decoded_text = tokenizer.decode(outputs[0], skip_special_tokens=False)
+         generated_only = decoded_text[len(prompt_text):].strip()
+         json_result = extract_json(generated_only)
+
+         duration = time.time() - start_time
+         log.info(f"✅ JSON extraído em {duration:.2f}s")
+
+         del outputs, inputs
+         gc.collect()
+
+         try:
+             parsed_json = json.loads(json_result)
+             return json.dumps(parsed_json, indent=2, ensure_ascii=False)
+         except json.JSONDecodeError:
+             return json_result

      except Exception as e:
-         log.exception("❌ Erro durante análise:")
+         log.exception("❌ Erro inesperado:")
          return f"Erro durante a análise: {str(e)}"

- def warmup_optimized():
+ def warmup_model():
      log.info("🔥 Executando warmup...")
      try:
-         for i in range(3):
-             result = analyze_news_optimized(f"Test title {i}", f"Test content {i}")
-             log.info(f"Warmup {i+1}/3 concluído")
-
-         gc.collect()
-         log.info("✅ Warmup concluído")
+         analyze_news("Test title", "Test content")
+         log.info(" Warmup concluído.")
      except Exception as e:
          log.warning(f"⚠️ Warmup falhou: {e}")

- def create_optimized_interface():
-     with gr.Blocks(
-         title="Analisador de Notícias - Ultra Otimizado",
-         theme=gr.themes.Monochrome(),
-         css="""
-         .gradio-container {
-             max-width: 1200px !important;
-         }
-         .performance-info {
-             background: #f8f9fa;
-             border-left: 4px solid #007bff;
-             padding: 15px;
-             margin: 10px 0;
-         }
-         """
-     ) as demo:
+ def create_interface():
+     with gr.Blocks(title="Analisador de Notícias", theme=gr.themes.Soft()) as demo:

-         gr.Markdown("# 🚀 Analisador de Notícias - Ultra Otimizado")
+         gr.Markdown("# 📰 Analisador de Notícias")

          with gr.Row():
              with gr.Column(scale=1):
                  title_input = gr.Textbox(
                      label="Título da Notícia",
-                     placeholder="Ex: Legendary Musician Carlos Mendes Dies at 78",
-                     max_lines=3
+                     placeholder="Digite o título da notícia...",
+                     lines=2
                  )

                  content_input = gr.Textbox(
                      label="Conteúdo da Notícia",
-                     placeholder="Ex: Carlos Mendes, the internationally acclaimed Brazilian guitarist...",
-                     max_lines=6
+                     placeholder="Digite o conteúdo da notícia...",
+                     lines=6
                  )

-                 analyze_btn = gr.Button(" Analisar Notícia", variant="primary")
-
-                 with gr.Row():
-                     example_btn1 = gr.Button("📻 Exemplo 1", size="sm")
-                     example_btn2 = gr.Button("⚽ Exemplo 2", size="sm")
-                     example_btn3 = gr.Button("💼 Exemplo 3", size="sm")
+                 analyze_btn = gr.Button("🔍 Analisar Notícia", variant="primary")

              with gr.Column(scale=1):
                  output = gr.Textbox(
@@ -276,62 +180,35 @@ def create_optimized_interface():

                  status = gr.Textbox(
                      label="Status",
-                     value=" Pronto para análise",
+                     value="Aguardando entrada...",
                      interactive=False
                  )

-         def analyze_with_status(title: str, content: str) -> Tuple[str, str]:
+         def update_status_and_analyze(title, content):
              if not title.strip() or not content.strip():
-                 return "❌ Preencha todos os campos", "Erro: Campos obrigatórios não preenchidos"
+                 return "❌ Preencha título e conteúdo.", "Erro: Campos obrigatórios."

              try:
-                 start_time = time.time()
-                 result = analyze_news_optimized(title, content)
-                 duration = time.time() - start_time
-
-                 return f"✅ Análise concluída em {duration:.2f}s", result
+                 result = analyze_news(title, content)
+                 return f"✅ Análise concluída!", result
              except Exception as e:
                  return f"❌ Erro: {str(e)}", f"Erro: {str(e)}"

-         examples = [
-             ("Legendary Musician Carlos Mendes Dies at 78", "Carlos Mendes, the internationally acclaimed Brazilian guitarist and composer known for blending traditional bossa nova with modern jazz, has died at the age of 78."),
-             ("Brazil Defeats Argentina 2-1 in Copa America Final", "In a thrilling match at the Maracana Stadium, Brazil secured victory over Argentina with goals from Neymar and Vinicius Jr. The match was watched by over 200 million viewers worldwide."),
-             ("Tech Giant Announces Major Layoffs Affecting 10,000 Employees", "The technology company announced significant workforce reductions citing economic uncertainty and changing market conditions. The layoffs will affect multiple departments across different regions.")
-         ]
-
          analyze_btn.click(
-             fn=analyze_with_status,
+             fn=update_status_and_analyze,
              inputs=[title_input, content_input],
              outputs=[status, output]
          )
-
-         example_btn1.click(
-             fn=lambda: examples[0],
-             outputs=[title_input, content_input]
-         )
-
-         example_btn2.click(
-             fn=lambda: examples[1],
-             outputs=[title_input, content_input]
-         )
-
-         example_btn3.click(
-             fn=lambda: examples[2],
-             outputs=[title_input, content_input]
-         )

      return demo

  if __name__ == "__main__":
-     warmup_optimized()
-
-     print("🚀 Iniciando interface...")
-     demo = create_optimized_interface()
+     warmup_model()
+     print("🚀 Iniciando interface Gradio...")
+     demo = create_interface()
      demo.launch(
          share=False,
          server_name="0.0.0.0",
          server_port=7860,
-         show_error=True,
-         max_threads=num_threads,
-         show_api=False,
+         show_error=True
      )
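
Once the app is running, the click handler can also be exercised programmatically. The snippet below is a minimal, hypothetical client-side sketch: it assumes a local server on port 7860 and the endpoint name that recent Gradio versions derive from the update_status_and_analyze handler; client.view_api() lists the actual route if it differs.

from gradio_client import Client

# Connect to the locally running app (assumed address, see demo.launch above).
client = Client("http://localhost:7860")

# The handler returns (status, output); the api_name below is the
# function-derived default and is an assumption, not taken from the diff.
status, result = client.predict(
    "Brazil Defeats Argentina 2-1 in Copa America Final",
    "In a thrilling match at the Maracana Stadium, Brazil secured victory over Argentina.",
    api_name="/update_status_and_analyze",
)
print(status)  # e.g. "✅ Análise concluída!"
print(result)  # pretty-printed JSON filter, or raw model text if JSON parsing failed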