oberbics committed
Commit 0110182 · verified · 1 Parent(s): c66b983

Update app.py

Files changed (1)
  1. app.py +56 -36
app.py CHANGED
@@ -1,4 +1,3 @@
-
 import gradio as gr
 import json
 import os
@@ -28,8 +27,8 @@ MAP_TILES = {
     }
 }
 
-# Model configuration
-MODEL_NAME = "numind/NuExtract-1.5-tiny"
+# Model configuration - corrected model name
+MODEL_NAME = "numind/NuExtract-tiny-v1.5"  # Fixed model name according to documentation
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 TORCH_DTYPE = torch.bfloat16 if DEVICE == "cuda" else torch.float32
 MAX_INPUT_LENGTH = 20000  # For sliding window processing
@@ -77,48 +76,47 @@ class SafeGeocoder:
         return None
 
 
-# Fixed model loading function
+# Corrected model loading function based on official usage example
 def load_model():
     global tokenizer, model
     try:
         if model is None:
-            # First, try to get configuration to check model type
-            config = AutoConfig.from_pretrained(MODEL_NAME, trust_remote_code=True)
-            print(f"Loading model config: {config.__class__.__name__}")
-
-            # Load tokenizer with appropriate options
+            # Load tokenizer exactly as shown in the usage example
             tokenizer = AutoTokenizer.from_pretrained(
-                MODEL_NAME,
-                trust_remote_code=True,
-                use_fast=False  # Try with use_fast=False if the regular tokenizer fails
+                MODEL_NAME,
+                trust_remote_code=True
             )
 
-            print(f"Successfully loaded tokenizer: {tokenizer.__class__.__name__}")
-
-            # Load the model
+            # Load model exactly as shown in the usage example
             model = AutoModelForCausalLM.from_pretrained(
                 MODEL_NAME,
                 torch_dtype=TORCH_DTYPE,
-                device_map="auto",
                 trust_remote_code=True
-            ).eval()
+            ).to(DEVICE).eval()
 
             print(f"✅ Loaded {MODEL_NAME} on {DEVICE}")
 
             # Test the model
             test_text = "Test in Berlin."
             test_template = '{"test_location": ""}'
-            prompt = f"<|input|>\n### Template:\n{test_template}\n### Text:\n{test_text}\n\n<|output|>"
+            test_template_formatted = json.dumps(json.loads(test_template), indent=4)
+            prompt = f"<|input|>\n### Template:\n{test_template_formatted}\n### Text:\n{test_text}\n\n<|output|>"
 
-            inputs = tokenizer(prompt, return_tensors="pt", max_length=20000, truncation=True).to(DEVICE)
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=50,
-                temperature=0.0,
-                do_sample=False
-            )
+            # Create inputs with proper padding and truncation
+            inputs = tokenizer([prompt], return_tensors="pt", truncation=True, max_length=MAX_INPUT_LENGTH).to(DEVICE)
+
+            # Generate output
+            with torch.no_grad():
+                outputs = model.generate(
+                    **inputs,
+                    max_new_tokens=50,
+                    temperature=0.0,
+                    do_sample=False
+                )
+
             result = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
+            # Check if output contains expected text
             if "<|output|>" in result and "Berlin" in result:
                 return "✅ Modell erfolgreich geladen und getestet!"
 
@@ -137,25 +135,40 @@ def extract_info(template, text):
         return "❌ Modell nicht geladen", "Bitte zuerst das Modell laden"
 
     try:
-        prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
+        # Format the template as proper JSON with indentation as per usage example
+        template_formatted = json.dumps(json.loads(template), indent=4)
+
+        # Create prompt exactly as shown in the usage example
+        prompt = f"<|input|>\n### Template:\n{template_formatted}\n### Text:\n{text}\n\n<|output|>"
+
+        # Tokenize with proper settings
         inputs = tokenizer(
-            prompt,
+            [prompt],
             return_tensors="pt",
             truncation=True,
-            max_length=20000
+            padding=True,
+            max_length=MAX_INPUT_LENGTH
         ).to(DEVICE)
 
-        outputs = model.generate(
-            **inputs,
-            max_new_tokens=1000,
-            temperature=0.0,
-            do_sample=False,
-            pad_token_id=tokenizer.eos_token_id
-        )
+        # Generate output with torch.no_grad() for efficiency
+        with torch.no_grad():
+            outputs = model.generate(
+                **inputs,
+                max_new_tokens=MAX_NEW_TOKENS,
+                temperature=0.0,
+                do_sample=False
+            )
 
+        # Decode the result
         result_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        json_text = result_text.split("<|output|>")[1].strip() if "<|output|>" in result_text else result_text
 
+        # Extract the output part
+        if "<|output|>" in result_text:
+            json_text = result_text.split("<|output|>")[1].strip()
+        else:
+            json_text = result_text
+
+        # Try to parse as JSON
         try:
             extracted = json.loads(json_text)
             return "✅ Erfolgreich extrahiert", json.dumps(extracted, indent=2)
@@ -163,6 +176,9 @@ def extract_info(template, text):
             return "❌ JSON Parsing Fehler", json_text
 
     except Exception as e:
+        import traceback
+        trace = traceback.format_exc()
+        print(f"Error in extract_info: {e}\n{trace}")
         return f"❌ Fehler: {str(e)}", "{}"
 
 def create_map(df, location_col):
@@ -369,6 +385,10 @@ h2 {
     border-top: 1px solid #eaeaea;
 }
 </style>
+"""
+
+
+
 """
 with gr.Blocks(css=custom_css, title="Daten Strukturieren und Analysieren") as demo:
     gr.HTML("""
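For quick reference, below is a minimal, self-contained sketch of the prompt-and-generation flow that the updated app.py follows. The model name, the <|input|> / ### Template / ### Text / <|output|> prompt format, and greedy decoding are taken from the diff above; the example template, input text, and the max_new_tokens value are illustrative placeholders, not part of the commit.

# Minimal sketch of the NuExtract flow used in the updated app.py (placeholder template/text)
import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "numind/NuExtract-tiny-v1.5"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16 if DEVICE == "cuda" else torch.float32,
    trust_remote_code=True,
).to(DEVICE).eval()

# Illustrative template and text; any JSON template and input text follow the same pattern
template = json.dumps({"location": ""}, indent=4)
text = "Die Konferenz findet in Berlin statt."

prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
inputs = tokenizer([prompt], return_tensors="pt", truncation=True, max_length=20000).to(DEVICE)

# Greedy decoding, as in the app; max_new_tokens here is an arbitrary example value
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=200, do_sample=False)

result = tokenizer.decode(outputs[0], skip_special_tokens=True)
json_text = result.split("<|output|>")[1].strip() if "<|output|>" in result else result
print(json.loads(json_text))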