oberbics committed on
Commit 0284ff4 · verified · 1 Parent(s): 3d65f4a

Update app.py

Files changed (1)
  1. app.py +76 -41
app.py CHANGED
@@ -87,64 +87,99 @@ class SafeGeocoder:
  def load_model():
      global tokenizer, model
      try:
-         # Try to import Qwen2 components from modelscope
-         try:
-             from modelscope import AutoTokenizer as MSAutoTokenizer
-             tokenizer = MSAutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
-             model = AutoModelForCausalLM.from_pretrained(
-                 MODEL_NAME,
-                 torch_dtype=TORCH_DTYPE,
-                 trust_remote_code=True
-             ).to(DEVICE).eval()
-             print("Loaded model using modelscope AutoTokenizer")
-         except:
-             # If modelscope approach fails, try with specific revision
-             tokenizer = AutoTokenizer.from_pretrained(
-                 MODEL_NAME,
-                 trust_remote_code=True,
-                 revision="main"  # Try specifying a revision
-             )
-
-             model = AutoModelForCausalLM.from_pretrained(
-                 MODEL_NAME,
-                 torch_dtype=TORCH_DTYPE,
-                 trust_remote_code=True,
-                 revision="main"  # Try specifying a revision
-             ).to(DEVICE).eval()
-
-         print(f"✅ Loaded {MODEL_NAME} on {DEVICE}")
-
-         # Test the model
-         test_text = "Test in Berlin."
-         test_template = '{"test_location": ""}'
-         test_template_formatted = json.dumps(json.loads(test_template), indent=4)
-         prompt = f"<|input|>\n### Template:\n{test_template_formatted}\n### Text:\n{test_text}\n\n<|output|>"
-
-         # Create inputs with proper padding and truncation
-         inputs = tokenizer([prompt], return_tensors="pt", truncation=True, max_length=MAX_INPUT_LENGTH).to(DEVICE)
-
-         # Generate output
          with torch.no_grad():
              outputs = model.generate(
                  **inputs,
-                 max_new_tokens=50,
                  temperature=0.0,
                  do_sample=False
              )
-
-         result = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-         # Check if output contains expected text
-         if "<|output|>" in result and "Berlin" in result:
-             return "✅ Modell erfolgreich geladen und getestet!"
-
-         return "⚠️ Modell-Test nicht erfolgreich. Bitte versuchen Sie es erneut."
-
      except Exception as e:
          import traceback
          trace = traceback.format_exc()
-         print(f"Error loading model: {e}\n{trace}")
-         return f"❌ Fehler beim Laden des Modells: {str(e)}"
  @spaces.GPU
  def extract_info(template, text):
      global tokenizer, model
 
  def load_model():
      global tokenizer, model
      try:
+         if model is None:
+             # Only load the tokenizer first (no CUDA initialization)
+             try:
+                 from modelscope import AutoTokenizer as MSAutoTokenizer
+                 tokenizer = MSAutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
+                 print("Loaded tokenizer using modelscope AutoTokenizer")
+             except:
+                 # Fall back to regular tokenizer
+                 tokenizer = AutoTokenizer.from_pretrained(
+                     MODEL_NAME,
+                     trust_remote_code=True,
+                     revision="main"
+                 )
+                 print("Loaded tokenizer using standard AutoTokenizer")
+
+         # For the model, we'll only create a loading configuration but not actually load it yet
+         # This avoids CUDA initialization in the main process
+         print(f"Tokenizer successfully loaded, model will be loaded when needed")
+         return "✅ Tokenizer erfolgreich geladen. Model wird bei Bedarf geladen."
+
+     except Exception as e:
+         import traceback
+         trace = traceback.format_exc()
+         print(f"Error loading tokenizer: {e}\n{trace}")
+         return f"❌ Fehler beim Laden des Tokenizers: {str(e)}"
+
+ # Then, modify your extract_info function to load the model on first use
+ @spaces.GPU
+ def extract_info(template, text):
+     global tokenizer, model
+
+     if tokenizer is None:
+         return "❌ Tokenizer nicht geladen", "Bitte zuerst den Tokenizer laden"
+
+     try:
+         # Load model if not loaded yet
+         if model is None:
+             try:
+                 model = AutoModelForCausalLM.from_pretrained(
+                     MODEL_NAME,
+                     torch_dtype=TORCH_DTYPE,
+                     trust_remote_code=True,
+                     revision="main"
+                 ).to(DEVICE).eval()
+                 print(f"✅ Model loaded successfully on {DEVICE}")
+             except Exception as e:
+                 return f"❌ Fehler beim Laden des Modells: {str(e)}", "{}"
+
+         # Format the template as proper JSON with indentation
+         template_formatted = json.dumps(json.loads(template), indent=4)
+
+         # Create prompt
+         prompt = f"<|input|>\n### Template:\n{template_formatted}\n### Text:\n{text}\n\n<|output|>"
+
+         # Tokenize with proper settings
+         inputs = tokenizer(
+             [prompt],
+             return_tensors="pt",
+             truncation=True,
+             padding=True,
+             max_length=MAX_INPUT_LENGTH
+         ).to(DEVICE)
+
+         # Generate output with torch.no_grad() for efficiency
          with torch.no_grad():
              outputs = model.generate(
                  **inputs,
+                 max_new_tokens=MAX_NEW_TOKENS,
                  temperature=0.0,
                  do_sample=False
              )
+
+         # Decode the result
+         result_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+         # Extract the output part
+         if "<|output|>" in result_text:
+             json_text = result_text.split("<|output|>")[1].strip()
+         else:
+             json_text = result_text
+
+         # Try to parse as JSON
+         try:
+             extracted = json.loads(json_text)
+             return "✅ Erfolgreich extrahiert", json.dumps(extracted, indent=2)
+         except json.JSONDecodeError:
+             return "❌ JSON Parsing Fehler", json_text
+
      except Exception as e:
          import traceback
          trace = traceback.format_exc()
+         print(f"Error in extract_info: {e}\n{trace}")
+         return f"❌ Fehler: {str(e)}", "{}"
  @spaces.GPU
  def extract_info(template, text):
      global tokenizer, model
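
For context, the @spaces.GPU decorator indicates the app runs as a Hugging Face Space, which typically exposes these functions through a Gradio UI. The sketch below shows one plausible way load_model and extract_info could be wired up; it is illustrative only, and the component names, labels, and default template are assumptions rather than part of this commit.

    # Hypothetical wiring sketch (not part of the commit): binds load_model and
    # extract_info to a Gradio Blocks UI, as is common in Hugging Face Spaces.
    import gradio as gr

    with gr.Blocks() as demo:
        status = gr.Textbox(label="Status")
        load_btn = gr.Button("Tokenizer laden")
        # load_model returns a single status string
        load_btn.click(fn=load_model, inputs=None, outputs=status)

        template_box = gr.Textbox(label="Template (JSON)", value='{"location": ""}')
        text_box = gr.Textbox(label="Text", lines=5)
        extract_btn = gr.Button("Extrahieren")
        result_status = gr.Textbox(label="Status der Extraktion")
        result_json = gr.Textbox(label="Extrahiertes JSON")
        # extract_info returns (status_message, json_string), so two outputs are bound
        extract_btn.click(fn=extract_info, inputs=[template_box, text_box],
                          outputs=[result_status, result_json])

    demo.launch()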