Update app.py

app.py CHANGED
@@ -87,64 +87,99 @@ class SafeGeocoder:
 def load_model():
     global tokenizer, model
     try:
-        …
-            trust_remote_code=True,
-            revision="main" # Try specifying a revision
-        )
-        model …
-            revision="main" # Try specifying a revision
-        ).to(DEVICE).eval()
-        # …
-        prompt = f"<|input|>\n### Template:\n{ …
-        # …
-        inputs = tokenizer( …
-        # Generate output
+        if model is None:
+            # Only load the tokenizer first (no CUDA initialization)
+            try:
+                from modelscope import AutoTokenizer as MSAutoTokenizer
+                tokenizer = MSAutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
+                print("Loaded tokenizer using modelscope AutoTokenizer")
+            except:
+                # Fall back to regular tokenizer
+                tokenizer = AutoTokenizer.from_pretrained(
+                    MODEL_NAME,
+                    trust_remote_code=True,
+                    revision="main"
+                )
+                print("Loaded tokenizer using standard AutoTokenizer")
+
+        # For the model, we'll only create a loading configuration but not actually load it yet
+        # This avoids CUDA initialization in the main process
+        print(f"Tokenizer successfully loaded, model will be loaded when needed")
+        return "✅ Tokenizer erfolgreich geladen. Model wird bei Bedarf geladen."
+
+    except Exception as e:
+        import traceback
+        trace = traceback.format_exc()
+        print(f"Error loading tokenizer: {e}\n{trace}")
+        return f"❌ Fehler beim Laden des Tokenizers: {str(e)}"
+
+# Then, modify your extract_info function to load the model on first use
+@spaces.GPU
+def extract_info(template, text):
+    global tokenizer, model
+
+    if tokenizer is None:
+        return "❌ Tokenizer nicht geladen", "Bitte zuerst den Tokenizer laden"
+
+    try:
+        # Load model if not loaded yet
+        if model is None:
+            try:
+                model = AutoModelForCausalLM.from_pretrained(
+                    MODEL_NAME,
+                    torch_dtype=TORCH_DTYPE,
+                    trust_remote_code=True,
+                    revision="main"
+                ).to(DEVICE).eval()
+                print(f"✅ Model loaded successfully on {DEVICE}")
+            except Exception as e:
+                return f"❌ Fehler beim Laden des Modells: {str(e)}", "{}"
+
+        # Format the template as proper JSON with indentation
+        template_formatted = json.dumps(json.loads(template), indent=4)
+
+        # Create prompt
+        prompt = f"<|input|>\n### Template:\n{template_formatted}\n### Text:\n{text}\n\n<|output|>"
+
+        # Tokenize with proper settings
+        inputs = tokenizer(
+            [prompt],
+            return_tensors="pt",
+            truncation=True,
+            padding=True,
+            max_length=MAX_INPUT_LENGTH
+        ).to(DEVICE)
+
+        # Generate output with torch.no_grad() for efficiency
         with torch.no_grad():
             outputs = model.generate(
                 **inputs,
-                max_new_tokens= …
+                max_new_tokens=MAX_NEW_TOKENS,
                 temperature=0.0,
                 do_sample=False
             )
-        result = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # …
-        return "✅ Modell erfolgreich geladen und getestet!"
+
+        # Decode the result
+        result_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+        # Extract the output part
+        if "<|output|>" in result_text:
+            json_text = result_text.split("<|output|>")[1].strip()
+        else:
+            json_text = result_text
+
+        # Try to parse as JSON
+        try:
+            extracted = json.loads(json_text)
+            return "✅ Erfolgreich extrahiert", json.dumps(extracted, indent=2)
+        except json.JSONDecodeError:
+            return "❌ JSON Parsing Fehler", json_text
+
     except Exception as e:
         import traceback
         trace = traceback.format_exc()
-        print(f"Error …
-        return f"❌ Fehler …
+        print(f"Error in extract_info: {e}\n{trace}")
+        return f"❌ Fehler: {str(e)}", "{}"
 @spaces.GPU
 def extract_info(template, text):
     global tokenizer, model
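What the commit does: `load_model` no longer touches CUDA at startup; it only prepares the tokenizer, and the model weights are loaded lazily on the first call to the `@spaces.GPU`-decorated `extract_info`. This is the pattern ZeroGPU Spaces require, since CUDA may only be initialized inside the GPU-decorated worker, never in the main process. Below is a minimal, self-contained sketch of the same pattern; the model id, DEVICE, and dtype are placeholders for illustration, not the app's actual constants.

import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "numind/NuExtract"   # placeholder model id for this sketch
DEVICE = "cuda"
TORCH_DTYPE = torch.bfloat16      # assumed dtype; the app defines its own

# Startup (main process): tokenizer only -- no CUDA is touched here.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = None

@spaces.GPU
def generate(prompt: str) -> str:
    global model
    # The first call runs inside the GPU worker, so loading weights here is safe.
    if model is None:
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME, torch_dtype=TORCH_DTYPE, trust_remote_code=True
        ).to(DEVICE).eval()
    inputs = tokenizer([prompt], return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=256, do_sample=False)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

Keeping `model` as a module-level global means the weights survive across calls within the same worker, so only the first request pays the load cost.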
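The prompt that `extract_info` assembles follows the NuExtract input convention: a JSON template after `### Template:`, the raw text after `### Text:`, and the model continues after `<|output|>` with the filled-in JSON. Because decoder-only generation echoes the prompt tokens before the completion, the decoded string is split on `<|output|>` before `json.loads`. A worked example with illustrative values (the generation below is a stand-in, not real model output):

import json

template = json.dumps({"name": "", "city": ""}, indent=4)
text = "Anna Schmidt lives in Berlin."
prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"

# The decoded string contains the prompt, so the answer is whatever
# follows the <|output|> marker:
result_text = prompt + '{"name": "Anna Schmidt", "city": "Berlin"}'
json_text = result_text.split("<|output|>")[1].strip()
extracted = json.loads(json_text)
print(json.dumps(extracted, indent=2))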