oberbics committed
Commit 0110182 · verified · 1 Parent(s): c66b983

Update app.py

Files changed (1)
  1. app.py +56 -36
app.py CHANGED
@@ -1,4 +1,3 @@
-
 import gradio as gr
 import json
 import os
@@ -28,8 +27,8 @@ MAP_TILES = {
     }
 }
 
-# Model configuration
-MODEL_NAME = "numind/NuExtract-1.5-tiny"
+# Model configuration - corrected model name
+MODEL_NAME = "numind/NuExtract-tiny-v1.5"  # Fixed model name according to documentation
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 TORCH_DTYPE = torch.bfloat16 if DEVICE == "cuda" else torch.float32
 MAX_INPUT_LENGTH = 20000  # For sliding window processing
@@ -77,48 +76,47 @@ class SafeGeocoder:
         return None
 
 
-# Fixed model loading function
+# Corrected model loading function based on official usage example
 def load_model():
     global tokenizer, model
     try:
         if model is None:
-            # First, try to get configuration to check model type
-            config = AutoConfig.from_pretrained(MODEL_NAME, trust_remote_code=True)
-            print(f"Loading model config: {config.__class__.__name__}")
-
-            # Load tokenizer with appropriate options
+            # Load tokenizer exactly as shown in the usage example
             tokenizer = AutoTokenizer.from_pretrained(
-                MODEL_NAME,
-                trust_remote_code=True,
-                use_fast=False  # Try with use_fast=False if the regular tokenizer fails
+                MODEL_NAME,
+                trust_remote_code=True
             )
 
-            print(f"Successfully loaded tokenizer: {tokenizer.__class__.__name__}")
-
-            # Load the model
+            # Load model exactly as shown in the usage example
             model = AutoModelForCausalLM.from_pretrained(
                 MODEL_NAME,
                 torch_dtype=TORCH_DTYPE,
-                device_map="auto",
                 trust_remote_code=True
-            ).eval()
+            ).to(DEVICE).eval()
 
             print(f"✅ Loaded {MODEL_NAME} on {DEVICE}")
 
             # Test the model
             test_text = "Test in Berlin."
             test_template = '{"test_location": ""}'
-            prompt = f"<|input|>\n### Template:\n{test_template}\n### Text:\n{test_text}\n\n<|output|>"
+            test_template_formatted = json.dumps(json.loads(test_template), indent=4)
+            prompt = f"<|input|>\n### Template:\n{test_template_formatted}\n### Text:\n{test_text}\n\n<|output|>"
 
-            inputs = tokenizer(prompt, return_tensors="pt", max_length=20000, truncation=True).to(DEVICE)
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=50,
-                temperature=0.0,
-                do_sample=False
-            )
+            # Create inputs with proper padding and truncation
+            inputs = tokenizer([prompt], return_tensors="pt", truncation=True, max_length=MAX_INPUT_LENGTH).to(DEVICE)
+
+            # Generate output
+            with torch.no_grad():
+                outputs = model.generate(
+                    **inputs,
+                    max_new_tokens=50,
+                    temperature=0.0,
+                    do_sample=False
+                )
+
             result = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
+            # Check if output contains expected text
             if "<|output|>" in result and "Berlin" in result:
                 return "✅ Modell erfolgreich geladen und getestet!"
 
@@ -137,25 +135,40 @@ def extract_info(template, text):
         return "❌ Modell nicht geladen", "Bitte zuerst das Modell laden"
 
     try:
-        prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
+        # Format the template as proper JSON with indentation as per usage example
+        template_formatted = json.dumps(json.loads(template), indent=4)
+
+        # Create prompt exactly as shown in the usage example
+        prompt = f"<|input|>\n### Template:\n{template_formatted}\n### Text:\n{text}\n\n<|output|>"
+
+        # Tokenize with proper settings
         inputs = tokenizer(
-            prompt,
+            [prompt],
             return_tensors="pt",
             truncation=True,
-            max_length=20000
+            padding=True,
+            max_length=MAX_INPUT_LENGTH
         ).to(DEVICE)
 
-        outputs = model.generate(
-            **inputs,
-            max_new_tokens=1000,
-            temperature=0.0,
-            do_sample=False,
-            pad_token_id=tokenizer.eos_token_id
-        )
+        # Generate output with torch.no_grad() for efficiency
+        with torch.no_grad():
+            outputs = model.generate(
+                **inputs,
+                max_new_tokens=MAX_NEW_TOKENS,
+                temperature=0.0,
+                do_sample=False
+            )
 
+        # Decode the result
         result_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        json_text = result_text.split("<|output|>")[1].strip() if "<|output|>" in result_text else result_text
 
+        # Extract the output part
+        if "<|output|>" in result_text:
+            json_text = result_text.split("<|output|>")[1].strip()
+        else:
+            json_text = result_text
+
+        # Try to parse as JSON
         try:
             extracted = json.loads(json_text)
             return "✅ Erfolgreich extrahiert", json.dumps(extracted, indent=2)
@@ -163,6 +176,9 @@ def extract_info(template, text):
             return "❌ JSON Parsing Fehler", json_text
 
     except Exception as e:
+        import traceback
+        trace = traceback.format_exc()
+        print(f"Error in extract_info: {e}\n{trace}")
         return f"❌ Fehler: {str(e)}", "{}"
 
 def create_map(df, location_col):
@@ -369,6 +385,10 @@ h2 {
     border-top: 1px solid #eaeaea;
 }
 </style>
+"""
+
+
+
 """
 with gr.Blocks(css=custom_css, title="Daten Strukturieren und Analysieren") as demo:
     gr.HTML("""
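For quick reference, below is a minimal, self-contained sketch of the prompt-and-generation flow that the updated app.py follows. The model name, the <|input|> / ### Template / ### Text / <|output|> prompt format, and greedy decoding are taken from the diff above; the example template, input text, and the max_new_tokens value are illustrative placeholders, not part of the commit.

# Minimal sketch of the NuExtract flow used in the updated app.py (placeholder template/text)
import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "numind/NuExtract-tiny-v1.5"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16 if DEVICE == "cuda" else torch.float32,
    trust_remote_code=True,
).to(DEVICE).eval()

# Illustrative template and text; any JSON template and input text follow the same pattern
template = json.dumps({"location": ""}, indent=4)
text = "Die Konferenz findet in Berlin statt."

prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
inputs = tokenizer([prompt], return_tensors="pt", truncation=True, max_length=20000).to(DEVICE)

# Greedy decoding, as in the app; max_new_tokens here is an arbitrary example value
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=200, do_sample=False)

result = tokenizer.decode(outputs[0], skip_special_tokens=True)
json_text = result.split("<|output|>")[1].strip() if "<|output|>" in result else result
print(json.loads(json_text))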