Spaces: Running on Zero
adds HF_TOKEN and loads the model
app.py (CHANGED)
```diff
@@ -1,6 +1,7 @@
 import gradio as gr
 import json
 import torch
+import os
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import spaces
 
@@ -12,31 +13,59 @@ model = None
 tokenizer = None
 
 def load_model():
-    """Load the Osmosis Structure model and tokenizer"""
+    """Load the Osmosis Structure model and tokenizer with HF token for gated repos"""
     global model, tokenizer
 
     try:
         print("Loading Osmosis Structure model...")
 
-        # …
+        # Get HF token from environment variables
+        hf_token = os.environ.get("HF_KEY")
+        if not hf_token:
+            print("⚠️ Warning: HF_KEY not found in environment variables")
+            print("Attempting to load without token...")
+            hf_token = None
+        else:
+            print("✅ HF token found, accessing gated repository...")
+
+        # Load tokenizer with token
+        print("Loading tokenizer...")
         tokenizer = AutoTokenizer.from_pretrained(
             MODEL_NAME,
-            trust_remote_code=True
+            trust_remote_code=True,
+            token=hf_token,
+            use_auth_token=hf_token  # Backward compatibility
         )
 
-
+        print("Loading model...")
+        # Load model with token
         model = AutoModelForCausalLM.from_pretrained(
             MODEL_NAME,
             torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
             device_map="auto" if torch.cuda.is_available() else None,
-            trust_remote_code=True
+            trust_remote_code=True,
+            token=hf_token,
+            use_auth_token=hf_token  # Backward compatibility
        )
 
         print("✅ Osmosis Structure model loaded successfully!")
         return True
 
     except Exception as e:
-        …
+        error_msg = f"❌ Error loading model: {e}"
+        print(error_msg)
+
+        # Provide helpful error messages for common issues
+        if "401" in str(e) or "authentication" in str(e).lower():
+            print("💡 This appears to be an authentication error.")
+            print("Please ensure:")
+            print("1. HF_KEY is set correctly in your Space secrets")
+            print("2. Your token has access to the gated repository")
+            print("3. You have accepted the model's license agreement")
+        elif "404" in str(e) or "not found" in str(e).lower():
+            print("💡 Model repository not found.")
+            print("Please check if the model name is correct and accessible")
+
         return False
 
 @spaces.GPU
@@ -45,7 +74,7 @@ def text_to_json(input_text, max_tokens=512, temperature=0.6, top_p=0.95, top_k=
     global model, tokenizer
 
     if model is None or tokenizer is None:
-        return "❌ Model not loaded. Please …
+        return "❌ Model not loaded. Please check the console for loading errors."
 
     try:
         # Create a structured prompt for JSON conversion
@@ -150,8 +179,10 @@ def create_demo():
 
         Convert unstructured text into well-formatted JSON using the Osmosis Structure 0.6B model.
         This model is specifically trained for structured data extraction and format conversion.
+
+        > **Note**: This model requires authentication. Ensure your HF_KEY is properly configured in Space secrets.
         """)
-
+
         gr.Markdown("""
         ### ℹ️ About Osmosis Structure
 
@@ -159,8 +190,16 @@ def create_demo():
         - **Architecture**: Qwen3 (specialized for structured data)
         - **Purpose**: Converting unstructured text to structured JSON format
         - **Optimizations**: Fine-tuned for data extraction and format conversion tasks
+        - **Access**: Requires HF authentication token for gated repository
 
         The model automatically identifies key information in your text and organizes it into logical JSON structures.
+
+        ### 🔐 Authentication Setup
+
+        To use this model, ensure you have:
+        1. Set `HF_KEY` in your Space secrets with a valid Hugging Face token
+        2. Accepted the model's license agreement on Hugging Face
+        3. Ensured your token has access to the gated repository
         """)
 
         with gr.Row():
@@ -237,6 +276,7 @@ def create_demo():
             label="Click on any example to try it"
         )
 
+
         # Event handlers
         convert_btn.click(
             fn=text_to_json,
@@ -259,17 +299,22 @@
 if __name__ == "__main__":
     print("🚀 Initializing Osmosis Structure Demo...")
 
+    # Check HF token availability
+    hf_token = os.environ.get("HF_KEY")
+    if hf_token:
+        print("✅ HF_KEY found in environment")
+    else:
+        print("⚠️ HF_KEY not found - this may cause issues with gated repositories")
+
     # Load model at startup
     if load_model():
         print("🎨 Creating Gradio interface...")
         demo = create_demo()
         demo.launch(
-            share=…
+            share=False,
             show_error=True,
             show_tips=True,
-            enable_queue=True
-            ssr_mode=False,
-            mcp_server=True
+            enable_queue=True
         )
     else:
-        print("❌ Failed to load model. Please check your …
+        print("❌ Failed to load model. Please check your HF_KEY and model access permissions.")
```
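Two review notes on the authentication changes above. First, the commit title says `HF_TOKEN`, but the code reads the `HF_KEY` environment variable; the secret configured in the Space settings must match the name the code actually reads. Second, `use_auth_token` has long been deprecated in `transformers` in favor of `token`, and recent releases may raise an error when both are passed to `from_pretrained` at once. A minimal sketch of the loading path using only the current parameter (the repo id is an assumption; the diff only shows `MODEL_NAME`):

```python
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "osmosis-ai/Osmosis-Structure-0.6B"  # assumed repo id, not shown in the diff

def load_model():
    # Same Space secret the commit uses
    hf_token = os.environ.get("HF_KEY")

    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
        token=hf_token,  # `token` supersedes the deprecated `use_auth_token`
    )
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto" if torch.cuda.is_available() else None,
        trust_remote_code=True,
        token=hf_token,
    )
    return model, tokenizer
```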
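If 401 errors persist after setting the secret, it can help to verify the token and repository access separately from the full model load. A small preflight sketch using `huggingface_hub` (already a dependency of `transformers`); the repo id is again an assumption:

```python
import os
from huggingface_hub import model_info, whoami

MODEL_NAME = "osmosis-ai/Osmosis-Structure-0.6B"  # assumed repo id, not shown in the diff

def check_access() -> bool:
    """Preflight: is the token valid, and can it see the (possibly gated) repo?"""
    token = os.environ.get("HF_KEY")
    if not token:
        print("⚠️ HF_KEY is not set")
        return False
    try:
        user = whoami(token=token)  # validates the token itself
        print(f"✅ Token belongs to: {user['name']}")
        model_info(MODEL_NAME, token=token)  # raises if the repo is gated or missing for this token
        print("✅ Repository is accessible")
        return True
    except Exception as e:
        print(f"❌ Access check failed: {e}")
        return False
```

Running this at startup, before `load_model()`, turns a vague 401 traceback into a direct answer about which part is wrong: the token or the repo access.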
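One compatibility caveat on the `launch()` changes in the last hunk: `enable_queue` and `show_tips` are Gradio 3-era keyword arguments that were removed in Gradio 4, while the `ssr_mode` and `mcp_server` flags this commit deletes only exist in Gradio 5. As written, the call will only work against an older Gradio pin; on Gradio 4+ the equivalent would be roughly the following sketch (assuming no queue customization is needed):

```python
# Gradio 4+: queueing is enabled via queue(), not a launch() kwarg
demo = create_demo()
demo.queue()  # replaces enable_queue=True
demo.launch(
    share=False,
    show_error=True,
)
```

Whichever direction is intended, pinning the `gradio` version in requirements.txt would keep the launch arguments and the installed version in agreement.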