Spaces: Running on Zero

add demo with mcp enabled

- README.md +2 -2
- app.py +275 -0
- requirements.txt +3 -0
README.md
CHANGED
@@ -1,8 +1,8 @@
 ---
 title: Convert To Json
-emoji:
+emoji: π¬ππ
 colorFrom: yellow
-colorTo:
+colorTo: blue
 sdk: gradio
 sdk_version: 5.33.0
 app_file: app.py
app.py
ADDED
@@ -0,0 +1,275 @@
+import gradio as gr
+import json
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import spaces
+
+# Model configuration
+MODEL_NAME = "osmosis-ai/Osmosis-Structure-0.6B"
+
+# Global variables to store the model and tokenizer
+model = None
+tokenizer = None
+
+def load_model():
+    """Load the Osmosis Structure model and tokenizer"""
+    global model, tokenizer
+
+    try:
+        print("Loading Osmosis Structure model...")
+
+        # Load tokenizer
+        tokenizer = AutoTokenizer.from_pretrained(
+            MODEL_NAME,
+            trust_remote_code=True
+        )
+
+        # Load model
+        model = AutoModelForCausalLM.from_pretrained(
+            MODEL_NAME,
+            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+            device_map="auto" if torch.cuda.is_available() else None,
+            trust_remote_code=True
+        )
+
+        print("✅ Osmosis Structure model loaded successfully!")
+        return True
+
+    except Exception as e:
+        print(f"❌ Error loading model: {e}")
+        return False
+
+@spaces.GPU
+def text_to_json(input_text, max_tokens=512, temperature=0.6, top_p=0.95, top_k=20):
+    """Convert plain text to structured JSON using the Osmosis Structure model"""
+    global model, tokenizer
+
+    if model is None or tokenizer is None:
+        return "❌ Model not loaded. Please wait for model initialization."
+
+    try:
+        # Create a structured prompt for JSON conversion
+        messages = [
+            {
+                "role": "system",
+                "content": "You are a helpful assistant that converts unstructured text into well-formatted JSON. Extract key information and organize it into a logical, structured format. Always respond with valid JSON."
+            },
+            {
+                "role": "user",
+                "content": f"Convert this text to JSON format:\n\n{input_text}"
+            }
+        ]
+
+        # Apply chat template
+        formatted_prompt = tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True
+        )
+
+        # Tokenize the input
+        inputs = tokenizer(
+            formatted_prompt,
+            return_tensors="pt",
+            truncation=True,
+            max_length=2048
+        )
+
+        # Move to device if using GPU
+        if torch.cuda.is_available():
+            inputs = {k: v.to(model.device) for k, v in inputs.items()}
+
+        # Generation parameters based on model config
+        generation_config = {
+            "max_new_tokens": max_tokens,
+            "temperature": temperature,
+            "top_p": top_p,
+            "top_k": top_k,
+            "do_sample": True,
+            "pad_token_id": tokenizer.pad_token_id,
+            "eos_token_id": tokenizer.eos_token_id,
+            "repetition_penalty": 1.1,
+        }
+
+        # Generate response
+        with torch.no_grad():
+            outputs = model.generate(
+                **inputs,
+                **generation_config
+            )
+
+        # Decode the response
+        generated_tokens = outputs[0][len(inputs["input_ids"][0]):]
+        generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
+
+        # Clean up the response
+        generated_text = generated_text.strip()
+
+        # Try to extract JSON from the response
+        json_start = generated_text.find('{')
+        json_end = generated_text.rfind('}')
+
+        if json_start != -1 and json_end != -1 and json_end > json_start:
+            json_text = generated_text[json_start:json_end+1]
+        else:
+            # If no clear JSON boundaries, try to clean the whole response
+            json_text = generated_text
+
+        # Remove common prefixes
+        prefixes_to_remove = ["```json", "```", "Here's the JSON:", "JSON:", "```json\n"]
+        for prefix in prefixes_to_remove:
+            if json_text.startswith(prefix):
+                json_text = json_text[len(prefix):].strip()
+
+        # Remove common suffixes
+        suffixes_to_remove = ["```", "\n```"]
+        for suffix in suffixes_to_remove:
+            if json_text.endswith(suffix):
+                json_text = json_text[:-len(suffix)].strip()
+
+        # Validate and format JSON
+        try:
+            parsed_json = json.loads(json_text)
+            return json.dumps(parsed_json, indent=2, ensure_ascii=False)
+        except json.JSONDecodeError:
+            # If still not valid JSON, return the cleaned text with a note
+            return f"Generated response (may need manual cleanup):\n\n{json_text}"
+
+    except Exception as e:
+        return f"❌ Error generating JSON: {str(e)}"
+
+# Create Gradio interface
+def create_demo():
+    with gr.Blocks(
+        title="Osmosis Structure - Text to JSON Converter",
+        theme=gr.themes.Soft()
+    ) as demo:
+
+        gr.Markdown("""
+        # Osmosis Structure - Text to JSON Converter
+
+        Convert unstructured text into well-formatted JSON using the Osmosis Structure 0.6B model.
+        This model is specifically trained for structured data extraction and format conversion.
+        """)
+
+        gr.Markdown("""
+        ### ℹ️ About Osmosis Structure
+
+        - **Model**: Osmosis Structure 0.6B parameters
+        - **Architecture**: Qwen3 (specialized for structured data)
+        - **Purpose**: Converting unstructured text to structured JSON format
+        - **Optimizations**: Fine-tuned for data extraction and format conversion tasks
+
+        The model automatically identifies key information in your text and organizes it into logical JSON structures.
+        """)
+
+        with gr.Row():
+            with gr.Column(scale=1):
+                input_text = gr.Textbox(
+                    label="Input Text",
+                    placeholder="Enter your unstructured text here...\n\nExample: 'John Smith is a 30-year-old software engineer from New York. He works at Tech Corp and has 5 years of experience in Python development.'",
+                    lines=8,
+                    max_lines=15
+                )
+
+                with gr.Accordion("⚙️ Generation Settings", open=False):
+                    max_tokens = gr.Slider(
+                        minimum=50,
+                        maximum=1000,
+                        value=512,
+                        step=10,
+                        label="Max Tokens",
+                        info="Maximum number of tokens to generate"
+                    )
+
+                    temperature = gr.Slider(
+                        minimum=0.1,
+                        maximum=1.0,
+                        value=0.6,
+                        step=0.1,
+                        label="Temperature",
+                        info="Controls randomness (lower = more focused)"
+                    )
+
+                    top_p = gr.Slider(
+                        minimum=0.1,
+                        maximum=1.0,
+                        value=0.95,
+                        step=0.05,
+                        label="Top-p",
+                        info="Nucleus sampling parameter"
+                    )
+
+                    top_k = gr.Slider(
+                        minimum=1,
+                        maximum=100,
+                        value=20,
+                        step=1,
+                        label="Top-k",
+                        info="Limits vocabulary for generation"
+                    )
+
+                convert_btn = gr.Button(
+                    "Convert to JSON",
+                    variant="primary",
+                    size="lg"
+                )
+
+            with gr.Column(scale=1):
+                output_json = gr.Textbox(
+                    label="Generated JSON",
+                    lines=15,
+                    max_lines=20,
+                    interactive=False,
+                    show_copy_button=True
+                )
+
+        # Example inputs
+        gr.Markdown("### Example Inputs")
+        examples = gr.Examples(
+            examples=[
+                ["John Smith is a 30-year-old software engineer from New York. He works at Tech Corp and has 5 years of experience in Python development. His email is [email protected] and he graduated from MIT in 2018."],
+                ["Order #12345 was placed on March 15, 2024. Customer: Sarah Johnson, Address: 123 Main St, Boston MA 02101. Items: 2x Laptop ($999 each), 1x Mouse ($25). Total: $2023. Status: Shipped via FedEx, tracking: 1234567890."],
+                ["The conference will be held on June 10-12, 2024 at the Grand Hotel in San Francisco. Registration fee is $500 for early bird (before May 1) and $650 for regular registration. Contact [email protected] for questions."],
+                ["Product: Wireless Headphones Model XYZ-100. Price: $199.99. Features: Bluetooth 5.0, 30-hour battery, noise cancellation, wireless charging case. Colors available: Black, White, Blue. Warranty: 2 years. Rating: 4.5/5 stars (324 reviews)."]
+            ],
+            inputs=input_text,
+            label="Click on any example to try it"
+        )
+
+        # Event handlers
+        convert_btn.click(
+            fn=text_to_json,
+            inputs=[input_text, max_tokens, temperature, top_p, top_k],
+            outputs=output_json,
+            show_progress="full"
+        )
+
+        # Allow Enter key to trigger conversion
+        input_text.submit(
+            fn=text_to_json,
+            inputs=[input_text, max_tokens, temperature, top_p, top_k],
+            outputs=output_json,
+            show_progress="full"
+        )
+
+    return demo
+
+# Initialize the demo
+if __name__ == "__main__":
+    print("Initializing Osmosis Structure Demo...")
+
+    # Load model at startup
+    if load_model():
+        print("Creating Gradio interface...")
+        demo = create_demo()
+        demo.launch(
+            share=True,
+            show_error=True,
+            # show_tips and enable_queue are omitted: both were removed from
+            # launch() in Gradio 4+, and queueing is enabled by default.
+            ssr_mode=False,
+            mcp_server=True
+        )
+    else:
+        print("❌ Failed to load model. Please check your setup.")
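
Because the app launches with mcp_server=True, the text_to_json function above is callable programmatically as well as through the web UI; in current Gradio releases the MCP endpoint is served under /gradio_api/mcp/sse. Below is a minimal client-side sketch using gradio_client; the Space id and api_name are assumptions inferred from this commit, not confirmed values.

# Hypothetical sketch: calling the Space over the Gradio API.
# Space id and api_name below are assumed, not taken from this commit.
from gradio_client import Client

client = Client("osmosis-ai/convert-to-json")  # assumed Space id
result = client.predict(
    "John Smith is a 30-year-old software engineer from New York.",  # input_text
    512,   # max_tokens
    0.6,   # temperature
    0.95,  # top_p
    20,    # top_k
    api_name="/text_to_json",  # Gradio typically derives this from the function name
)
print(result)
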
requirements.txt
ADDED
@@ -0,0 +1,3 @@
+torch
+transformers
+accelerate
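
Note that gradio and spaces are absent from requirements.txt: on Hugging Face Spaces the Gradio version is pinned by sdk_version in the README front matter, and the spaces package comes preinstalled on ZeroGPU hardware (the mcp_server option may additionally require the gradio[mcp] extra in current releases). For running the app off-Spaces, a hedged smoke test might look like the following, assuming those packages are installed manually and the machine can hold the 0.6B model in memory.

# Hedged local smoke test; assumes gradio, spaces, and the packages above
# are installed, and that @spaces.GPU degrades to a no-op outside Spaces.
from app import load_model, text_to_json

if load_model():
    print(text_to_json(
        "Jane Doe, 28, is a data analyst at Acme Corp in Boston.",
        max_tokens=256,
    ))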