Tonic committed on
Commit
ab48ce6
·
unverified ·
1 Parent(s): 98dfc81

add demo with mcp enabled

Browse files
Files changed (3) hide show
  1. README.md +2 -2
  2. app.py +275 -0
  3. requirements.txt +3 -0
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  title: Convert To Json
3
- emoji: 🚀
4
  colorFrom: yellow
5
- colorTo: purple
6
  sdk: gradio
7
  sdk_version: 5.33.0
8
  app_file: app.py
 
1
  ---
2
  title: Convert To Json
3
+ emoji: 🔬📅📊
4
  colorFrom: yellow
5
+ colorTo: blue
6
  sdk: gradio
7
  sdk_version: 5.33.0
8
  app_file: app.py
app.py ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import torch
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer
5
+ import spaces
6
+
7
+ # Model configuration: Hugging Face Hub id passed to from_pretrained() in load_model()
8
+ MODEL_NAME = "osmosis-ai/Osmosis-Structure-0.6B"
9
+
10
+ # Module-level handles for the model and tokenizer; both stay None until load_model() assigns them
11
+ model = None
12
+ tokenizer = None
13
+
14
def load_model():
    """Load the Osmosis Structure tokenizer and model into the module globals.

    Both objects are first loaded into locals and only published to the
    ``model`` / ``tokenizer`` globals once *both* loads have succeeded, so a
    failure half-way through never leaves the module partially initialised
    (tokenizer set, model still ``None``).

    Returns:
        bool: ``True`` when tokenizer and model were loaded, ``False`` when
        any exception was raised (the error is printed, not re-raised).
    """
    global model, tokenizer

    try:
        print("Loading Osmosis Structure model...")

        use_cuda = torch.cuda.is_available()

        # Load tokenizer
        loaded_tokenizer = AutoTokenizer.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True,
        )

        # Load model
        loaded_model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            # Half precision only when a GPU is present; CPU stays in float32.
            torch_dtype=torch.float16 if use_cuda else torch.float32,
            # Automatic device placement is only requested when CUDA is usable.
            device_map="auto" if use_cuda else None,
            trust_remote_code=True,
        )
    except Exception as e:
        # Start-up boundary: report the problem and let the caller decide.
        print(f"❌ Error loading model: {e}")
        return False

    model, tokenizer = loaded_model, loaded_tokenizer
    print("✅ Osmosis Structure model loaded successfully!")
    return True
41
+
42
def _extract_json_text(generated_text):
    """Return the most likely JSON payload contained in ``generated_text``.

    Chatty prefixes and Markdown code fences are stripped first. If what is
    left already parses as JSON it is returned unchanged, which keeps
    top-level arrays intact. Otherwise the outermost ``{...}`` or ``[...]``
    span (whichever opens first) is returned, and as a last resort the
    stripped text itself.
    """
    candidate = generated_text.strip()

    # Remove common prefixes (prose lead-ins first, then fences).
    for prefix in ("Here's the JSON:", "JSON:", "```json", "```"):
        if candidate.startswith(prefix):
            candidate = candidate[len(prefix):].strip()

    # Remove a trailing code fence.
    if candidate.endswith("```"):
        candidate = candidate[:-len("```")].strip()

    try:
        json.loads(candidate)
    except json.JSONDecodeError:
        pass
    else:
        return candidate

    # Fall back to the outermost bracketed span.
    openers = [pos for pos in (candidate.find("{"), candidate.find("[")) if pos != -1]
    if not openers:
        return candidate
    start = min(openers)
    closer = "}" if candidate[start] == "{" else "]"
    end = candidate.rfind(closer)
    if end > start:
        return candidate[start:end + 1]
    return candidate


@spaces.GPU
def text_to_json(
    input_text: str,
    max_tokens: int = 512,
    temperature: float = 0.6,
    top_p: float = 0.95,
    top_k: int = 20,
) -> str:
    """Convert plain text to structured JSON using the Osmosis Structure model.

    Args:
        input_text: Unstructured text to convert.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature (lower = more focused).
        top_p: Nucleus sampling probability mass.
        top_k: Number of highest-probability tokens kept for sampling.

    Returns:
        Pretty-printed JSON when the model output parses as JSON; otherwise
        the cleaned model output prefixed with a note, or a message starting
        with "❌" when the model is not loaded, the input is empty, or
        generation raised an exception.
    """
    global model, tokenizer

    if model is None or tokenizer is None:
        return "❌ Model not loaded. Please wait for model initialization."

    # Nothing to convert: skip the generation call entirely.
    if not input_text or not input_text.strip():
        return "❌ Please enter some text to convert."

    try:
        # Create a structured prompt for JSON conversion
        messages = [
            {
                "role": "system",
                "content": "You are a helpful assistant that converts unstructured text into well-formatted JSON. Extract key information and organize it into a logical, structured format. Always respond with valid JSON.",
            },
            {
                "role": "user",
                "content": f"Convert this text to JSON format:\n\n{input_text}",
            },
        ]

        # Apply chat template
        formatted_prompt = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        # Tokenize the input
        inputs = tokenizer(
            formatted_prompt,
            return_tensors="pt",
            truncation=True,
            max_length=2048,
        )

        # Move to device if using GPU
        if torch.cuda.is_available():
            inputs = {k: v.to(model.device) for k, v in inputs.items()}

        # Fall back to EOS when the tokenizer defines no pad token.
        pad_token_id = tokenizer.pad_token_id
        if pad_token_id is None:
            pad_token_id = tokenizer.eos_token_id

        generation_config = {
            # UI sliders / remote callers may deliver floats; these two
            # settings are token counts, so coerce them to int.
            "max_new_tokens": int(max_tokens),
            "temperature": temperature,
            "top_p": top_p,
            "top_k": int(top_k),
            "do_sample": True,
            "pad_token_id": pad_token_id,
            "eos_token_id": tokenizer.eos_token_id,
            "repetition_penalty": 1.1,
        }

        # Generate response
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                **generation_config
            )

        # Decode only the newly generated tokens (drop the echoed prompt).
        prompt_length = len(inputs["input_ids"][0])
        generated_tokens = outputs[0][prompt_length:]
        generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)

        json_text = _extract_json_text(generated_text)

        # Validate and format JSON
        try:
            parsed_json = json.loads(json_text)
        except json.JSONDecodeError:
            # If still not valid JSON, return the cleaned text with a note
            return f"Generated response (may need manual cleanup):\n\n{json_text}"
        return json.dumps(parsed_json, indent=2, ensure_ascii=False)

    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the app.
        return f"❌ Error generating JSON: {e}"
140
+
141
+ # Create Gradio interface
142
def create_demo():
    """Build and return the Gradio Blocks UI for the text-to-JSON converter.

    The layout has an input column (text box, generation-settings sliders and
    a convert button) and an output column (read-only JSON text box), followed
    by clickable example inputs. The button click and the text box submit
    event are bound to ``text_to_json`` through a single ``gr.on`` listener,
    so the function is registered once rather than once per trigger.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks(
        title="Osmosis Structure - Text to JSON Converter",
        theme=gr.themes.Soft(),
    ) as demo:

        gr.Markdown("""
        # 🌊 Osmosis Structure - Text to JSON Converter

        Convert unstructured text into well-formatted JSON using the Osmosis Structure 0.6B model.
        This model is specifically trained for structured data extraction and format conversion.
        """)

        gr.Markdown("""
        ### ℹ️ About Osmosis Structure

        - **Model**: Osmosis Structure 0.6B parameters
        - **Architecture**: Qwen3 (specialized for structured data)
        - **Purpose**: Converting unstructured text to structured JSON format
        - **Optimizations**: Fine-tuned for data extraction and format conversion tasks

        The model automatically identifies key information in your text and organizes it into logical JSON structures.
        """)

        with gr.Row():
            with gr.Column(scale=1):
                input_text = gr.Textbox(
                    label="📝 Input Text",
                    placeholder="Enter your unstructured text here...\n\nExample: 'John Smith is a 30-year-old software engineer from New York. He works at Tech Corp and has 5 years of experience in Python development.'",
                    lines=8,
                    max_lines=15,
                )

                with gr.Accordion("⚙️ Generation Settings", open=False):
                    max_tokens = gr.Slider(
                        minimum=50,
                        maximum=1000,
                        value=512,
                        step=10,
                        label="Max Tokens",
                        info="Maximum number of tokens to generate",
                    )

                    temperature = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=0.6,
                        step=0.1,
                        label="Temperature",
                        info="Controls randomness (lower = more focused)",
                    )

                    top_p = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=0.95,
                        step=0.05,
                        label="Top-p",
                        info="Nucleus sampling parameter",
                    )

                    top_k = gr.Slider(
                        minimum=1,
                        maximum=100,
                        value=20,
                        step=1,
                        label="Top-k",
                        info="Limits vocabulary for generation",
                    )

                convert_btn = gr.Button(
                    "🔄 Convert to JSON",
                    variant="primary",
                    size="lg",
                )

            with gr.Column(scale=1):
                output_json = gr.Textbox(
                    label="📋 Generated JSON",
                    lines=15,
                    max_lines=20,
                    interactive=False,
                    show_copy_button=True,
                )

        # Example inputs
        # NOTE(review): "[email protected]" below looks like an e-mail address
        # redacted by the page this code was copied from - restore the
        # intended example addresses.
        gr.Markdown("### 📚 Example Inputs")
        gr.Examples(
            examples=[
                ["John Smith is a 30-year-old software engineer from New York. He works at Tech Corp and has 5 years of experience in Python development. His email is [email protected] and he graduated from MIT in 2018."],
                ["Order #12345 was placed on March 15, 2024. Customer: Sarah Johnson, Address: 123 Main St, Boston MA 02101. Items: 2x Laptop ($999 each), 1x Mouse ($25). Total: $2023. Status: Shipped via FedEx, tracking: 1234567890."],
                ["The conference will be held on June 10-12, 2024 at the Grand Hotel in San Francisco. Registration fee is $500 for early bird (before May 1) and $650 for regular registration. Contact [email protected] for questions."],
                ["Product: Wireless Headphones Model XYZ-100. Price: $199.99. Features: Bluetooth 5.0, 30-hour battery, noise cancellation, wireless charging case. Colors available: Black, White, Blue. Warranty: 2 years. Rating: 4.5/5 stars (324 reviews)."],
            ],
            inputs=input_text,
            label="Click on any example to try it",
        )

        # One listener for both triggers: the button click and the text box
        # submit event. NOTE(review): with a multi-line Textbox the submit
        # event is presumably Shift+Enter rather than plain Enter - confirm.
        gr.on(
            triggers=[convert_btn.click, input_text.submit],
            fn=text_to_json,
            inputs=[input_text, max_tokens, temperature, top_p, top_k],
            outputs=output_json,
            show_progress="full",
        )

    return demo
257
+
258
+ # Initialize the demo
259
if __name__ == "__main__":
    print("🌊 Initializing Osmosis Structure Demo...")

    # Load model at startup; without it the UI could only return errors.
    if not load_model():
        print("❌ Failed to load model. Please check your setup.")
        # Exit non-zero so the failure is visible to whatever started the app.
        raise SystemExit(1)

    print("🚀 Creating Gradio interface...")
    demo = create_demo()
    # `show_tips` and `enable_queue` are no longer accepted by
    # Blocks.launch() in the Gradio version pinned for this Space
    # (sdk_version 5.33.0), so passing them raised TypeError.
    demo.launch(
        share=True,
        show_error=True,
        ssr_mode=False,
        mcp_server=True,
    )
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ torch
2
+ transformers
3
+ accelerate