oberbics committed
Commit 095dbb9 · verified · 1 Parent(s): cd66763

Update app.py

Files changed (1): app.py +65 -33
app.py CHANGED
@@ -1,49 +1,67 @@
 import json
-import torch
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
+import requests
+import os
 
-# Simplified extraction function
-def extract_structure(template, text, progress=None):
+# Hugging Face API details
+API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"
+api_token = os.environ.get("HF_TOKEN", "")  # Get token from environment variable
+
+headers = {"Authorization": f"Bearer {api_token}"}
+
+def query_api(payload):
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
+
+def extract_structure(template, text):
     try:
-        # Format the input
+        # Format the input following NuExtract's format
         prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
 
-        # Generate prediction
-        input_ids = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)
-        output = tokenizer.decode(model.generate(**input_ids, max_new_tokens=2000)[0], skip_special_tokens=True)
-
-        # Extract result
-        result = output.split("<|output|>")[1]
+        # Call the API
+        payload = {
+            "inputs": prompt,
+            "parameters": {
+                "max_new_tokens": 2000,
+                "temperature": 0.01,  # Nearly deterministic as recommended
+                "return_full_text": True
+            }
+        }
 
-        # Highlight found items in text (simplified)
-        highlighted = f"<p>Processed text of length {len(text)} characters</p>"
+        response = query_api(payload)
 
-        return "Processing complete", result, highlighted
+        # Check for errors
+        if isinstance(response, dict) and "error" in response:
+            return f"API Error: {response['error']}", "{}", f"<p>Error occurred: {response['error']}</p>"
+
+        # Extract result - the API returns the full text so we need to split it
+        if isinstance(response, list) and len(response) > 0:
+            output = response[0].get("generated_text", "")
+            result = output.split("<|output|>")[1] if "<|output|>" in output else output
+
+            # Try to parse as JSON to format it nicely
+            try:
+                parsed = json.loads(result)
+                result = json.dumps(parsed, indent=2)
+            except:
+                pass
+
+            # Create a simple highlight
+            highlighted = f"<p>Successfully processed text of length {len(text)} characters</p>"
+
+            return "Processing complete", result, highlighted
+        else:
+            return "Unexpected API response", str(response), "<p>Please check API token and try again</p>"
+
     except Exception as e:
-        return f"Error: {str(e)}", "{}", "<p>Processing failed</p>"
-
-# Load model
-model_name = "numind/NuExtract-1.5"
-try:
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        torch_dtype=torch.float16,  # Using float16 instead of bfloat16 for better compatibility
-        trust_remote_code=True,
-        device_map="auto"
-    )
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model_loaded = True
-except Exception as e:
-    print(f"Model loading error: {e}")
-    model_loaded = False
+        return f"Error: {str(e)}", "{}", f"<p>Processing failed: {str(e)}</p>"
 
 # Create interface
 with gr.Blocks() as demo:
     gr.Markdown("# NuExtract-1.5 Demo")
 
-    if not model_loaded:
-        gr.Markdown("## ⚠️ Model failed to load. Using dummy mode.")
+    if not api_token:
+        gr.Markdown("## ⚠️ No API token found. Set HF_TOKEN in Space secrets.")
 
     with gr.Row():
         with gr.Column():
@@ -70,12 +88,26 @@ with gr.Blocks() as demo:
         outputs=[progress_output, result_output, html_output]
     )
 
-    # Simple example
+    # Examples
    gr.Examples(
         [
             [
                 '{"name": "", "email": ""}',
                 'Contact: John Smith ([email protected])'
+            ],
+            [
+                '''{
+    "Model": {
+        "Name": "",
+        "Number of parameters": "",
+        "Architecture": []
+    },
+    "Usage": {
+        "Use case": [],
+        "License": ""
+    }
+}''',
+                '''We introduce Mistral 7B, a 7-billion-parameter language model engineered for superior performance and efficiency. Mistral 7B outperforms the best open 13B model (Llama 2) across all evaluated benchmarks, and the best released 34B model (Llama 1) in reasoning, mathematics, and code generation. Our model is released under the Apache 2.0 license.'''
             ]
         ],
         [template_input, text_input]
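
To sanity-check the new code path outside the Space, the same request flow can be exercised directly against the Inference API. The following is a minimal sketch, not part of the commit: it assumes HF_TOKEN is exported in the environment and reuses the endpoint, prompt format, and generation parameters from the diff above, with the first gr.Examples pair as input.

import os
import requests

API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"
headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}  # assumes HF_TOKEN is set

template = '{"name": "", "email": ""}'
text = 'Contact: John Smith ([email protected])'
prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"

payload = {
    "inputs": prompt,
    "parameters": {
        "max_new_tokens": 2000,
        "temperature": 0.01,
        "return_full_text": True,
    },
}

response = requests.post(API_URL, headers=headers, json=payload).json()
if isinstance(response, list):
    generated = response[0].get("generated_text", "")
    # return_full_text=True echoes the prompt, so split on the output marker
    print(generated.split("<|output|>")[-1].strip())
else:
    # Error responses arrive as a dict, e.g. {"error": "Model ... is currently loading"}
    print("API error:", response)

This mirrors the two design choices in the commit: temperature is pinned at 0.01 so extraction stays near-deterministic, and because return_full_text is True, the split on <|output|> is what isolates the generated JSON from the echoed prompt.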