oberbics committed on
Commit
26a1605
·
verified ·
1 Parent(s): 095dbb9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -35
app.py CHANGED
@@ -2,93 +2,108 @@ import json
2
  import gradio as gr
3
  import requests
4
  import os
 
5
 
6
  # Hugging Face API details
7
  API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"
8
  api_token = os.environ.get("HF_TOKEN", "") # Get token from environment variable
9
-
10
  headers = {"Authorization": f"Bearer {api_token}"}
11
 
 
12
  def query_api(payload):
13
  response = requests.post(API_URL, headers=headers, json=payload)
14
- return response.json()
 
 
 
 
 
 
 
 
 
15
 
16
  def extract_structure(template, text):
17
  try:
18
- # Format the input following NuExtract's format
19
  prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
20
-
21
- # Call the API
22
  payload = {
23
  "inputs": prompt,
24
  "parameters": {
25
  "max_new_tokens": 2000,
26
- "temperature": 0.01, # Nearly deterministic as recommended
27
  "return_full_text": True
28
  }
29
  }
30
-
31
  response = query_api(payload)
32
-
33
- # Check for errors
34
  if isinstance(response, dict) and "error" in response:
35
  return f"API Error: {response['error']}", "{}", f"<p>Error occurred: {response['error']}</p>"
36
-
37
- # Extract result - the API returns the full text so we need to split it
38
  if isinstance(response, list) and len(response) > 0:
39
  output = response[0].get("generated_text", "")
40
- result = output.split("<|output|>")[1] if "<|output|>" in output else output
41
-
42
- # Try to parse as JSON to format it nicely
 
 
 
 
 
 
 
 
43
  try:
44
  parsed = json.loads(result)
45
  result = json.dumps(parsed, indent=2)
46
- except:
47
  pass
48
-
49
- # Create a simple highlight
50
- highlighted = f"<p>Successfully processed text of length {len(text)} characters</p>"
51
-
52
- return "Processing complete", result, highlighted
53
  else:
54
- return "Unexpected API response", str(response), "<p>Please check API token and try again</p>"
55
-
56
  except Exception as e:
57
- return f"Error: {str(e)}", "{}", f"<p>Processing failed: {str(e)}</p>"
58
 
59
- # Create interface
 
60
  with gr.Blocks() as demo:
61
- gr.Markdown("# NuExtract-1.5 Demo")
62
-
63
  if not api_token:
64
- gr.Markdown("## ⚠️ No API token found. Set HF_TOKEN in Space secrets.")
65
-
66
  with gr.Row():
67
  with gr.Column():
68
  template_input = gr.Textbox(
69
- label="Template (JSON)",
70
  value='{"name": "", "email": ""}',
71
  lines=5
72
  )
73
  text_input = gr.Textbox(
74
- label="Input Text",
75
  value="Contact: John Smith ([email protected])",
76
  lines=10
77
  )
78
  submit_btn = gr.Button("Extract Information")
79
-
80
  with gr.Column():
81
  progress_output = gr.Textbox(label="Progress")
82
  result_output = gr.Textbox(label="Extracted Information")
83
- html_output = gr.HTML(label="Highlighted Text")
84
-
85
  submit_btn.click(
86
  fn=extract_structure,
87
  inputs=[template_input, text_input],
88
  outputs=[progress_output, result_output, html_output]
89
  )
90
 
91
- # Examples
92
  gr.Examples(
93
  [
94
  [
@@ -114,4 +129,4 @@ with gr.Blocks() as demo:
114
  )
115
 
116
  if __name__ == "__main__":
117
- demo.launch()
 
2
  import gradio as gr
3
  import requests
4
  import os
5
+ import re
6
 
7
  # Hugging Face API details
8
  API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"
9
  api_token = os.environ.get("HF_TOKEN", "") # Get token from environment variable
 
10
  headers = {"Authorization": f"Bearer {api_token}"}
11
 
12
+
13
  def query_api(payload):
14
  response = requests.post(API_URL, headers=headers, json=payload)
15
+
16
+ # Debug logs
17
+ print("API STATUS CODE:", response.status_code)
18
+ print("RAW RESPONSE:", response.text)
19
+
20
+ try:
21
+ return response.json()
22
+ except Exception as e:
23
+ return {"error": f"Could not decode JSON: {str(e)}"}
24
+
25
 
26
  def extract_structure(template, text):
27
  try:
28
+ # Format the input for NuExtract
29
  prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
30
+
 
31
  payload = {
32
  "inputs": prompt,
33
  "parameters": {
34
  "max_new_tokens": 2000,
35
+ "temperature": 0.01,
36
  "return_full_text": True
37
  }
38
  }
39
+
40
  response = query_api(payload)
41
+
42
+ # Check for API error
43
  if isinstance(response, dict) and "error" in response:
44
  return f"API Error: {response['error']}", "{}", f"<p>Error occurred: {response['error']}</p>"
45
+
46
+ # Get generated text
47
  if isinstance(response, list) and len(response) > 0:
48
  output = response[0].get("generated_text", "")
49
+ print("Generated Text:", output) # Optional debugging
50
+
51
+ # Try to extract after <|output|>
52
+ if "<|output|>" in output:
53
+ result = output.split("<|output|>")[-1].strip()
54
+ else:
55
+ # Try to extract JSON-like structure using regex
56
+ json_match = re.search(r'({[\s\S]+})', output)
57
+ result = json_match.group(1) if json_match else output.strip()
58
+
59
+ # Attempt to format JSON nicely
60
  try:
61
  parsed = json.loads(result)
62
  result = json.dumps(parsed, indent=2)
63
+ except Exception:
64
  pass
65
+
66
+ highlighted = f"<p>✅ Successfully processed input of length {len(text)} characters.</p>"
67
+ return " Extraction Complete", result, highlighted
 
 
68
  else:
69
+ return "⚠️ Unexpected API Response", json.dumps(response, indent=2), "<p>Please check the API response format.</p>"
70
+
71
  except Exception as e:
72
+ return f"Error: {str(e)}", "{}", f"<p>Processing failed: {str(e)}</p>"
73
 
74
+
75
+ # Gradio Interface
76
  with gr.Blocks() as demo:
77
+ gr.Markdown("# 🧠 NuExtract-1.5 Information Extractor")
78
+
79
  if not api_token:
80
+ gr.Markdown("## ⚠️ No API token found. Set `HF_TOKEN` in the Space secrets.")
81
+
82
  with gr.Row():
83
  with gr.Column():
84
  template_input = gr.Textbox(
85
+ label="Template (JSON)",
86
  value='{"name": "", "email": ""}',
87
  lines=5
88
  )
89
  text_input = gr.Textbox(
90
+ label="Input Text",
91
  value="Contact: John Smith ([email protected])",
92
  lines=10
93
  )
94
  submit_btn = gr.Button("Extract Information")
95
+
96
  with gr.Column():
97
  progress_output = gr.Textbox(label="Progress")
98
  result_output = gr.Textbox(label="Extracted Information")
99
+ html_output = gr.HTML(label="Info")
100
+
101
  submit_btn.click(
102
  fn=extract_structure,
103
  inputs=[template_input, text_input],
104
  outputs=[progress_output, result_output, html_output]
105
  )
106
 
 
107
  gr.Examples(
108
  [
109
  [
 
129
  )
130
 
131
  if __name__ == "__main__":
132
+ demo.launch()