oceddyyy committed on
Commit
b271287
·
verified ·
1 Parent(s): 4f309ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -9
app.py CHANGED
@@ -5,9 +5,9 @@ from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
5
  import gradio as gr
6
  from huggingface_hub import Repository
7
  from datetime import datetime
8
-
9
  from spacy.cli import download
10
 
 
11
  try:
12
  nlp = spacy.load("en_core_web_sm")
13
  except OSError:
@@ -15,6 +15,7 @@ except OSError:
15
  download("en_core_web_sm")
16
  nlp = spacy.load("en_core_web_sm")
17
 
 
18
  qg_model = AutoModelForSeq2SeqLM.from_pretrained("valhalla/t5-base-qa-qg-hl")
19
  qg_tokenizer = AutoTokenizer.from_pretrained("valhalla/t5-base-qa-qg-hl", use_fast=True)
20
  qg_pipeline = pipeline("text2text-generation", model=qg_model, tokenizer=qg_tokenizer)
@@ -22,6 +23,7 @@ qg_pipeline = pipeline("text2text-generation", model=qg_model, tokenizer=qg_toke
22
  # Global variable to accumulate Q&A
23
  batch_data = []
24
 
 
25
  def extract_paragraph_facts(raw_text):
26
  return [p.strip() for p in raw_text.strip().split("\n\n") if p.strip()]
27
 
@@ -63,7 +65,7 @@ def generate_single_qna(fact, noun_phrase, min_len, max_len, temperature, top_k,
63
 
64
  def generate_qna_all(input_text, selected_fact, selected_np, min_len, max_len, temperature, top_k, top_p):
65
  facts = extract_paragraph_facts(input_text)
66
- global batch_data # Access global batch_data
67
 
68
  if selected_fact:
69
  noun_phrase = selected_np if selected_np else auto_highlight_noun_phrase(selected_fact)
@@ -75,7 +77,7 @@ def generate_qna_all(input_text, selected_fact, selected_np, min_len, max_len, t
75
  result = generate_single_qna(fact, noun_phrase, min_len, max_len, temperature, top_k, top_p)
76
  batch_data.append(result)
77
 
78
- return json.dumps(batch_data, indent=2, ensure_ascii=False)
79
 
80
  def save_json_to_dataset():
81
  try:
@@ -87,14 +89,14 @@ def save_json_to_dataset():
87
  return "❌ HF_TOKEN not found in environment."
88
 
89
  repo_id = "UniversityAIChatbot/University_Inquiries_AI_Chatbot"
90
- target_file = "database.json" # Or change to database.json if needed
91
  local_dir = "hf_repo"
92
 
93
  repo = Repository(
94
  local_dir=local_dir,
95
  clone_from=repo_id,
96
  use_auth_token=hf_token,
97
- repo_type="space" # ✅ THIS IS THE KEY FIX
98
  )
99
 
100
  repo.git_pull()
@@ -119,13 +121,32 @@ def save_json_to_dataset():
119
 
120
  repo.push_to_hub(commit_message="📥 Add new Q&A to database.json")
121
 
122
- # Reset the batch_data after pushing
123
- batch_data = []
124
 
125
  return "✅ Data with timestamp successfully pushed to Space!"
126
  except Exception as e:
127
  return f"❌ Error: {str(e)}"
128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  def on_extract_facts(text):
130
  facts = extract_paragraph_facts(text)
131
  default_fact = facts[0] if facts else None
@@ -135,6 +156,7 @@ def on_select_fact(fact):
135
  noun_phrases = extract_noun_phrases(fact)
136
  return gr.update(choices=noun_phrases, value=noun_phrases[0] if noun_phrases else None)
137
 
 
138
  def main():
139
  with gr.Blocks() as demo:
140
  gr.Markdown("## Paragraph-to-Question Generator (Auto Q&A for HF Dataset)")
@@ -172,17 +194,31 @@ def main():
172
  placeholder='[\n{\n"question": "Your question?",\n"answer": "Your answer."\n}\n]'
173
  )
174
 
 
 
 
 
 
 
175
  with gr.Row():
176
  generate_btn = gr.Button("Generate Q&A")
 
177
  send_btn = gr.Button("📤 Send to Dataset")
178
 
 
 
179
  generate_btn.click(
180
  fn=generate_qna_all,
181
  inputs=[input_text, fact_dropdown, np_dropdown, min_len, max_len, temperature, top_k, top_p],
182
- outputs=output_json
 
 
 
 
 
 
183
  )
184
 
185
- send_status = gr.Textbox(label="Save Status", interactive=False)
186
  send_btn.click(fn=save_json_to_dataset, inputs=None, outputs=send_status)
187
 
188
  demo.launch()
 
5
  import gradio as gr
6
  from huggingface_hub import Repository
7
  from datetime import datetime
 
8
  from spacy.cli import download
9
 
10
+ # Load or download spaCy model
11
  try:
12
  nlp = spacy.load("en_core_web_sm")
13
  except OSError:
 
15
  download("en_core_web_sm")
16
  nlp = spacy.load("en_core_web_sm")
17
 
18
+ # Load Question Generation model
19
  qg_model = AutoModelForSeq2SeqLM.from_pretrained("valhalla/t5-base-qa-qg-hl")
20
  qg_tokenizer = AutoTokenizer.from_pretrained("valhalla/t5-base-qa-qg-hl", use_fast=True)
21
  qg_pipeline = pipeline("text2text-generation", model=qg_model, tokenizer=qg_tokenizer)
 
23
  # Global variable to accumulate Q&A
24
  batch_data = []
25
 
26
+ # Utility functions
27
  def extract_paragraph_facts(raw_text):
28
  return [p.strip() for p in raw_text.strip().split("\n\n") if p.strip()]
29
 
 
65
 
66
  def generate_qna_all(input_text, selected_fact, selected_np, min_len, max_len, temperature, top_k, top_p):
67
  facts = extract_paragraph_facts(input_text)
68
+ global batch_data
69
 
70
  if selected_fact:
71
  noun_phrase = selected_np if selected_np else auto_highlight_noun_phrase(selected_fact)
 
77
  result = generate_single_qna(fact, noun_phrase, min_len, max_len, temperature, top_k, top_p)
78
  batch_data.append(result)
79
 
80
+ return json.dumps(batch_data, indent=2, ensure_ascii=False), json.dumps(batch_data, indent=2, ensure_ascii=False)
81
 
82
  def save_json_to_dataset():
83
  try:
 
89
  return "❌ HF_TOKEN not found in environment."
90
 
91
  repo_id = "UniversityAIChatbot/University_Inquiries_AI_Chatbot"
92
+ target_file = "database.json"
93
  local_dir = "hf_repo"
94
 
95
  repo = Repository(
96
  local_dir=local_dir,
97
  clone_from=repo_id,
98
  use_auth_token=hf_token,
99
+ repo_type="space"
100
  )
101
 
102
  repo.git_pull()
 
121
 
122
  repo.push_to_hub(commit_message="📥 Add new Q&A to database.json")
123
 
124
+ batch_data.clear()
 
125
 
126
  return "✅ Data with timestamp successfully pushed to Space!"
127
  except Exception as e:
128
  return f"❌ Error: {str(e)}"
129
 
130
+ # New: Preview function
131
+ def preview_batch_data():
132
+ return json.dumps(batch_data, indent=2, ensure_ascii=False)
133
+
134
+ # New: Append from manual JSON editor
135
+ def append_json_to_batch(json_text):
136
+ global batch_data
137
+ try:
138
+ new_data = json.loads(json_text)
139
+ if isinstance(new_data, dict):
140
+ new_data = [new_data]
141
+ if not isinstance(new_data, list):
142
+ return "❌ Invalid format. Must be a list or object.", preview_batch_data()
143
+
144
+ batch_data.extend(new_data)
145
+ return "✅ Successfully appended to batch_data.", preview_batch_data()
146
+ except Exception as e:
147
+ return f"❌ Error: {str(e)}", preview_batch_data()
148
+
149
+ # Dropdown callbacks
150
  def on_extract_facts(text):
151
  facts = extract_paragraph_facts(text)
152
  default_fact = facts[0] if facts else None
 
156
  noun_phrases = extract_noun_phrases(fact)
157
  return gr.update(choices=noun_phrases, value=noun_phrases[0] if noun_phrases else None)
158
 
159
+ # UI
160
  def main():
161
  with gr.Blocks() as demo:
162
  gr.Markdown("## Paragraph-to-Question Generator (Auto Q&A for HF Dataset)")
 
194
  placeholder='[\n{\n"question": "Your question?",\n"answer": "Your answer."\n}\n]'
195
  )
196
 
197
+ preview_box = gr.Textbox(
198
+ lines=14,
199
+ label="📦 Preview batch_data (Global Variable)",
200
+ interactive=False
201
+ )
202
+
203
  with gr.Row():
204
  generate_btn = gr.Button("Generate Q&A")
205
+ append_btn = gr.Button("➕ Append JSON to Global Q&A List")
206
  send_btn = gr.Button("📤 Send to Dataset")
207
 
208
+ send_status = gr.Textbox(label="Save Status", interactive=False)
209
+
210
  generate_btn.click(
211
  fn=generate_qna_all,
212
  inputs=[input_text, fact_dropdown, np_dropdown, min_len, max_len, temperature, top_k, top_p],
213
+ outputs=[output_json, preview_box]
214
+ )
215
+
216
+ append_btn.click(
217
+ fn=append_json_to_batch,
218
+ inputs=output_json,
219
+ outputs=[send_status, preview_box]
220
  )
221
 
 
222
  send_btn.click(fn=save_json_to_dataset, inputs=None, outputs=send_status)
223
 
224
  demo.launch()