oceddyyy commited on
Commit
4f309ed
·
verified ·
1 Parent(s): bd9d487

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -14
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import json
2
- import re
3
  import os
4
  import spacy
5
  from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
@@ -20,6 +19,9 @@ qg_model = AutoModelForSeq2SeqLM.from_pretrained("valhalla/t5-base-qa-qg-hl")
20
  qg_tokenizer = AutoTokenizer.from_pretrained("valhalla/t5-base-qa-qg-hl", use_fast=True)
21
  qg_pipeline = pipeline("text2text-generation", model=qg_model, tokenizer=qg_tokenizer)
22
 
 
 
 
23
  def extract_paragraph_facts(raw_text):
24
  return [p.strip() for p in raw_text.strip().split("\n\n") if p.strip()]
25
 
@@ -61,28 +63,31 @@ def generate_single_qna(fact, noun_phrase, min_len, max_len, temperature, top_k,
61
 
62
  def generate_qna_all(input_text, selected_fact, selected_np, min_len, max_len, temperature, top_k, top_p):
63
  facts = extract_paragraph_facts(input_text)
64
- results = []
65
 
66
  if selected_fact:
67
  noun_phrase = selected_np if selected_np else auto_highlight_noun_phrase(selected_fact)
68
  result = generate_single_qna(selected_fact, noun_phrase, min_len, max_len, temperature, top_k, top_p)
69
- results.append(result)
70
  else:
71
  for fact in facts:
72
  noun_phrase = auto_highlight_noun_phrase(fact)
73
  result = generate_single_qna(fact, noun_phrase, min_len, max_len, temperature, top_k, top_p)
74
- results.append(result)
75
 
76
- return json.dumps(results, indent=2, ensure_ascii=False)
77
 
78
- def save_json_to_dataset(json_str):
79
  try:
 
 
 
80
  hf_token = os.environ.get("QandA_Generator")
81
  if not hf_token:
82
  return "❌ HF_TOKEN not found in environment."
83
 
84
  repo_id = "UniversityAIChatbot/University_Inquiries_AI_Chatbot"
85
- target_file = "dataset.json" # Or change to database.json if needed
86
  local_dir = "hf_repo"
87
 
88
  repo = Repository(
@@ -102,19 +107,20 @@ def save_json_to_dataset(json_str):
102
  else:
103
  existing_data = []
104
 
105
- new_data = json.loads(json_str)
106
-
107
  now = datetime.now()
108
- for entry in new_data:
109
  entry["month"] = now.strftime("%B")
110
  entry["year"] = now.year
111
 
112
- updated_data = existing_data + new_data
113
 
114
  with open(full_path, "w", encoding="utf-8") as f:
115
  json.dump(updated_data, f, indent=2, ensure_ascii=False)
116
 
117
- repo.push_to_hub(commit_message="πŸ“₯ Add new Q&A to dataset.json")
 
 
 
118
 
119
  return "βœ… Data with timestamp successfully pushed to Space!"
120
  except Exception as e:
@@ -163,7 +169,7 @@ def main():
163
  lines=14,
164
  label="Q&A JSON",
165
  interactive=True,
166
- placeholder='{\n"question": "Your question?",\n"answer": "Your answer."\n},'
167
  )
168
 
169
  with gr.Row():
@@ -177,7 +183,7 @@ def main():
177
  )
178
 
179
  send_status = gr.Textbox(label="Save Status", interactive=False)
180
- send_btn.click(fn=save_json_to_dataset, inputs=output_json, outputs=send_status)
181
 
182
  demo.launch()
183
 
 
1
  import json
 
2
  import os
3
  import spacy
4
  from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
 
19
  qg_tokenizer = AutoTokenizer.from_pretrained("valhalla/t5-base-qa-qg-hl", use_fast=True)
20
  qg_pipeline = pipeline("text2text-generation", model=qg_model, tokenizer=qg_tokenizer)
21
 
22
+ # Global variable to accumulate Q&A
23
+ batch_data = []
24
+
25
def extract_paragraph_facts(raw_text):
    """Split raw text into paragraph "facts".

    Paragraphs are separated by blank lines (double newline); leading/trailing
    whitespace is stripped and empty paragraphs are dropped.
    """
    facts = []
    for paragraph in raw_text.strip().split("\n\n"):
        cleaned = paragraph.strip()
        if cleaned:
            facts.append(cleaned)
    return facts
27
 
 
63
 
64
def generate_qna_all(input_text, selected_fact, selected_np, min_len, max_len, temperature, top_k, top_p):
    """Generate Q&A pairs and accumulate them in the module-level ``batch_data``.

    If ``selected_fact`` is given, only that fact is processed, using
    ``selected_np`` when provided or an auto-detected noun phrase otherwise.
    If not, every paragraph fact extracted from ``input_text`` is processed.

    Returns the whole accumulated batch (not just this call's results) as a
    pretty-printed JSON string.

    NOTE(review): results accumulate across calls until save_json_to_dataset()
    resets batch_data — repeated generation on the same input duplicates entries.
    """
    global batch_data  # shared accumulator, reset by save_json_to_dataset()

    if selected_fact:
        # Single-fact mode: prefer the user-chosen noun phrase, fall back to auto-detection.
        noun_phrase = selected_np or auto_highlight_noun_phrase(selected_fact)
        batch_data.append(
            generate_single_qna(selected_fact, noun_phrase, min_len, max_len, temperature, top_k, top_p)
        )
    else:
        # Batch mode: only here do we need to split the input into paragraph facts
        # (the original computed this unconditionally, wasting work in single-fact mode).
        for fact in extract_paragraph_facts(input_text):
            noun_phrase = auto_highlight_noun_phrase(fact)
            batch_data.append(
                generate_single_qna(fact, noun_phrase, min_len, max_len, temperature, top_k, top_p)
            )

    return json.dumps(batch_data, indent=2, ensure_ascii=False)
79
 
80
+ def save_json_to_dataset():
81
  try:
82
+ if not batch_data:
83
+ return "❌ No data to save. Generate some Q&A first."
84
+
85
  hf_token = os.environ.get("QandA_Generator")
86
  if not hf_token:
87
  return "❌ HF_TOKEN not found in environment."
88
 
89
  repo_id = "UniversityAIChatbot/University_Inquiries_AI_Chatbot"
90
+ target_file = "database.json" # Or change to database.json if needed
91
  local_dir = "hf_repo"
92
 
93
  repo = Repository(
 
107
  else:
108
  existing_data = []
109
 
 
 
110
  now = datetime.now()
111
+ for entry in batch_data:
112
  entry["month"] = now.strftime("%B")
113
  entry["year"] = now.year
114
 
115
+ updated_data = existing_data + batch_data
116
 
117
  with open(full_path, "w", encoding="utf-8") as f:
118
  json.dump(updated_data, f, indent=2, ensure_ascii=False)
119
 
120
+ repo.push_to_hub(commit_message="πŸ“₯ Add new Q&A to database.json")
121
+
122
+ # Reset the batch_data after pushing
123
+ batch_data = []
124
 
125
  return "βœ… Data with timestamp successfully pushed to Space!"
126
  except Exception as e:
 
169
  lines=14,
170
  label="Q&A JSON",
171
  interactive=True,
172
+ placeholder='[\n{\n"question": "Your question?",\n"answer": "Your answer."\n}\n]'
173
  )
174
 
175
  with gr.Row():
 
183
  )
184
 
185
  send_status = gr.Textbox(label="Save Status", interactive=False)
186
+ send_btn.click(fn=save_json_to_dataset, inputs=None, outputs=send_status)
187
 
188
  demo.launch()
189