Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
import json
|
2 |
-
import re
|
3 |
import os
|
4 |
import spacy
|
5 |
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
|
@@ -20,6 +19,9 @@ qg_model = AutoModelForSeq2SeqLM.from_pretrained("valhalla/t5-base-qa-qg-hl")
|
|
20 |
qg_tokenizer = AutoTokenizer.from_pretrained("valhalla/t5-base-qa-qg-hl", use_fast=True)
|
21 |
qg_pipeline = pipeline("text2text-generation", model=qg_model, tokenizer=qg_tokenizer)
|
22 |
|
|
|
|
|
|
|
23 |
def extract_paragraph_facts(raw_text):
|
24 |
return [p.strip() for p in raw_text.strip().split("\n\n") if p.strip()]
|
25 |
|
@@ -61,28 +63,31 @@ def generate_single_qna(fact, noun_phrase, min_len, max_len, temperature, top_k,
|
|
61 |
|
62 |
def generate_qna_all(input_text, selected_fact, selected_np, min_len, max_len, temperature, top_k, top_p):
|
63 |
facts = extract_paragraph_facts(input_text)
|
64 |
-
|
65 |
|
66 |
if selected_fact:
|
67 |
noun_phrase = selected_np if selected_np else auto_highlight_noun_phrase(selected_fact)
|
68 |
result = generate_single_qna(selected_fact, noun_phrase, min_len, max_len, temperature, top_k, top_p)
|
69 |
-
|
70 |
else:
|
71 |
for fact in facts:
|
72 |
noun_phrase = auto_highlight_noun_phrase(fact)
|
73 |
result = generate_single_qna(fact, noun_phrase, min_len, max_len, temperature, top_k, top_p)
|
74 |
-
|
75 |
|
76 |
-
return json.dumps(
|
77 |
|
78 |
-
def save_json_to_dataset(
|
79 |
try:
|
|
|
|
|
|
|
80 |
hf_token = os.environ.get("QandA_Generator")
|
81 |
if not hf_token:
|
82 |
return "β HF_TOKEN not found in environment."
|
83 |
|
84 |
repo_id = "UniversityAIChatbot/University_Inquiries_AI_Chatbot"
|
85 |
-
target_file = "
|
86 |
local_dir = "hf_repo"
|
87 |
|
88 |
repo = Repository(
|
@@ -102,19 +107,20 @@ def save_json_to_dataset(json_str):
|
|
102 |
else:
|
103 |
existing_data = []
|
104 |
|
105 |
-
new_data = json.loads(json_str)
|
106 |
-
|
107 |
now = datetime.now()
|
108 |
-
for entry in
|
109 |
entry["month"] = now.strftime("%B")
|
110 |
entry["year"] = now.year
|
111 |
|
112 |
-
updated_data = existing_data +
|
113 |
|
114 |
with open(full_path, "w", encoding="utf-8") as f:
|
115 |
json.dump(updated_data, f, indent=2, ensure_ascii=False)
|
116 |
|
117 |
-
repo.push_to_hub(commit_message="π₯ Add new Q&A to
|
|
|
|
|
|
|
118 |
|
119 |
return "β
Data with timestamp successfully pushed to Space!"
|
120 |
except Exception as e:
|
@@ -163,7 +169,7 @@ def main():
|
|
163 |
lines=14,
|
164 |
label="Q&A JSON",
|
165 |
interactive=True,
|
166 |
-
placeholder='{\n"question": "Your question?",\n"answer": "Your answer."\n}
|
167 |
)
|
168 |
|
169 |
with gr.Row():
|
@@ -177,7 +183,7 @@ def main():
|
|
177 |
)
|
178 |
|
179 |
send_status = gr.Textbox(label="Save Status", interactive=False)
|
180 |
-
send_btn.click(fn=save_json_to_dataset, inputs=
|
181 |
|
182 |
demo.launch()
|
183 |
|
|
|
1 |
import json
|
|
|
2 |
import os
|
3 |
import spacy
|
4 |
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
|
|
|
19 |
qg_tokenizer = AutoTokenizer.from_pretrained("valhalla/t5-base-qa-qg-hl", use_fast=True)
|
20 |
qg_pipeline = pipeline("text2text-generation", model=qg_model, tokenizer=qg_tokenizer)
|
21 |
|
22 |
+
# Global variable to accumulate Q&A
|
23 |
+
batch_data = []
|
24 |
+
|
25 |
def extract_paragraph_facts(raw_text):
|
26 |
return [p.strip() for p in raw_text.strip().split("\n\n") if p.strip()]
|
27 |
|
|
|
63 |
|
64 |
def generate_qna_all(input_text, selected_fact, selected_np, min_len, max_len, temperature, top_k, top_p):
|
65 |
facts = extract_paragraph_facts(input_text)
|
66 |
+
global batch_data # Access global batch_data
|
67 |
|
68 |
if selected_fact:
|
69 |
noun_phrase = selected_np if selected_np else auto_highlight_noun_phrase(selected_fact)
|
70 |
result = generate_single_qna(selected_fact, noun_phrase, min_len, max_len, temperature, top_k, top_p)
|
71 |
+
batch_data.append(result)
|
72 |
else:
|
73 |
for fact in facts:
|
74 |
noun_phrase = auto_highlight_noun_phrase(fact)
|
75 |
result = generate_single_qna(fact, noun_phrase, min_len, max_len, temperature, top_k, top_p)
|
76 |
+
batch_data.append(result)
|
77 |
|
78 |
+
return json.dumps(batch_data, indent=2, ensure_ascii=False)
|
79 |
|
80 |
+
def save_json_to_dataset():
|
81 |
try:
|
82 |
+
if not batch_data:
|
83 |
+
return "β No data to save. Generate some Q&A first."
|
84 |
+
|
85 |
hf_token = os.environ.get("QandA_Generator")
|
86 |
if not hf_token:
|
87 |
return "β HF_TOKEN not found in environment."
|
88 |
|
89 |
repo_id = "UniversityAIChatbot/University_Inquiries_AI_Chatbot"
|
90 |
+
target_file = "database.json" # Or change to database.json if needed
|
91 |
local_dir = "hf_repo"
|
92 |
|
93 |
repo = Repository(
|
|
|
107 |
else:
|
108 |
existing_data = []
|
109 |
|
|
|
|
|
110 |
now = datetime.now()
|
111 |
+
for entry in batch_data:
|
112 |
entry["month"] = now.strftime("%B")
|
113 |
entry["year"] = now.year
|
114 |
|
115 |
+
updated_data = existing_data + batch_data
|
116 |
|
117 |
with open(full_path, "w", encoding="utf-8") as f:
|
118 |
json.dump(updated_data, f, indent=2, ensure_ascii=False)
|
119 |
|
120 |
+
repo.push_to_hub(commit_message="π₯ Add new Q&A to database.json")
|
121 |
+
|
122 |
+
# Reset the batch_data after pushing
|
123 |
+
batch_data = []
|
124 |
|
125 |
return "β
Data with timestamp successfully pushed to Space!"
|
126 |
except Exception as e:
|
|
|
169 |
lines=14,
|
170 |
label="Q&A JSON",
|
171 |
interactive=True,
|
172 |
+
placeholder='[\n{\n"question": "Your question?",\n"answer": "Your answer."\n}\n]'
|
173 |
)
|
174 |
|
175 |
with gr.Row():
|
|
|
183 |
)
|
184 |
|
185 |
send_status = gr.Textbox(label="Save Status", interactive=False)
|
186 |
+
send_btn.click(fn=save_json_to_dataset, inputs=None, outputs=send_status)
|
187 |
|
188 |
demo.launch()
|
189 |
|