zhangjf committed on
Commit
16b5280
·
1 Parent(s): feaf069

support downloading updated data

Browse files
Files changed (2) hide show
  1. app.py +1 -1
  2. merge_qas.py +56 -0
app.py CHANGED
@@ -173,7 +173,7 @@ def download(access_key):
173
  if not access_key.startswith(os.getenv('access_key')):
174
  chatbot_ret = [(f"Your entered Access Key:<br>{access_key}<br>is incorrect.", f"So i cannot provide you any information in this private space.")]
175
  file_ret = gr.File.update(value=None, visible=False)
176
- elif: access_key == f"{os.getenv('access_key')}: update":
177
  chatbot_ret = [(f"Your entered Access Key is correct.", f"The file containing new processed query-responses ({len(qas)-num_read_qas} in total) can be downloaded below.")]
178
  filename = f"qas-{num_read_qas}-{len(qas)}.json"
179
  with open(filename, "w", encoding="utf-8") as f:
 
173
  if not access_key.startswith(os.getenv('access_key')):
174
  chatbot_ret = [(f"Your entered Access Key:<br>{access_key}<br>is incorrect.", f"So i cannot provide you any information in this private space.")]
175
  file_ret = gr.File.update(value=None, visible=False)
176
+ elif access_key == f"{os.getenv('access_key')}: update":
177
  chatbot_ret = [(f"Your entered Access Key is correct.", f"The file containing new processed query-responses ({len(qas)-num_read_qas} in total) can be downloaded below.")]
178
  filename = f"qas-{num_read_qas}-{len(qas)}.json"
179
  with open(filename, "w", encoding="utf-8") as f:
merge_qas.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import collections
4
+
5
def read_qs(directory="./dialogues_set", filenames=None):
    """Read the queries stored in line-delimited dialogue files.

    Each non-blank line of a file is parsed as a JSON object, and the query
    is taken from the key equal to that line's zero-based position within
    the file (as a string).

    Args:
        directory: Folder that contains the dialogue files.
        filenames: Names of the files to read, in order. When omitted, the
            six ``dialogues_*.json`` files listed below are used.

    Returns:
        A list with every query, ordered by file and then by line.

    Raises:
        OSError: If one of the files cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a line has no entry keyed by its own line index.
    """
    if filenames is None:
        filenames = [
            'dialogues_film.json',
            'dialogues_jindong.json',
            'dialogues_music.json',
            'dialogues_natural.json',
            'dialogues_taobao.json',
            'dialogues_travel_kd.json',
        ]

    qs = []
    for filename in filenames:
        with open(os.path.join(directory, filename), "r", encoding="utf-8") as f:
            for idx, line in enumerate(f):
                # A blank line (e.g. a trailing newline at end of file) holds
                # no record; skip it instead of failing in json.loads. The
                # index still counts raw lines, so keys of later lines match.
                if not line.strip():
                    continue
                idx2query = json.loads(line)
                qs.append(idx2query[str(idx)])
    print(f"read {len(qs)} queries from files")
    return qs
24
+
25
def read_qas(directory="./dialogues_set"):
    """Collect the query-answer records from every "qas" JSON file.

    A file is read when its name ends with ``.json`` and contains ``qas``.
    Each such file is parsed as one JSON document and its items are appended
    to the result.

    Args:
        directory: Folder to scan for the files.

    Returns:
        A list with the items of all matching files, concatenated in
        lexicographic filename order.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be opened.
        json.JSONDecodeError: If a matching file is not valid JSON.
    """
    qas = []
    # os.listdir yields names in arbitrary order; sorting makes the result
    # (and anything derived from its order) reproducible between runs.
    # NOTE(review): the name filter also matches a previously written
    # "qas.json" in the same folder -- confirm that re-reading it is intended.
    for filename in sorted(os.listdir(directory)):
        if filename.endswith(".json") and "qas" in filename:
            with open(os.path.join(directory, filename), "r", encoding="utf-8") as f:
                qas.extend(json.load(f))
    return qas
35
+
36
def merge(qs, qas):
    """Pair each query with one of the answers recorded for it.

    Args:
        qs: Iterable of queries; the output follows this order.
        qas: Iterable of mappings with a ``"q"`` (query) and an ``"a"``
            (answer) entry.

    Returns:
        A list of ``{"q": query, "a": answer}`` dicts, one per occurrence of
        a query in ``qs`` for which an unused answer is still available.
        Queries without a remaining answer are left out. When a query has
        several answers, they are consumed starting from the one that
        appears last in ``qas``.

    Raises:
        KeyError: If an item of ``qas`` lacks the ``"q"`` or ``"a"`` key.
    """
    q_to_as = collections.defaultdict(list)
    for qa in qas:
        q_to_as[qa["q"]].append(qa["a"])

    merged = []
    for q in qs:
        # .get() avoids creating an empty entry for every unanswered query,
        # which plain indexing of a defaultdict would do.
        answers = q_to_as.get(q)
        if not answers:
            continue
        merged.append({"q": q, "a": answers.pop()})

    return merged
49
+
50
+
51
if __name__ == "__main__":
    # Load the queries, pair them with the collected answers, and store the
    # merged list as pretty-printed UTF-8 JSON (non-ASCII text kept as-is).
    qs = read_qs()
    qas = merge(qs, read_qas())
    with open("./dialogues_set/qas.json", "w", encoding="utf-8") as f:
        json.dump(qas, f, ensure_ascii=False, indent=2)