wannaphong committed · verified
Commit aee7a7e · 1 Parent(s): 703a5b5

Create app.py

Files changed (1):
  1. app.py +59 -0
app.py ADDED
@@ -0,0 +1,59 @@
+ import glob
+ import json
+ import os
+ import uuid
+ from pathlib import Path
+
+ import gradio as gr
+ from huggingface_hub import CommitScheduler, login
+
+ HF_TOKEN = os.getenv("HF_TOKEN")
+ login(HF_TOKEN)
+
+ # Set up local dataset storage
+ dataset_folder = Path("dataset")
+ dataset_folder.mkdir(exist_ok=True)
+
+
+ # Return the most recently created dataset file, if any
+ def get_latest_dataset_file():
+     if files := glob.glob(str(dataset_folder / "data_*.jsonl")):
+         return max(files, key=os.path.getctime)
+     return None
+
+
+ # Append to an existing dataset file if one exists, otherwise create a new one
+ if latest_file := get_latest_dataset_file():
+     dataset_file = Path(latest_file)
+     print(f"Appending to existing dataset file: {dataset_file}")
+ else:
+     dataset_file = dataset_folder / f"data_{uuid.uuid4()}.jsonl"
+     print(f"Creating new dataset file: {dataset_file}")
+
+ # Set up CommitScheduler to upload the dataset folder in the background
+ repo_id = "wannaphong/d1"  # Replace with your desired dataset repo
+ scheduler = CommitScheduler(
+     repo_id=repo_id,
+     repo_type="dataset",
+     folder_path=dataset_folder,
+     path_in_repo="data",
+     every=1,  # Upload every minute
+ )
+
+
+ # Save submitted data as a JSON line; the scheduler's lock avoids
+ # writing while an upload is in progress
+ def save_data(name):
+     data = {
+         "name": name,
+     }
+     with scheduler.lock:
+         with dataset_file.open("a") as f:
+             f.write(json.dumps(data) + "\n")
+     # return "Data saved and will be uploaded to the dataset repository."
+
+
+ def greet(name):
+     save_data(name)
+     return "Hello, " + name + "!"
+
+
+ demo = gr.Interface(
+     fn=greet,
+     inputs=["text"],
+     outputs=["text"],
+ )
+
+ demo.launch()
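Once the scheduler has pushed its first commit, the appended JSONL files land under data/ in the wannaphong/d1 dataset repo (per path_in_repo="data"). A minimal sketch of reading the collected submissions back, assuming the datasets library is installed and at least one data_*.jsonl file has already been uploaded:

# Sketch only: load the JSONL files that CommitScheduler pushed to the repo.
# Assumes the `datasets` library and that "data/*.jsonl" already exists there.
from datasets import load_dataset

ds = load_dataset("wannaphong/d1", data_files="data/*.jsonl", split="train")
print(ds[0])  # e.g. {"name": "Ada"}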