nandezgarcia committed on
Commit
911d165
·
verified ·
1 Parent(s): 6aca797

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -72
app.py CHANGED
@@ -17,54 +17,43 @@ HF_DATASET_REPO = "boe-preference-summaries-results" # Change this to your desi
17
  HF_USERNAME = os.environ.get("HF_USERNAME", "") # Set your HuggingFace username as an environment variable
18
 
19
  def load_data():
20
- """Load data from CSV file"""
21
  if os.path.exists(INPUT_CSV):
22
  return pd.read_csv(INPUT_CSV)
23
  else:
24
- # Create empty dataframe with required columns if file doesn't exist
25
- return pd.DataFrame(columns=["text_id", "text", "summary_a", "summary_b"])
26
 
27
  def initialize_hf_dataset():
28
- """Initialize a HuggingFace dataset repository if it doesn't exist"""
29
  if not HF_TOKEN or not HF_USERNAME:
30
- return False, "HuggingFace credentials not found. Please set HF_TOKEN and HF_USERNAME environment variables."
31
-
32
  try:
33
  api = HfApi(token=HF_TOKEN)
34
  repo_id = f"{HF_USERNAME}/{HF_DATASET_REPO}"
35
-
36
- # Check if the repo exists, if not create it
37
  try:
38
  api.repo_info(repo_id=repo_id, repo_type="dataset")
39
- print(f"Repository {repo_id} already exists")
40
  except Exception:
41
- print(f"Creating repository {repo_id}")
42
  create_repo(repo_id=repo_id, repo_type="dataset", token=HF_TOKEN)
43
-
44
- # Create an empty dataset file if it doesn't exist
45
  if not os.path.exists(TEMP_JSON):
46
  with open(TEMP_JSON, "w") as f:
47
  pass
48
-
49
- return True, f"{repo_id}"
50
  except Exception as e:
51
- return False, f"Error initializing HuggingFace dataset: {str(e)}"
52
 
53
  def push_to_hf_dataset(data_row):
54
- """Push a new data row to the HuggingFace dataset"""
55
  if not HF_TOKEN or not HF_USERNAME:
56
  return False, "HuggingFace credentials not found"
57
-
58
  try:
59
- # Append the new data to the JSONL file
60
  with open(TEMP_JSON, "a") as f:
61
  f.write(json.dumps(data_row) + "\n")
62
-
63
- # Upload the file to HuggingFace
64
  api = HfApi(token=HF_TOKEN)
65
  repo_id = f"{HF_USERNAME}/{HF_DATASET_REPO}"
66
-
67
- # Upload the JSONL file
68
  upload_file(
69
  path_or_fileobj=TEMP_JSON,
70
  path_in_repo="data.jsonl",
@@ -72,17 +61,15 @@ def push_to_hf_dataset(data_row):
72
  repo_type="dataset",
73
  token=HF_TOKEN
74
  )
75
-
76
  return True, f"Data pushed to {repo_id}"
77
  except Exception as e:
78
- return False, f"Error pushing to HuggingFace: {str(e)}"
79
 
80
  def save_choice(text_id, original_text, summary_a, summary_b, choice, notes="", request_id=""):
81
- """Save the user's choice locally and to HuggingFace dataset"""
82
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
83
  chosen_summary = "A" if choice == "Summary A" else "B"
84
-
85
- # Create a new row with the data
86
  new_row = {
87
  "timestamp": timestamp,
88
  "text_id": text_id,
@@ -93,85 +80,65 @@ def save_choice(text_id, original_text, summary_a, summary_b, choice, notes="",
93
  "notes": notes,
94
  "request_id": request_id
95
  }
96
-
97
- # Save locally
98
  if os.path.exists(OUTPUT_CSV):
99
  results_df = pd.read_csv(OUTPUT_CSV)
100
  results_df = pd.concat([results_df, pd.DataFrame([new_row])], ignore_index=True)
101
  else:
102
  results_df = pd.DataFrame([new_row])
103
-
104
  results_df.to_csv(OUTPUT_CSV, index=False)
105
-
106
- # Push to HuggingFace
107
  success, message = push_to_hf_dataset(new_row)
108
-
109
  request_id_msg = f" (Request ID: {request_id})" if request_id else ""
110
-
111
  if success:
112
- return f"Selection saved for text ID: {text_id}{request_id_msg}! You chose {'Summary A' if choice == 'Summary A' else 'Summary B'}. Pushed to HuggingFace."
113
  else:
114
  return f"Selection saved locally for text ID: {text_id}{request_id_msg}. HuggingFace push failed: {message}"
115
 
 
116
  class SummaryChooser:
117
  def __init__(self):
118
  self.df = load_data()
119
  self.current_index = 0
120
  self.total_items = len(self.df)
121
  self.hf_status = initialize_hf_dataset()
122
- self.request_id = "" # Initialize empty request ID
123
-
124
  def set_request_id(self, request: gr.Request):
125
- """Set the request ID from the URL query parameters"""
126
- try:
127
- query_params = request.query_params
128
- self.request_id = query_params.get("id", "")
129
- return f"Request ID: {self.request_id}" if self.request_id else "No Request ID provided"
130
- except:
131
- self.request_id = ""
132
- return "Failed to get Request ID"
133
-
134
  def get_current_item(self):
135
- """Get the current item from the dataframe"""
136
  if self.total_items == 0:
137
- return "", "", "", "", f"No data found in {INPUT_CSV}. Please check the file path."
138
-
139
  row = self.df.iloc[self.current_index]
140
  progress = f"Item {self.current_index + 1} of {self.total_items}"
141
- return row["id"], row["text"], row["summary_a"], row["summary_b"], progress
142
-
143
  def next_item(self, choice, notes):
144
- """Save current choice and move to next item"""
145
  if self.total_items == 0:
146
  return "", "", "", "", "No data available", ""
147
-
148
- # Get current values
149
  text_id, text, summary_a, summary_b, _ = self.get_current_item()
150
-
151
- # Save the choice with the request ID
152
  result_message = save_choice(text_id, text, summary_a, summary_b, choice, notes, self.request_id)
153
-
154
- # Move to next item or wrap around
155
  self.current_index = (self.current_index + 1) % self.total_items
156
-
157
- # Get next item
158
- text_id, text, summary_a, summary_b, progress = self.get_current_item()
159
- return text_id, text, summary_a, summary_b, progress, result_message
160
-
161
  def prev_item(self):
162
- """Move to previous item"""
163
  if self.total_items == 0:
164
  return "", "", "", "", "No data available", ""
165
-
166
- # Move to previous item or wrap around
167
  self.current_index = (self.current_index - 1) % self.total_items
168
-
169
- # Get the item
170
- text_id, text, summary_a, summary_b, progress = self.get_current_item()
171
- return text_id, text, summary_a, summary_b, progress, ""
172
-
173
  def get_hf_status(self):
174
- """Get the status of HuggingFace integration"""
175
  success, message = self.hf_status
176
  return f"{'Connected' if success else 'Not Connected'} - {message}"
177
 
 
17
  HF_USERNAME = os.environ.get("HF_USERNAME", "") # Set your HuggingFace username as an environment variable
18
 
19
  def load_data():
 
20
  if os.path.exists(INPUT_CSV):
21
  return pd.read_csv(INPUT_CSV)
22
  else:
23
+ return pd.DataFrame(columns=["id", "boe_text_cleaned", "tweet_text_cleaned", "tweet_original"])
 
24
 
25
  def initialize_hf_dataset():
 
26
  if not HF_TOKEN or not HF_USERNAME:
27
+ return False, "HuggingFace credentials not found."
28
+
29
  try:
30
  api = HfApi(token=HF_TOKEN)
31
  repo_id = f"{HF_USERNAME}/{HF_DATASET_REPO}"
32
+
 
33
  try:
34
  api.repo_info(repo_id=repo_id, repo_type="dataset")
 
35
  except Exception:
 
36
  create_repo(repo_id=repo_id, repo_type="dataset", token=HF_TOKEN)
37
+
 
38
  if not os.path.exists(TEMP_JSON):
39
  with open(TEMP_JSON, "w") as f:
40
  pass
41
+
42
+ return True, repo_id
43
  except Exception as e:
44
+ return False, str(e)
45
 
46
  def push_to_hf_dataset(data_row):
 
47
  if not HF_TOKEN or not HF_USERNAME:
48
  return False, "HuggingFace credentials not found"
49
+
50
  try:
 
51
  with open(TEMP_JSON, "a") as f:
52
  f.write(json.dumps(data_row) + "\n")
53
+
 
54
  api = HfApi(token=HF_TOKEN)
55
  repo_id = f"{HF_USERNAME}/{HF_DATASET_REPO}"
56
+
 
57
  upload_file(
58
  path_or_fileobj=TEMP_JSON,
59
  path_in_repo="data.jsonl",
 
61
  repo_type="dataset",
62
  token=HF_TOKEN
63
  )
64
+
65
  return True, f"Data pushed to {repo_id}"
66
  except Exception as e:
67
+ return False, str(e)
68
 
69
  def save_choice(text_id, original_text, summary_a, summary_b, choice, notes="", request_id=""):
 
70
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
71
  chosen_summary = "A" if choice == "Summary A" else "B"
72
+
 
73
  new_row = {
74
  "timestamp": timestamp,
75
  "text_id": text_id,
 
80
  "notes": notes,
81
  "request_id": request_id
82
  }
83
+
 
84
  if os.path.exists(OUTPUT_CSV):
85
  results_df = pd.read_csv(OUTPUT_CSV)
86
  results_df = pd.concat([results_df, pd.DataFrame([new_row])], ignore_index=True)
87
  else:
88
  results_df = pd.DataFrame([new_row])
89
+
90
  results_df.to_csv(OUTPUT_CSV, index=False)
91
+
 
92
  success, message = push_to_hf_dataset(new_row)
93
+
94
  request_id_msg = f" (Request ID: {request_id})" if request_id else ""
95
+
96
  if success:
97
+ return f"Selection saved for text ID: {text_id}{request_id_msg}! You chose {choice}."
98
  else:
99
  return f"Selection saved locally for text ID: {text_id}{request_id_msg}. HuggingFace push failed: {message}"
100
 
101
+
102
  class SummaryChooser:
103
  def __init__(self):
104
  self.df = load_data()
105
  self.current_index = 0
106
  self.total_items = len(self.df)
107
  self.hf_status = initialize_hf_dataset()
108
+ self.request_id = ""
109
+
110
  def set_request_id(self, request: gr.Request):
111
+ query_params = request.query_params
112
+ self.request_id = query_params.get("id", "")
113
+ return f"Request ID: {self.request_id}" if self.request_id else "No Request ID provided"
114
+
 
 
 
 
 
115
  def get_current_item(self):
 
116
  if self.total_items == 0:
117
+ return "", "", "", "", f"No data found in {INPUT_CSV}."
118
+
119
  row = self.df.iloc[self.current_index]
120
  progress = f"Item {self.current_index + 1} of {self.total_items}"
121
+ return row["id"], row["boe_text_cleaned"], row["tweet_text_cleaned"], row["tweet_original"], progress
122
+
123
  def next_item(self, choice, notes):
 
124
  if self.total_items == 0:
125
  return "", "", "", "", "No data available", ""
126
+
 
127
  text_id, text, summary_a, summary_b, _ = self.get_current_item()
128
+
 
129
  result_message = save_choice(text_id, text, summary_a, summary_b, choice, notes, self.request_id)
130
+
 
131
  self.current_index = (self.current_index + 1) % self.total_items
132
+ return (*self.get_current_item(), result_message)
133
+
 
 
 
134
  def prev_item(self):
 
135
  if self.total_items == 0:
136
  return "", "", "", "", "No data available", ""
137
+
 
138
  self.current_index = (self.current_index - 1) % self.total_items
139
+ return (*self.get_current_item(), "")
140
+
 
 
 
141
  def get_hf_status(self):
 
142
  success, message = self.hf_status
143
  return f"{'Connected' if success else 'Not Connected'} - {message}"
144