broadfield-dev committed on
Commit
236ae84
·
verified ·
1 Parent(s): 174b2b2

Update server.py

Browse files
Files changed (1) hide show
  1. server.py +311 -95
server.py CHANGED
@@ -20,44 +20,111 @@ except ImportError:
20
  STORAGE_BACKEND_CONFIG = os.getenv("STORAGE_BACKEND", "JSON").upper()
21
  HF_DATASET_REPO = os.getenv("HF_DATASET_REPO")
22
  HF_TOKEN = os.getenv("HF_TOKEN")
23
- DB_FILE_JSON = "social_data.json"
24
- DB_FILE_SQLITE = "social_data.db"
25
  db_lock = threading.Lock()
26
  HF_BACKUP_THRESHOLD = int(os.getenv("HF_BACKUP_THRESHOLD", 10))
27
  dirty_operations_count = 0
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def force_persist_data():
30
  global dirty_operations_count
31
  with db_lock:
32
  storage_backend = STORAGE_BACKEND_CONFIG
 
33
  if storage_backend == "RAM":
 
34
  return True, "RAM backend. No persistence."
35
  elif storage_backend == "SQLITE":
36
- with sqlite3.connect(DB_FILE_SQLITE) as conn:
37
- users_df = pd.DataFrame(list(users_db.items()), columns=['username', 'password'])
38
- users_df.to_sql('users', conn, if_exists='replace', index=False)
39
- posts_df.to_sql('posts', conn, if_exists='replace', index=False)
40
- comments_df.to_sql('comments', conn, if_exists='replace', index=False)
41
- return True, "Successfully saved to SQLite."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  elif storage_backend == "JSON":
43
- with open(DB_FILE_JSON, "w") as f:
44
- json.dump({"users": users_db, "posts": posts_df.to_dict('records'), "comments": comments_df.to_dict('records')}, f, indent=2)
45
- return True, "Successfully saved to JSON file."
 
 
 
 
 
 
 
 
 
 
46
  elif storage_backend == "HF_DATASET":
47
  if not all([HF_DATASETS_AVAILABLE, HF_TOKEN, HF_DATASET_REPO]):
 
48
  return False, "HF_DATASET backend is not configured correctly."
49
  try:
50
  print("Pushing data to Hugging Face Hub...")
 
 
 
 
 
 
 
 
 
 
 
51
  dataset_dict = DatasetDict({
52
- 'users': Dataset.from_pandas(pd.DataFrame(list(users_db.items()), columns=['username', 'password'])),
53
- 'posts': Dataset.from_pandas(posts_df),
54
- 'comments': Dataset.from_pandas(comments_df)
55
  })
 
 
 
 
 
56
  dataset_dict.push_to_hub(HF_DATASET_REPO, token=HF_TOKEN, private=True)
57
  dirty_operations_count = 0
 
58
  return True, f"Successfully pushed data to {HF_DATASET_REPO}."
59
  except Exception as e:
 
60
  return False, f"Error pushing to Hugging Face Hub: {e}"
 
61
  return False, "Unknown backend."
62
 
63
  def handle_persistence_after_change():
@@ -72,79 +139,154 @@ def handle_persistence_after_change():
72
  force_persist_data()
73
 
74
  def load_data():
75
- global STORAGE_BACKEND_CONFIG
76
  storage_backend = STORAGE_BACKEND_CONFIG
77
-
78
- posts_schema = {"post_id": "int64", "username": "object", "content": "object", "timestamp": "object"}
79
- comments_schema = {"comment_id": "int64", "post_id": "int64", "username": "object", "content": "object", "timestamp": "object", "reply_to_comment_id": "float64"}
80
 
81
  with db_lock:
82
  users = {"admin": "password"}
83
- posts = pd.DataFrame({k: pd.Series(dtype=v) for k, v in posts_schema.items()})
84
- comments = pd.DataFrame({k: pd.Series(dtype=v) for k, v in comments_schema.items()})
85
 
86
  if storage_backend == "SQLITE":
87
  try:
88
  with sqlite3.connect(DB_FILE_SQLITE) as conn:
89
  cursor = conn.cursor()
 
90
  cursor.execute("CREATE TABLE IF NOT EXISTS users (username TEXT PRIMARY KEY, password TEXT NOT NULL)")
91
- cursor.execute("CREATE TABLE IF NOT EXISTS posts (post_id INTEGER PRIMARY KEY, username TEXT, content TEXT, timestamp TEXT)")
92
- cursor.execute("CREATE TABLE IF NOT EXISTS comments (comment_id INTEGER PRIMARY KEY, post_id INTEGER, username TEXT, content TEXT, timestamp TEXT, reply_to_comment_id INTEGER)")
93
  cursor.execute("INSERT OR IGNORE INTO users (username, password) VALUES (?, ?)", ("admin", "password"))
94
  conn.commit()
 
 
95
  users = dict(conn.execute("SELECT username, password FROM users").fetchall())
96
- posts = pd.read_sql_query("SELECT * FROM posts", conn)
97
- comments = pd.read_sql_query("SELECT * FROM comments", conn)
 
 
 
 
 
 
 
 
 
 
98
  except Exception as e:
99
  print(f"CRITICAL: Failed to use SQLite. Falling back to RAM. Error: {e}")
100
  STORAGE_BACKEND_CONFIG = "RAM"
 
101
  elif storage_backend == "JSON":
102
  if os.path.exists(DB_FILE_JSON):
103
  try:
104
  with open(DB_FILE_JSON, "r") as f: data = json.load(f)
105
- users, posts, comments = data.get("users", users), pd.DataFrame(data.get("posts", [])), pd.DataFrame(data.get("comments", []))
106
- except (json.JSONDecodeError, KeyError): pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  elif storage_backend == "HF_DATASET":
108
  if all([HF_DATASETS_AVAILABLE, HF_TOKEN, HF_DATASET_REPO]):
109
  try:
 
110
  ds_dict = load_dataset(HF_DATASET_REPO, token=HF_TOKEN, trust_remote_code=True)
111
- if ds_dict and all(k in ds_dict for k in ['users', 'posts', 'comments']):
112
- users = dict(zip(ds_dict['users']['username'], ds_dict['users']['password'])) if ds_dict['users'].num_rows > 0 else {"admin":"password"}
113
- posts = ds_dict['posts'].to_pandas()
114
- comments = ds_dict['comments'].to_pandas()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  print("Successfully loaded data from HF Dataset.")
116
- else: raise ValueError("Dataset dictionary is empty or malformed.")
 
117
  except Exception as e:
118
  print(f"Could not load from HF Dataset '{HF_DATASET_REPO}'. Attempting to initialize. Error: {e}")
119
  try:
 
120
  user_features = Features({'username': Value('string'), 'password': Value('string')})
121
- post_features = Features({'post_id': Value('int64'), 'username': Value('string'), 'content': Value('string'), 'timestamp': Value('string')})
122
- comment_features = Features({'comment_id': Value('int64'), 'post_id': Value('int64'), 'username': Value('string'), 'content': Value('string'), 'timestamp': Value('string'), 'reply_to_comment_id': Value('int64')})
123
-
 
 
 
 
 
 
 
124
  initial_users_df = pd.DataFrame(list(users.items()), columns=['username', 'password'])
125
-
 
 
 
126
  dataset_dict = DatasetDict({
127
  'users': Dataset.from_pandas(initial_users_df, features=user_features),
128
- 'posts': Dataset.from_pandas(posts, features=post_features),
129
- 'comments': Dataset.from_pandas(comments, features=comment_features)
130
  })
131
  dataset_dict.push_to_hub(HF_DATASET_REPO, token=HF_TOKEN, private=True)
132
  print(f"Successfully initialized new empty HF Dataset at {HF_DATASET_REPO}.")
 
 
133
  except Exception as e_push:
134
  print(f"CRITICAL: Failed to create new HF Dataset. Falling back to RAM. Push Error: {e_push}")
135
  STORAGE_BACKEND_CONFIG = "RAM"
136
  else:
137
  print("HF_DATASET backend not fully configured. Falling back to RAM.")
138
  STORAGE_BACKEND_CONFIG = "RAM"
 
 
 
 
 
 
 
 
 
 
 
139
 
140
- if "reply_to_comment_id" not in comments.columns:
141
- comments["reply_to_comment_id"] = pd.Series(dtype='float64')
142
-
143
- post_counter = int(posts['post_id'].max()) if not posts.empty else 0
144
- comment_counter = int(comments['comment_id'].max()) if not comments.empty else 0
145
- return users, posts, comments, post_counter, comment_counter
146
 
147
- users_db, posts_df, comments_df, post_counter, comment_counter = load_data()
148
 
149
  def api_register(username, password):
150
  if not username or not password: return "Failed: Username/password cannot be empty."
@@ -155,86 +297,145 @@ def api_register(username, password):
155
  return f"Success: User '{username}' registered."
156
 
157
  def api_login(username, password):
 
 
158
  return f"{username}:{password}" if users_db.get(username) == password else "Failed: Invalid credentials."
159
 
160
  def _get_user_from_token(token):
161
  if not token or ':' not in token: return None
162
  user, pwd = token.split(':', 1)
163
- return user if users_db.get(user) == pwd else None
 
164
 
165
  def api_create_post(auth_token, content):
166
- global posts_df, post_counter
 
167
  username = _get_user_from_token(auth_token)
168
  if not username: return "Failed: Invalid auth token."
 
 
169
  with db_lock:
170
- post_counter += 1
171
- new_post = pd.DataFrame([{"post_id": post_counter, "username": username, "content": content, "timestamp": datetime.utcnow().isoformat()}])
172
- posts_df = pd.concat([posts_df, new_post], ignore_index=True)
 
 
 
 
 
 
 
 
173
  handle_persistence_after_change()
174
- return f"Success: Post {post_counter} created."
175
 
176
- def api_create_comment(auth_token, post_id, content, reply_to_comment_id=None):
177
- global comments_df, comment_counter
 
 
 
178
  username = _get_user_from_token(auth_token)
179
  if not username: return "Failed: Invalid auth token."
 
 
 
 
 
 
 
 
180
  with db_lock:
181
- if int(post_id) not in posts_df['post_id'].values: return f"Failed: Post {post_id} not found."
182
- if reply_to_comment_id is not None and int(reply_to_comment_id) not in comments_df['comment_id'].values: return f"Failed: Comment to reply to ({reply_to_comment_id}) not found."
183
-
184
- comment_counter += 1
185
- new_comment = pd.DataFrame([{"comment_id": comment_counter, "post_id": int(post_id), "username": username, "content": content, "timestamp": datetime.utcnow().isoformat(), "reply_to_comment_id": int(reply_to_comment_id) if reply_to_comment_id is not None else None}])
186
- comments_df = pd.concat([comments_df, new_comment], ignore_index=True)
 
 
 
 
 
 
 
 
 
187
  handle_persistence_after_change()
188
- return "Success: Comment created."
 
 
189
 
190
  def api_get_feed():
 
191
  with db_lock:
192
- posts, comments = posts_df.copy(), comments_df.copy()
193
-
194
- if posts.empty and comments.empty:
195
- return pd.DataFrame(columns=['type', 'post_id', 'comment_id', 'reply_to_comment_id', 'username', 'timestamp', 'content'])
196
-
197
- posts['type'] = 'post'
198
- comments['type'] = 'comment'
199
-
200
- feed_data = pd.concat([posts, comments], ignore_index=True, sort=False)
201
- feed_data['timestamp'] = pd.to_datetime(feed_data['timestamp'])
202
-
203
- feed_data = feed_data.sort_values(by=['timestamp'], ascending=False)
204
-
205
- display_columns = ['type', 'post_id', 'comment_id', 'reply_to_comment_id', 'username', 'timestamp', 'content']
 
 
 
 
 
 
 
 
 
 
 
206
  feed_data = feed_data.reindex(columns=display_columns)
207
-
208
- return feed_data.fillna('')
 
 
 
 
 
 
 
 
209
 
210
  def ui_manual_post(username, password, content):
211
  auth_token = api_login(username, password)
212
  if "Failed" in auth_token: return "Login failed.", api_get_feed()
213
  return api_create_post(auth_token, content), api_get_feed()
214
 
215
- def ui_manual_comment(username, password, post_id, reply_id, content):
216
  auth_token = api_login(username, password)
217
  if "Failed" in auth_token: return "Login failed.", api_get_feed()
218
- return api_create_comment(auth_token, post_id, content, reply_id), api_get_feed()
219
 
220
  def ui_save_to_json():
221
- with db_lock:
222
- try:
223
- with open(DB_FILE_JSON, "w") as f:
224
- json.dump({"users": users_db, "posts": posts_df.to_dict('records'), "comments": comments_df.to_dict('records')}, f, indent=2)
225
- return f"Successfully saved current state to {DB_FILE_JSON}."
226
- except Exception as e:
227
- return f"Error saving to JSON: {e}"
 
 
228
 
229
  with gr.Blocks(theme=gr.themes.Soft(), title="Social App") as demo:
230
  gr.Markdown("# Social Media Server for iLearn Agent")
231
  gr.Markdown(f"This app provides an API for iLearn agents to interact with. **Storage Backend: `{STORAGE_BACKEND_CONFIG}`**")
232
-
233
  with gr.Tabs():
234
  with gr.TabItem("Live Feed"):
235
- feed_df_display = gr.DataFrame(label="Feed", interactive=False, wrap=True)
 
 
236
  refresh_btn = gr.Button("Refresh Feed")
237
-
238
  with gr.TabItem("Manual Actions"):
239
  manual_action_status = gr.Textbox(label="Action Status", interactive=False)
240
  with gr.Row():
@@ -248,28 +449,43 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Social App") as demo:
248
  gr.Markdown("### Create Comment / Reply")
249
  comment_user = gr.Textbox(label="User", value="admin")
250
  comment_pass = gr.Textbox(label="Pass", type="password", value="password")
251
- comment_post_id = gr.Number(label="Target Post ID")
252
- comment_reply_id = gr.Number(label="Reply to Comment ID (optional)")
253
  comment_content = gr.Textbox(label="Content", lines=2)
254
  comment_button = gr.Button("Submit Comment", variant="primary")
255
  with gr.Group():
256
  gr.Markdown("### Data Management")
257
- save_json_button = gr.Button("Save Current State to JSON")
258
-
259
 
 
 
260
  post_button.click(ui_manual_post, [post_user, post_pass, post_content], [manual_action_status, feed_df_display])
261
- comment_button.click(ui_manual_comment, [comment_user, comment_pass, comment_post_id, comment_reply_id, comment_content], [manual_action_status, feed_df_display])
 
262
  save_json_button.click(ui_save_to_json, None, [manual_action_status])
263
  refresh_btn.click(api_get_feed, None, feed_df_display)
264
-
 
265
  demo.load(api_get_feed, None, feed_df_display)
266
 
267
- with gr.Column(visible=False):
 
 
268
  gr.Interface(api_register, ["text", "text"], "text", api_name="register")
269
  gr.Interface(api_login, ["text", "text"], "text", api_name="login")
 
270
  gr.Interface(api_create_post, ["text", "text"], "text", api_name="create_post")
271
- gr.Interface(api_create_comment, ["text", "number", "text", "number"], "text", api_name="create_comment")
 
 
 
272
  gr.Interface(api_get_feed, None, "dataframe", api_name="get_feed")
273
 
 
274
  if __name__ == "__main__":
 
 
 
 
 
275
  demo.queue().launch(server_name="0.0.0.0", server_port=7860, share=False)
 
20
  STORAGE_BACKEND_CONFIG = os.getenv("STORAGE_BACKEND", "JSON").upper()
21
  HF_DATASET_REPO = os.getenv("HF_DATASET_REPO")
22
  HF_TOKEN = os.getenv("HF_TOKEN")
23
+ DB_FILE_JSON = "social_data_unified.json" # Changed filename to avoid conflicts
24
+ DB_FILE_SQLITE = "social_data_unified.db" # Changed filename
25
  db_lock = threading.Lock()
26
  HF_BACKUP_THRESHOLD = int(os.getenv("HF_BACKUP_THRESHOLD", 10))
27
  dirty_operations_count = 0
28
 
29
+ # --- New Global Data Structure ---
30
+ users_db = {}
31
+ entries_df = pd.DataFrame()
32
+ post_id_counter = 0 # Single counter for all entries
33
+
34
+ # Define the schema for the unified entries table
35
+ ENTRY_SCHEMA = {
36
+ "post_id": "Int64", # Use nullable integer
37
+ "reply_to_id": "Int64", # Use nullable integer, None for top-level posts
38
+ "username": "object",
39
+ "content": "object",
40
+ "timestamp": "object",
41
+ "type": "object" # 'post' or 'comment'
42
+ }
43
+
44
  def force_persist_data():
45
  global dirty_operations_count
46
  with db_lock:
47
  storage_backend = STORAGE_BACKEND_CONFIG
48
+ print(f"Attempting to persist data to {storage_backend}")
49
  if storage_backend == "RAM":
50
+ print("RAM backend. No persistence.")
51
  return True, "RAM backend. No persistence."
52
  elif storage_backend == "SQLITE":
53
+ try:
54
+ with sqlite3.connect(DB_FILE_SQLITE) as conn:
55
+ cursor = conn.cursor()
56
+ # Users table
57
+ cursor.execute("CREATE TABLE IF NOT EXISTS users (username TEXT PRIMARY KEY, password TEXT NOT NULL)")
58
+ # Entries table - new schema
59
+ cursor.execute("CREATE TABLE IF NOT EXISTS entries (post_id INTEGER PRIMARY KEY, reply_to_id INTEGER, username TEXT, content TEXT, timestamp TEXT, type TEXT)")
60
+
61
+ # Save users
62
+ users_to_save = [(u, p) for u, p in users_db.items()]
63
+ if users_to_save: # Avoid executing with empty list
64
+ conn.executemany("INSERT OR REPLACE INTO users (username, password) VALUES (?, ?)", users_to_save)
65
+
66
+ # Save entries (replace existing data)
67
+ # Ensure Int64 columns are correctly handled as nullable integers for SQL
68
+ entries_to_save = entries_df.copy()
69
+ entries_to_save['reply_to_id'] = entries_to_save['reply_to_id'].astype('object').where(entries_to_save['reply_to_id'].notna(), None)
70
+
71
+ entries_to_save.to_sql('entries', conn, if_exists='replace', index=False)
72
+
73
+ conn.commit()
74
+ print("Successfully saved to SQLite.")
75
+ return True, "Successfully saved to SQLite."
76
+ except Exception as e:
77
+ print(f"Error saving to SQLite: {e}")
78
+ return False, f"Error saving to SQLite: {e}"
79
+
80
  elif storage_backend == "JSON":
81
+ try:
82
+ data_to_save = {
83
+ "users": users_db,
84
+ "entries": entries_df.to_dict('records')
85
+ }
86
+ with open(DB_FILE_JSON, "w") as f:
87
+ json.dump(data_to_save, f, indent=2)
88
+ print("Successfully saved to JSON file.")
89
+ return True, "Successfully saved to JSON file."
90
+ except Exception as e:
91
+ print(f"Error saving to JSON: {e}")
92
+ return False, f"Error saving to JSON: {e}"
93
+
94
  elif storage_backend == "HF_DATASET":
95
  if not all([HF_DATASETS_AVAILABLE, HF_TOKEN, HF_DATASET_REPO]):
96
+ print("HF_DATASET backend is not configured correctly.")
97
  return False, "HF_DATASET backend is not configured correctly."
98
  try:
99
  print("Pushing data to Hugging Face Hub...")
100
+
101
+ # Convert nullable Int64 columns to standard int/float for dataset
102
+ entries_for_hf = entries_df.copy()
103
+ # Hugging Face datasets typically handle None/null correctly for integer types
104
+ # Ensure type hints are correct or handle potential type issues
105
+ entries_for_hf['post_id'] = entries_for_hf['post_id'].astype('int64') # Non-nullable ID
106
+ entries_for_hf['reply_to_id'] = entries_for_hf['reply_to_id'].astype('float64') # Use float for nullable integer in HF datasets
107
+
108
+ user_dataset = Dataset.from_pandas(pd.DataFrame(list(users_db.items()), columns=['username', 'password']))
109
+ entries_dataset = Dataset.from_pandas(entries_for_hf)
110
+
111
  dataset_dict = DatasetDict({
112
+ 'users': user_dataset,
113
+ 'entries': entries_dataset,
 
114
  })
115
+ # Define features explicitly for nullable types if needed, though pandas conversion often works
116
+ # user_features = Features({'username': Value('string'), 'password': Value('string')})
117
+ # entry_features = Features({'post_id': Value('int64'), 'reply_to_id': Value('int64'), 'username': Value('string'), 'content': Value('string'), 'timestamp': Value('string'), 'type': Value('string')})
118
+ # Pass features to from_pandas or push_to_hub if needed, but auto-detection is often sufficient for basic types
119
+
120
  dataset_dict.push_to_hub(HF_DATASET_REPO, token=HF_TOKEN, private=True)
121
  dirty_operations_count = 0
122
+ print(f"Successfully pushed data to {HF_DATASET_REPO}.")
123
  return True, f"Successfully pushed data to {HF_DATASET_REPO}."
124
  except Exception as e:
125
+ print(f"Error pushing to Hugging Face Hub: {e}")
126
  return False, f"Error pushing to Hugging Face Hub: {e}"
127
+ print("Unknown backend.")
128
  return False, "Unknown backend."
129
 
130
  def handle_persistence_after_change():
 
139
  force_persist_data()
140
 
141
  def load_data():
142
+ global STORAGE_BACKEND_CONFIG, users_db, entries_df, post_id_counter
143
  storage_backend = STORAGE_BACKEND_CONFIG
 
 
 
144
 
145
  with db_lock:
146
  users = {"admin": "password"}
147
+ # Initialize entries DataFrame with the correct schema
148
+ entries = pd.DataFrame({k: pd.Series(dtype=v) for k, v in ENTRY_SCHEMA.items()})
149
 
150
  if storage_backend == "SQLITE":
151
  try:
152
  with sqlite3.connect(DB_FILE_SQLITE) as conn:
153
  cursor = conn.cursor()
154
+ # Create tables if they don't exist
155
  cursor.execute("CREATE TABLE IF NOT EXISTS users (username TEXT PRIMARY KEY, password TEXT NOT NULL)")
156
+ cursor.execute("CREATE TABLE IF NOT EXISTS entries (post_id INTEGER PRIMARY KEY, reply_to_id INTEGER, username TEXT, content TEXT, timestamp TEXT, type TEXT)")
157
+ # Add default admin user if not exists
158
  cursor.execute("INSERT OR IGNORE INTO users (username, password) VALUES (?, ?)", ("admin", "password"))
159
  conn.commit()
160
+
161
+ # Load data
162
  users = dict(conn.execute("SELECT username, password FROM users").fetchall())
163
+ entries = pd.read_sql_query("SELECT * FROM entries", conn)
164
+
165
+ # Ensure correct dtypes, especially for nullable integers
166
+ for col, dtype in ENTRY_SCHEMA.items():
167
+ if col in entries.columns:
168
+ try:
169
+ entries[col] = entries[col].astype(dtype)
170
+ except Exception as e:
171
+ print(f"Warning: Could not convert column {col} to {dtype} from SQLite. {e}")
172
+
173
+
174
+ print(f"Successfully loaded data from SQLite: {DB_FILE_SQLITE}")
175
  except Exception as e:
176
  print(f"CRITICAL: Failed to use SQLite. Falling back to RAM. Error: {e}")
177
  STORAGE_BACKEND_CONFIG = "RAM"
178
+
179
  elif storage_backend == "JSON":
180
  if os.path.exists(DB_FILE_JSON):
181
  try:
182
  with open(DB_FILE_JSON, "r") as f: data = json.load(f)
183
+ users = data.get("users", users)
184
+ loaded_entries_list = data.get("entries", [])
185
+ entries = pd.DataFrame(loaded_entries_list)
186
+
187
+ # Ensure correct dtypes after loading from JSON
188
+ if not entries.empty:
189
+ for col, dtype in ENTRY_SCHEMA.items():
190
+ if col in entries.columns:
191
+ try:
192
+ entries[col] = entries[col].astype(dtype)
193
+ except Exception as e:
194
+ print(f"Warning: Could not convert column {col} to {dtype} from JSON. {e}")
195
+ else:
196
+ # If JSON was empty or missing entries key, ensure empty DF has schema
197
+ entries = pd.DataFrame({k: pd.Series(dtype=v) for k, v in ENTRY_SCHEMA.items()})
198
+
199
+ except (json.JSONDecodeError, KeyError, Exception) as e:
200
+ print(f"Error loading JSON data: {e}. Initializing with empty data.")
201
+ users = {"admin":"password"} # Reset users on load error? Or keep default? Let's keep default.
202
+ entries = pd.DataFrame({k: pd.Series(dtype=v) for k, v in ENTRY_SCHEMA.items()})
203
+
204
  elif storage_backend == "HF_DATASET":
205
  if all([HF_DATASETS_AVAILABLE, HF_TOKEN, HF_DATASET_REPO]):
206
  try:
207
+ print(f"Attempting to load from HF Dataset '{HF_DATASET_REPO}'...")
208
  ds_dict = load_dataset(HF_DATASET_REPO, token=HF_TOKEN, trust_remote_code=True)
209
+
210
+ if ds_dict and 'users' in ds_dict and 'entries' in ds_dict:
211
+ # Load users
212
+ if ds_dict['users'].num_rows > 0:
213
+ users = dict(zip(ds_dict['users']['username'], ds_dict['users']['password']))
214
+ else:
215
+ users = {"admin":"password"} # Default admin if no users
216
+
217
+ # Load entries
218
+ entries = ds_dict['entries'].to_pandas()
219
+
220
+ # Ensure correct dtypes, especially for nullable integers
221
+ if not entries.empty:
222
+ for col, dtype in ENTRY_SCHEMA.items():
223
+ if col in entries.columns:
224
+ try:
225
+ # HF datasets might load Int64 as float or object, convert explicitly
226
+ if dtype == "Int64": # Pandas nullable integer
227
+ entries[col] = pd.to_numeric(entries[col], errors='coerce').astype(dtype)
228
+ else:
229
+ entries[col] = entries[col].astype(dtype)
230
+ except Exception as e:
231
+ print(f"Warning: Could not convert column {col} to {dtype} from HF Dataset. {e}")
232
+ else:
233
+ # If entries dataset is empty, ensure empty DF has schema
234
+ entries = pd.DataFrame({k: pd.Series(dtype=v) for k, v in ENTRY_SCHEMA.items()})
235
+
236
+
237
  print("Successfully loaded data from HF Dataset.")
238
+ else:
239
+ raise ValueError("Dataset dictionary is empty or malformed (missing 'users' or 'entries').")
240
  except Exception as e:
241
  print(f"Could not load from HF Dataset '{HF_DATASET_REPO}'. Attempting to initialize. Error: {e}")
242
  try:
243
+ # Define features including nullable types if possible, or rely on pandas conversion
244
  user_features = Features({'username': Value('string'), 'password': Value('string')})
245
+ # Use float64 for nullable int in HF Features as a common workaround
246
+ entry_features = Features({
247
+ 'post_id': Value('int64'),
248
+ 'reply_to_id': Value('float64'), # HF datasets often use float for nullable int
249
+ 'username': Value('string'),
250
+ 'content': Value('string'),
251
+ 'timestamp': Value('string'),
252
+ 'type': Value('string')
253
+ })
254
+
255
  initial_users_df = pd.DataFrame(list(users.items()), columns=['username', 'password'])
256
+ # Ensure initial empty entries DF conforms to the HF features expected types
257
+ initial_entries_df = pd.DataFrame({k: pd.Series(dtype='float64' if k in ['post_id', 'reply_to_id'] else 'object') for k in ENTRY_SCHEMA.keys()})
258
+
259
+
260
  dataset_dict = DatasetDict({
261
  'users': Dataset.from_pandas(initial_users_df, features=user_features),
262
+ 'entries': Dataset.from_pandas(initial_entries_df, features=entry_features) # Use initial empty with HF types
 
263
  })
264
  dataset_dict.push_to_hub(HF_DATASET_REPO, token=HF_TOKEN, private=True)
265
  print(f"Successfully initialized new empty HF Dataset at {HF_DATASET_REPO}.")
266
+ # After initializing, reset entries_df to pandas schema
267
+ entries = pd.DataFrame({k: pd.Series(dtype=v) for k, v in ENTRY_SCHEMA.items()})
268
  except Exception as e_push:
269
  print(f"CRITICAL: Failed to create new HF Dataset. Falling back to RAM. Push Error: {e_push}")
270
  STORAGE_BACKEND_CONFIG = "RAM"
271
  else:
272
  print("HF_DATASET backend not fully configured. Falling back to RAM.")
273
  STORAGE_BACKEND_CONFIG = "RAM"
274
+ else: # RAM backend or fallback
275
+ print("Using RAM backend.")
276
+
277
+ # Initialize global variables after loading/initializing
278
+ users_db = users
279
+ entries_df = entries
280
+ # Calculate the next post_id counter value
281
+ post_id_counter = int(entries_df['post_id'].max()) if not entries_df.empty and entries_df['post_id'].notna().any() else 0
282
+
283
+ print(f"Loaded data. Users: {len(users_db)}, Entries: {len(entries_df)}. Next Post ID: {post_id_counter + 1}")
284
+
285
 
286
+ # --- Load Data Initially ---
287
+ load_data()
 
 
 
 
288
 
289
+ # --- API Functions (adapted for unified structure) ---
290
 
291
  def api_register(username, password):
292
  if not username or not password: return "Failed: Username/password cannot be empty."
 
297
  return f"Success: User '{username}' registered."
298
 
299
  def api_login(username, password):
300
+ # Simulate authentication token (basic user:pass string)
301
+ # In a real app, use proper token/session management
302
  return f"{username}:{password}" if users_db.get(username) == password else "Failed: Invalid credentials."
303
 
304
  def _get_user_from_token(token):
305
  if not token or ':' not in token: return None
306
  user, pwd = token.split(':', 1)
307
+ with db_lock: # Access users_db requires lock
308
+ return user if users_db.get(user) == pwd else None
309
 
310
  def api_create_post(auth_token, content):
311
+ """Creates a top-level post entry."""
312
+ global entries_df, post_id_counter
313
  username = _get_user_from_token(auth_token)
314
  if not username: return "Failed: Invalid auth token."
315
+ if not content: return "Failed: Content cannot be empty."
316
+
317
  with db_lock:
318
+ post_id_counter += 1
319
+ new_entry = pd.DataFrame([{
320
+ "post_id": post_id_counter,
321
+ "reply_to_id": pd.NA, # Use pandas NA for nullable integer
322
+ "username": username,
323
+ "content": content,
324
+ "timestamp": datetime.utcnow().isoformat(),
325
+ "type": "post"
326
+ }]).astype(ENTRY_SCHEMA) # Ensure correct dtypes
327
+
328
+ entries_df = pd.concat([entries_df, new_entry], ignore_index=True)
329
  handle_persistence_after_change()
 
330
 
331
+ return f"Success: Post {post_id_counter} created."
332
+
333
def api_create_comment(auth_token, reply_to_id, content):
    """Creates a comment/reply entry attached to an existing entry.

    Args:
        auth_token: "user:password" token as returned by api_login.
        reply_to_id: ID of the post or comment being replied to.
        content: Text of the comment; must be non-empty.

    Returns:
        str: "Success: ..." with the new entry ID, or a "Failed: ..." message.
    """
    global entries_df, post_id_counter
    username = _get_user_from_token(auth_token)
    if not username: return "Failed: Invalid auth token."
    if not content: return "Failed: Content cannot be empty."
    if reply_to_id is None: return "Failed: Reply to ID cannot be empty for a comment/reply."

    try:
        reply_to_id = int(reply_to_id)  # Ensure it's an integer
    except (ValueError, TypeError, OverflowError):
        # OverflowError covers infinite floats, which int() cannot convert.
        return "Failed: Invalid Reply To ID."

    with db_lock:
        # Check if the entry being replied to exists
        if reply_to_id not in entries_df['post_id'].values:
            return f"Failed: Entry with ID {reply_to_id} not found."

        post_id_counter += 1
        # Remember the ID while still holding the lock: once it is released,
        # another request may bump the shared counter before we report it.
        new_entry_id = post_id_counter
        new_entry = pd.DataFrame([{
            "post_id": new_entry_id,
            "reply_to_id": reply_to_id,
            "username": username,
            "content": content,
            "timestamp": datetime.utcnow().isoformat(),
            "type": "comment"  # All replies are 'comment' type in this scheme
        }]).astype(ENTRY_SCHEMA)  # Ensure correct dtypes

        entries_df = pd.concat([entries_df, new_entry], ignore_index=True)

    # Must run after db_lock is released: persistence takes the same
    # non-reentrant lock.
    handle_persistence_after_change()

    return f"Success: Comment/Reply {new_entry_id} created (replying to {reply_to_id})."
365
+
366
 
367
  def api_get_feed():
368
+ """Retrieves all entries sorted by timestamp."""
369
  with db_lock:
370
+ # Return a copy to prevent external modifications
371
+ feed_data = entries_df.copy()
372
+
373
+ if feed_data.empty:
374
+ # Return empty DataFrame with expected columns
375
+ return pd.DataFrame({k: pd.Series(dtype=v) for k, v in ENTRY_SCHEMA.items()})
376
+
377
+ # Ensure timestamp is datetime for sorting, handle potential errors
378
+ try:
379
+ feed_data['timestamp'] = pd.to_datetime(feed_data['timestamp'])
380
+ except Exception as e:
381
+ print(f"Warning: Could not convert timestamp column to datetime: {e}")
382
+ # If conversion fails, sort by post_id or keep unsorted as fallback
383
+ # Let's skip sorting by timestamp if conversion fails
384
+ pass
385
+
386
+ # Sort (prefer timestamp, fallback to post_id if timestamp fails or is identical)
387
+ if 'timestamp' in feed_data.columns and pd.api.types.is_datetime64_any_dtype(feed_data['timestamp']):
388
+ feed_data = feed_data.sort_values(by=['timestamp', 'post_id'], ascending=[False, False])
389
+ else:
390
+ feed_data = feed_data.sort_values(by='post_id', ascending=False)
391
+
392
+ # Select and rename/reorder columns for display if necessary
393
+ # The current schema matches well, just need to ensure all columns are present
394
+ display_columns = list(ENTRY_SCHEMA.keys()) # Use all columns in the schema
395
  feed_data = feed_data.reindex(columns=display_columns)
396
+
397
+ # Fill NaN/NA for display purposes (optional, but can make table cleaner)
398
+ # Convert nullable Int64 NA to empty string or specific placeholder for display
399
+ for col in ['post_id', 'reply_to_id']:
400
+ if col in feed_data.columns:
401
+ feed_data[col] = feed_data[col].apply(lambda x: '' if pd.isna(x) else int(x)) # Display int without .0
402
+
403
+ return feed_data
404
+
405
+ # --- UI Functions (adapted for unified structure) ---
406
 
407
  def ui_manual_post(username, password, content):
408
  auth_token = api_login(username, password)
409
  if "Failed" in auth_token: return "Login failed.", api_get_feed()
410
  return api_create_post(auth_token, content), api_get_feed()
411
 
412
def ui_manual_comment(username, password, reply_to_id, content):
    """UI handler: log in, create a comment/reply, and return (status message, refreshed feed)."""
    auth_token = api_login(username, password)
    # Compare against api_login's exact failure message. A substring test for
    # "Failed" would also reject valid users whose name or password contains it.
    if auth_token == "Failed: Invalid credentials.":
        return "Login failed.", api_get_feed()
    return api_create_comment(auth_token, reply_to_id, content), api_get_feed()
416
 
417
  def ui_save_to_json():
418
+ # Call the general persistence function targeting JSON
419
+ success, message = force_persist_data()
420
+ # Modify message to indicate JSON specifically if needed, or keep general
421
+ if "Successfully saved to JSON file." in message:
422
+ return f"Successfully saved current state to {DB_FILE_JSON}."
423
+ else:
424
+ return message # Return the error message from persistence
425
+
426
+ # --- Gradio UI ---
427
 
428
  with gr.Blocks(theme=gr.themes.Soft(), title="Social App") as demo:
429
  gr.Markdown("# Social Media Server for iLearn Agent")
430
  gr.Markdown(f"This app provides an API for iLearn agents to interact with. **Storage Backend: `{STORAGE_BACKEND_CONFIG}`**")
431
+
432
  with gr.Tabs():
433
  with gr.TabItem("Live Feed"):
434
+ # Define DataFrame columns based on the new schema
435
+ feed_columns = [(col, "number" if "id" in col else "text") for col in ENTRY_SCHEMA.keys()]
436
+ feed_df_display = gr.DataFrame(label="Feed", interactive=False, wrap=True, headers=list(ENTRY_SCHEMA.keys()))
437
  refresh_btn = gr.Button("Refresh Feed")
438
+
439
  with gr.TabItem("Manual Actions"):
440
  manual_action_status = gr.Textbox(label="Action Status", interactive=False)
441
  with gr.Row():
 
449
  gr.Markdown("### Create Comment / Reply")
450
  comment_user = gr.Textbox(label="User", value="admin")
451
  comment_pass = gr.Textbox(label="Pass", type="password", value="password")
452
+ # Updated UI field for the single Reply To ID
453
+ comment_reply_to_id = gr.Number(label="Reply To Entry ID (Post or Comment ID)", precision=0) # precision=0 for integer input
454
  comment_content = gr.Textbox(label="Content", lines=2)
455
  comment_button = gr.Button("Submit Comment", variant="primary")
456
  with gr.Group():
457
  gr.Markdown("### Data Management")
458
+ save_json_button = gr.Button("Save Current State to JSON") # Button label kept simple, func calls general persistence
 
459
 
460
+ # --- UI Actions ---
461
+ # Post button now calls ui_manual_post which calls api_create_post
462
  post_button.click(ui_manual_post, [post_user, post_pass, post_content], [manual_action_status, feed_df_display])
463
+ # Comment button calls ui_manual_comment with the single reply_to_id field
464
+ comment_button.click(ui_manual_comment, [comment_user, comment_pass, comment_reply_to_id, comment_content], [manual_action_status, feed_df_display])
465
  save_json_button.click(ui_save_to_json, None, [manual_action_status])
466
  refresh_btn.click(api_get_feed, None, feed_df_display)
467
+
468
+ # Load feed on startup
469
  demo.load(api_get_feed, None, feed_df_display)
470
 
471
+ # --- Gradio API Endpoints (adapted for unified structure) ---
472
+ # Ensure API names match the expected iLearn agent interactions
473
+ with gr.Column(visible=False): # Hide API interfaces in the main UI
474
  gr.Interface(api_register, ["text", "text"], "text", api_name="register")
475
  gr.Interface(api_login, ["text", "text"], "text", api_name="login")
476
+ # api_create_post: token, content
477
  gr.Interface(api_create_post, ["text", "text"], "text", api_name="create_post")
478
+ # api_create_comment: token, reply_to_id, content
479
+ # Note: Gradio interface infers types; Number will be float unless precision=0 and converted
480
+ gr.Interface(api_create_comment, ["text", "number", "text"], "text", api_name="create_comment")
481
+ # api_get_feed: no input, returns dataframe
482
  gr.Interface(api_get_feed, None, "dataframe", api_name="get_feed")
483
 
484
+
485
  if __name__ == "__main__":
486
+ # Ensure initial persistence happens on first run if not loading data
487
+ if not os.path.exists(DB_FILE_JSON) and not os.path.exists(DB_FILE_SQLITE) and STORAGE_BACKEND_CONFIG != "HF_DATASET":
488
+ print("No existing data files found. Performing initial save.")
489
+ force_persist_data() # Persist the initial admin user and empty tables
490
+
491
  demo.queue().launch(server_name="0.0.0.0", server_port=7860, share=False)