Update server.py
server.py
CHANGED
(Old version, removed by this commit: separate posts/comments DataFrames with their own posts_schema/comments_schema definitions and independent post_counter/comment_counter values, persisted and loaded per backend; replaced below by a single unified entries table.)

@@ -20,44 +20,111 @@ except ImportError:
STORAGE_BACKEND_CONFIG = os.getenv("STORAGE_BACKEND", "JSON").upper()
HF_DATASET_REPO = os.getenv("HF_DATASET_REPO")
HF_TOKEN = os.getenv("HF_TOKEN")
+DB_FILE_JSON = "social_data_unified.json" # Changed filename to avoid conflicts
+DB_FILE_SQLITE = "social_data_unified.db" # Changed filename
db_lock = threading.Lock()
HF_BACKUP_THRESHOLD = int(os.getenv("HF_BACKUP_THRESHOLD", 10))
dirty_operations_count = 0

+# --- New Global Data Structure ---
+users_db = {}
+entries_df = pd.DataFrame()
+post_id_counter = 0 # Single counter for all entries
+
+# Define the schema for the unified entries table
+ENTRY_SCHEMA = {
+    "post_id": "Int64", # Use nullable integer
+    "reply_to_id": "Int64", # Use nullable integer, None for top-level posts
+    "username": "object",
+    "content": "object",
+    "timestamp": "object",
+    "type": "object" # 'post' or 'comment'
+}
+
def force_persist_data():
    global dirty_operations_count
    with db_lock:
        storage_backend = STORAGE_BACKEND_CONFIG
+        print(f"Attempting to persist data to {storage_backend}")
        if storage_backend == "RAM":
+            print("RAM backend. No persistence.")
            return True, "RAM backend. No persistence."
        elif storage_backend == "SQLITE":
+            try:
+                with sqlite3.connect(DB_FILE_SQLITE) as conn:
+                    cursor = conn.cursor()
+                    # Users table
+                    cursor.execute("CREATE TABLE IF NOT EXISTS users (username TEXT PRIMARY KEY, password TEXT NOT NULL)")
+                    # Entries table - new schema
+                    cursor.execute("CREATE TABLE IF NOT EXISTS entries (post_id INTEGER PRIMARY KEY, reply_to_id INTEGER, username TEXT, content TEXT, timestamp TEXT, type TEXT)")
+
+                    # Save users
+                    users_to_save = [(u, p) for u, p in users_db.items()]
+                    if users_to_save: # Avoid executing with empty list
+                        conn.executemany("INSERT OR REPLACE INTO users (username, password) VALUES (?, ?)", users_to_save)
+
+                    # Save entries (replace existing data)
+                    # Ensure Int64 columns are correctly handled as nullable integers for SQL
+                    entries_to_save = entries_df.copy()
+                    entries_to_save['reply_to_id'] = entries_to_save['reply_to_id'].astype('object').where(entries_to_save['reply_to_id'].notna(), None)
+
+                    entries_to_save.to_sql('entries', conn, if_exists='replace', index=False)
+
+                    conn.commit()
+                print("Successfully saved to SQLite.")
+                return True, "Successfully saved to SQLite."
+            except Exception as e:
+                print(f"Error saving to SQLite: {e}")
+                return False, f"Error saving to SQLite: {e}"
+
        elif storage_backend == "JSON":
+            try:
+                data_to_save = {
+                    "users": users_db,
+                    "entries": entries_df.to_dict('records')
+                }
+                with open(DB_FILE_JSON, "w") as f:
+                    json.dump(data_to_save, f, indent=2)
+                print("Successfully saved to JSON file.")
+                return True, "Successfully saved to JSON file."
+            except Exception as e:
+                print(f"Error saving to JSON: {e}")
+                return False, f"Error saving to JSON: {e}"
+
        elif storage_backend == "HF_DATASET":
            if not all([HF_DATASETS_AVAILABLE, HF_TOKEN, HF_DATASET_REPO]):
+                print("HF_DATASET backend is not configured correctly.")
                return False, "HF_DATASET backend is not configured correctly."
            try:
                print("Pushing data to Hugging Face Hub...")
+
+                # Convert nullable Int64 columns to standard int/float for dataset
+                entries_for_hf = entries_df.copy()
+                # Hugging Face datasets typically handle None/null correctly for integer types
+                # Ensure type hints are correct or handle potential type issues
+                entries_for_hf['post_id'] = entries_for_hf['post_id'].astype('int64') # Non-nullable ID
+                entries_for_hf['reply_to_id'] = entries_for_hf['reply_to_id'].astype('float64') # Use float for nullable integer in HF datasets
+
+                user_dataset = Dataset.from_pandas(pd.DataFrame(list(users_db.items()), columns=['username', 'password']))
+                entries_dataset = Dataset.from_pandas(entries_for_hf)
+
                dataset_dict = DatasetDict({
+                    'users': user_dataset,
+                    'entries': entries_dataset,
                })
+                # Define features explicitly for nullable types if needed, though pandas conversion often works
+                # user_features = Features({'username': Value('string'), 'password': Value('string')})
+                # entry_features = Features({'post_id': Value('int64'), 'reply_to_id': Value('int64'), 'username': Value('string'), 'content': Value('string'), 'timestamp': Value('string'), 'type': Value('string')})
+                # Pass features to from_pandas or push_to_hub if needed, but auto-detection is often sufficient for basic types
+
                dataset_dict.push_to_hub(HF_DATASET_REPO, token=HF_TOKEN, private=True)
                dirty_operations_count = 0
+                print(f"Successfully pushed data to {HF_DATASET_REPO}.")
                return True, f"Successfully pushed data to {HF_DATASET_REPO}."
            except Exception as e:
+                print(f"Error pushing to Hugging Face Hub: {e}")
                return False, f"Error pushing to Hugging Face Hub: {e}"
+        print("Unknown backend.")
        return False, "Unknown backend."

def handle_persistence_after_change():
@@ -72,79 +139,154 @@ def handle_persistence_after_change():
    force_persist_data()

def load_data():
+    global STORAGE_BACKEND_CONFIG, users_db, entries_df, post_id_counter
    storage_backend = STORAGE_BACKEND_CONFIG

    with db_lock:
        users = {"admin": "password"}
+        # Initialize entries DataFrame with the correct schema
+        entries = pd.DataFrame({k: pd.Series(dtype=v) for k, v in ENTRY_SCHEMA.items()})

        if storage_backend == "SQLITE":
            try:
                with sqlite3.connect(DB_FILE_SQLITE) as conn:
                    cursor = conn.cursor()
+                    # Create tables if they don't exist
                    cursor.execute("CREATE TABLE IF NOT EXISTS users (username TEXT PRIMARY KEY, password TEXT NOT NULL)")
+                    cursor.execute("CREATE TABLE IF NOT EXISTS entries (post_id INTEGER PRIMARY KEY, reply_to_id INTEGER, username TEXT, content TEXT, timestamp TEXT, type TEXT)")
+                    # Add default admin user if not exists
                    cursor.execute("INSERT OR IGNORE INTO users (username, password) VALUES (?, ?)", ("admin", "password"))
                    conn.commit()
+
+                    # Load data
                    users = dict(conn.execute("SELECT username, password FROM users").fetchall())
+                    entries = pd.read_sql_query("SELECT * FROM entries", conn)
+
+                    # Ensure correct dtypes, especially for nullable integers
+                    for col, dtype in ENTRY_SCHEMA.items():
+                        if col in entries.columns:
+                            try:
+                                entries[col] = entries[col].astype(dtype)
+                            except Exception as e:
+                                print(f"Warning: Could not convert column {col} to {dtype} from SQLite. {e}")
+
+
+                print(f"Successfully loaded data from SQLite: {DB_FILE_SQLITE}")
            except Exception as e:
                print(f"CRITICAL: Failed to use SQLite. Falling back to RAM. Error: {e}")
                STORAGE_BACKEND_CONFIG = "RAM"
+
        elif storage_backend == "JSON":
            if os.path.exists(DB_FILE_JSON):
                try:
                    with open(DB_FILE_JSON, "r") as f: data = json.load(f)
+                    users = data.get("users", users)
+                    loaded_entries_list = data.get("entries", [])
+                    entries = pd.DataFrame(loaded_entries_list)
+
+                    # Ensure correct dtypes after loading from JSON
+                    if not entries.empty:
+                        for col, dtype in ENTRY_SCHEMA.items():
+                            if col in entries.columns:
+                                try:
+                                    entries[col] = entries[col].astype(dtype)
+                                except Exception as e:
+                                    print(f"Warning: Could not convert column {col} to {dtype} from JSON. {e}")
+                    else:
+                        # If JSON was empty or missing entries key, ensure empty DF has schema
+                        entries = pd.DataFrame({k: pd.Series(dtype=v) for k, v in ENTRY_SCHEMA.items()})
+
+                except (json.JSONDecodeError, KeyError, Exception) as e:
+                    print(f"Error loading JSON data: {e}. Initializing with empty data.")
+                    users = {"admin":"password"} # Reset users on load error? Or keep default? Let's keep default.
+                    entries = pd.DataFrame({k: pd.Series(dtype=v) for k, v in ENTRY_SCHEMA.items()})
+
        elif storage_backend == "HF_DATASET":
            if all([HF_DATASETS_AVAILABLE, HF_TOKEN, HF_DATASET_REPO]):
                try:
+                    print(f"Attempting to load from HF Dataset '{HF_DATASET_REPO}'...")
                    ds_dict = load_dataset(HF_DATASET_REPO, token=HF_TOKEN, trust_remote_code=True)
+
+                    if ds_dict and 'users' in ds_dict and 'entries' in ds_dict:
+                        # Load users
+                        if ds_dict['users'].num_rows > 0:
+                            users = dict(zip(ds_dict['users']['username'], ds_dict['users']['password']))
+                        else:
+                            users = {"admin":"password"} # Default admin if no users
+
+                        # Load entries
+                        entries = ds_dict['entries'].to_pandas()
+
+                        # Ensure correct dtypes, especially for nullable integers
+                        if not entries.empty:
+                            for col, dtype in ENTRY_SCHEMA.items():
+                                if col in entries.columns:
+                                    try:
+                                        # HF datasets might load Int64 as float or object, convert explicitly
+                                        if dtype == "Int64": # Pandas nullable integer
+                                            entries[col] = pd.to_numeric(entries[col], errors='coerce').astype(dtype)
+                                        else:
+                                            entries[col] = entries[col].astype(dtype)
+                                    except Exception as e:
+                                        print(f"Warning: Could not convert column {col} to {dtype} from HF Dataset. {e}")
+                        else:
+                            # If entries dataset is empty, ensure empty DF has schema
+                            entries = pd.DataFrame({k: pd.Series(dtype=v) for k, v in ENTRY_SCHEMA.items()})
+
+
                        print("Successfully loaded data from HF Dataset.")
+                    else:
+                        raise ValueError("Dataset dictionary is empty or malformed (missing 'users' or 'entries').")
                except Exception as e:
                    print(f"Could not load from HF Dataset '{HF_DATASET_REPO}'. Attempting to initialize. Error: {e}")
                    try:
+                        # Define features including nullable types if possible, or rely on pandas conversion
                        user_features = Features({'username': Value('string'), 'password': Value('string')})
+                        # Use float64 for nullable int in HF Features as a common workaround
+                        entry_features = Features({
+                            'post_id': Value('int64'),
+                            'reply_to_id': Value('float64'), # HF datasets often use float for nullable int
+                            'username': Value('string'),
+                            'content': Value('string'),
+                            'timestamp': Value('string'),
+                            'type': Value('string')
+                        })
+
                        initial_users_df = pd.DataFrame(list(users.items()), columns=['username', 'password'])
+                        # Ensure initial empty entries DF conforms to the HF features expected types
+                        initial_entries_df = pd.DataFrame({k: pd.Series(dtype='float64' if k in ['post_id', 'reply_to_id'] else 'object') for k in ENTRY_SCHEMA.keys()})
+
+
                        dataset_dict = DatasetDict({
                            'users': Dataset.from_pandas(initial_users_df, features=user_features),
+                            'entries': Dataset.from_pandas(initial_entries_df, features=entry_features) # Use initial empty with HF types
                        })
                        dataset_dict.push_to_hub(HF_DATASET_REPO, token=HF_TOKEN, private=True)
                        print(f"Successfully initialized new empty HF Dataset at {HF_DATASET_REPO}.")
+                        # After initializing, reset entries_df to pandas schema
+                        entries = pd.DataFrame({k: pd.Series(dtype=v) for k, v in ENTRY_SCHEMA.items()})
                    except Exception as e_push:
                        print(f"CRITICAL: Failed to create new HF Dataset. Falling back to RAM. Push Error: {e_push}")
                        STORAGE_BACKEND_CONFIG = "RAM"
            else:
                print("HF_DATASET backend not fully configured. Falling back to RAM.")
                STORAGE_BACKEND_CONFIG = "RAM"
+        else: # RAM backend or fallback
+            print("Using RAM backend.")
+
+        # Initialize global variables after loading/initializing
+        users_db = users
+        entries_df = entries
+        # Calculate the next post_id counter value
+        post_id_counter = int(entries_df['post_id'].max()) if not entries_df.empty and entries_df['post_id'].notna().any() else 0
+
+        print(f"Loaded data. Users: {len(users_db)}, Entries: {len(entries_df)}. Next Post ID: {post_id_counter + 1}")
+

+# --- Load Data Initially ---
+load_data()

+# --- API Functions (adapted for unified structure) ---

def api_register(username, password):
    if not username or not password: return "Failed: Username/password cannot be empty."
@@ -155,86 +297,145 @@ def api_register(username, password):
    return f"Success: User '{username}' registered."

def api_login(username, password):
+    # Simulate authentication token (basic user:pass string)
+    # In a real app, use proper token/session management
    return f"{username}:{password}" if users_db.get(username) == password else "Failed: Invalid credentials."

def _get_user_from_token(token):
    if not token or ':' not in token: return None
    user, pwd = token.split(':', 1)
+    with db_lock: # Access users_db requires lock
+        return user if users_db.get(user) == pwd else None

def api_create_post(auth_token, content):
+    """Creates a top-level post entry."""
+    global entries_df, post_id_counter
    username = _get_user_from_token(auth_token)
    if not username: return "Failed: Invalid auth token."
+    if not content: return "Failed: Content cannot be empty."
+
    with db_lock:
+        post_id_counter += 1
+        new_entry = pd.DataFrame([{
+            "post_id": post_id_counter,
+            "reply_to_id": pd.NA, # Use pandas NA for nullable integer
+            "username": username,
+            "content": content,
+            "timestamp": datetime.utcnow().isoformat(),
+            "type": "post"
+        }]).astype(ENTRY_SCHEMA) # Ensure correct dtypes
+
+        entries_df = pd.concat([entries_df, new_entry], ignore_index=True)
        handle_persistence_after_change()

+    return f"Success: Post {post_id_counter} created."
+
+def api_create_comment(auth_token, reply_to_id, content):
+    """Creates a comment/reply entry."""
+    global entries_df, post_id_counter
    username = _get_user_from_token(auth_token)
    if not username: return "Failed: Invalid auth token."
+    if not content: return "Failed: Content cannot be empty."
+    if reply_to_id is None: return "Failed: Reply to ID cannot be empty for a comment/reply."
+
+    try:
+        reply_to_id = int(reply_to_id) # Ensure it's an integer
+    except (ValueError, TypeError):
+        return "Failed: Invalid Reply To ID."
+
    with db_lock:
+        # Check if the entry being replied to exists
+        if reply_to_id not in entries_df['post_id'].values:
+            return f"Failed: Entry with ID {reply_to_id} not found."
+
+        post_id_counter += 1
+        new_entry = pd.DataFrame([{
+            "post_id": post_id_counter,
+            "reply_to_id": reply_to_id,
+            "username": username,
+            "content": content,
+            "timestamp": datetime.utcnow().isoformat(),
+            "type": "comment" # All replies are 'comment' type in this scheme
+        }]).astype(ENTRY_SCHEMA) # Ensure correct dtypes
+
+        entries_df = pd.concat([entries_df, new_entry], ignore_index=True)
        handle_persistence_after_change()
+
+    return f"Success: Comment/Reply {post_id_counter} created (replying to {reply_to_id})."
+

def api_get_feed():
+    """Retrieves all entries sorted by timestamp."""
    with db_lock:
+        # Return a copy to prevent external modifications
+        feed_data = entries_df.copy()
+
+        if feed_data.empty:
+            # Return empty DataFrame with expected columns
+            return pd.DataFrame({k: pd.Series(dtype=v) for k, v in ENTRY_SCHEMA.items()})
+
+        # Ensure timestamp is datetime for sorting, handle potential errors
+        try:
+            feed_data['timestamp'] = pd.to_datetime(feed_data['timestamp'])
+        except Exception as e:
+            print(f"Warning: Could not convert timestamp column to datetime: {e}")
+            # If conversion fails, sort by post_id or keep unsorted as fallback
+            # Let's skip sorting by timestamp if conversion fails
+            pass
+
+        # Sort (prefer timestamp, fallback to post_id if timestamp fails or is identical)
+        if 'timestamp' in feed_data.columns and pd.api.types.is_datetime64_any_dtype(feed_data['timestamp']):
+            feed_data = feed_data.sort_values(by=['timestamp', 'post_id'], ascending=[False, False])
+        else:
+            feed_data = feed_data.sort_values(by='post_id', ascending=False)
+
+        # Select and rename/reorder columns for display if necessary
+        # The current schema matches well, just need to ensure all columns are present
+        display_columns = list(ENTRY_SCHEMA.keys()) # Use all columns in the schema
        feed_data = feed_data.reindex(columns=display_columns)
+
+        # Fill NaN/NA for display purposes (optional, but can make table cleaner)
+        # Convert nullable Int64 NA to empty string or specific placeholder for display
+        for col in ['post_id', 'reply_to_id']:
+            if col in feed_data.columns:
+                feed_data[col] = feed_data[col].apply(lambda x: '' if pd.isna(x) else int(x)) # Display int without .0
+
+        return feed_data
+
+# --- UI Functions (adapted for unified structure) ---

def ui_manual_post(username, password, content):
    auth_token = api_login(username, password)
    if "Failed" in auth_token: return "Login failed.", api_get_feed()
    return api_create_post(auth_token, content), api_get_feed()

+def ui_manual_comment(username, password, reply_to_id, content):
    auth_token = api_login(username, password)
    if "Failed" in auth_token: return "Login failed.", api_get_feed()
+    return api_create_comment(auth_token, reply_to_id, content), api_get_feed()

def ui_save_to_json():
+    # Call the general persistence function targeting JSON
+    success, message = force_persist_data()
+    # Modify message to indicate JSON specifically if needed, or keep general
+    if "Successfully saved to JSON file." in message:
+        return f"Successfully saved current state to {DB_FILE_JSON}."
+    else:
+        return message # Return the error message from persistence
+
+# --- Gradio UI ---

with gr.Blocks(theme=gr.themes.Soft(), title="Social App") as demo:
    gr.Markdown("# Social Media Server for iLearn Agent")
    gr.Markdown(f"This app provides an API for iLearn agents to interact with. **Storage Backend: `{STORAGE_BACKEND_CONFIG}`**")
+
    with gr.Tabs():
        with gr.TabItem("Live Feed"):
+            # Define DataFrame columns based on the new schema
+            feed_columns = [(col, "number" if "id" in col else "text") for col in ENTRY_SCHEMA.keys()]
+            feed_df_display = gr.DataFrame(label="Feed", interactive=False, wrap=True, headers=list(ENTRY_SCHEMA.keys()))
            refresh_btn = gr.Button("Refresh Feed")
+
        with gr.TabItem("Manual Actions"):
            manual_action_status = gr.Textbox(label="Action Status", interactive=False)
            with gr.Row():
@@ -248,28 +449,43 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Social App") as demo:
                gr.Markdown("### Create Comment / Reply")
                comment_user = gr.Textbox(label="User", value="admin")
                comment_pass = gr.Textbox(label="Pass", type="password", value="password")
+                # Updated UI field for the single Reply To ID
+                comment_reply_to_id = gr.Number(label="Reply To Entry ID (Post or Comment ID)", precision=0) # precision=0 for integer input
                comment_content = gr.Textbox(label="Content", lines=2)
                comment_button = gr.Button("Submit Comment", variant="primary")
            with gr.Group():
                gr.Markdown("### Data Management")
+                save_json_button = gr.Button("Save Current State to JSON") # Button label kept simple, func calls general persistence

+    # --- UI Actions ---
+    # Post button now calls ui_manual_post which calls api_create_post
    post_button.click(ui_manual_post, [post_user, post_pass, post_content], [manual_action_status, feed_df_display])
+    # Comment button calls ui_manual_comment with the single reply_to_id field
+    comment_button.click(ui_manual_comment, [comment_user, comment_pass, comment_reply_to_id, comment_content], [manual_action_status, feed_df_display])
    save_json_button.click(ui_save_to_json, None, [manual_action_status])
    refresh_btn.click(api_get_feed, None, feed_df_display)
+
+    # Load feed on startup
    demo.load(api_get_feed, None, feed_df_display)

+    # --- Gradio API Endpoints (adapted for unified structure) ---
+    # Ensure API names match the expected iLearn agent interactions
+    with gr.Column(visible=False): # Hide API interfaces in the main UI
        gr.Interface(api_register, ["text", "text"], "text", api_name="register")
        gr.Interface(api_login, ["text", "text"], "text", api_name="login")
+        # api_create_post: token, content
        gr.Interface(api_create_post, ["text", "text"], "text", api_name="create_post")
+        # api_create_comment: token, reply_to_id, content
+        # Note: Gradio interface infers types; Number will be float unless precision=0 and converted
+        gr.Interface(api_create_comment, ["text", "number", "text"], "text", api_name="create_comment")
+        # api_get_feed: no input, returns dataframe
        gr.Interface(api_get_feed, None, "dataframe", api_name="get_feed")

+
if __name__ == "__main__":
+    # Ensure initial persistence happens on first run if not loading data
+    if not os.path.exists(DB_FILE_JSON) and not os.path.exists(DB_FILE_SQLITE) and STORAGE_BACKEND_CONFIG != "HF_DATASET":
+        print("No existing data files found. Performing initial save.")
+        force_persist_data() # Persist the initial admin user and empty tables
+
    demo.queue().launch(server_name="0.0.0.0", server_port=7860, share=False)
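
For reference, the named API endpoints added above (`register`, `login`, `create_post`, `create_comment`, `get_feed`) can be exercised from an agent process with `gradio_client`. The sketch below is illustrative only, not part of the commit: it assumes the server above is running, that the `gradio_client` package is installed, and that the address and the example user credentials are placeholders.

# Illustrative client-side sketch (assumptions: server running, gradio_client installed,
# URL and credentials are placeholders).
from gradio_client import Client

client = Client("http://localhost:7860")  # hypothetical address of the running Space

# Register and log in to obtain the simple "user:pass" token used by the API.
client.predict("alice", "secret", api_name="/register")
token = client.predict("alice", "secret", api_name="/login")

# Create a top-level post, then reply to it through the unified entries structure.
print(client.predict(token, "Hello from an iLearn agent", api_name="/create_post"))
print(client.predict(token, 1, "First reply", api_name="/create_comment"))  # 1 = post_id of the entry being replied to

# Fetch the unified feed (returned as a dataframe-like structure).
feed = client.predict(api_name="/get_feed")
print(feed)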