# Spaces: Sleeping  (page-banner residue captured along with the source; not part of the program)
import datetime
import json
import os
import re
import subprocess
import sys
import threading
from threading import Thread

import gradio as gr
import torch
def load_env_variables():
    """Load environment variables from the Space runtime or a local .env file.

    If ``SPACE_ID`` is set, the process is treated as running inside a
    Hugging Face Space and the text before the first ``/`` of that ID is
    exported as ``HF_USERNAME`` (overwriting any existing value).

    Otherwise a ``.env`` file located next to this module is loaded through
    ``python-dotenv``, provided both the package and the file exist.
    """
    space_id = os.environ.get("SPACE_ID")
    if space_id:
        print("Running in Hugging Face Space")
        username, slash, _ = space_id.partition("/")
        # Only export when there really is an owner part: an ID without "/"
        # carries no owner, and "/name" would export an empty username.
        if slash and username:
            os.environ["HF_USERNAME"] = username
            print(f"Set HF_USERNAME from SPACE_ID: {username}")
        return

    # Local run: python-dotenv is an optional dependency, so import it lazily.
    try:
        from dotenv import load_dotenv
    except ImportError:
        print("python-dotenv not installed, skipping .env loading")
        return

    env_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), ".env")
    if os.path.exists(env_path):
        load_dotenv(env_path)
        print(f"Loaded environment variables from {env_path}")
def check_environment():
    """Collect a snapshot of the runtime environment.

    Returns:
        dict: Three sections:
            * ``"System"`` - ``sys.platform`` and the Python version string.
            * ``"GPU"`` - whether CUDA is available and the device count;
              when CUDA is available, also the name and total memory (GB,
              rounded to two decimals) of device 0.
            * ``"Environment Variables"`` - booleans telling whether
              ``HF_TOKEN``, ``HF_USERNAME`` and ``HF_SPACE_NAME`` are set to
              a non-empty value. The values themselves are never returned.
    """
    # Query availability once so every field is derived from the same answer.
    cuda_available = torch.cuda.is_available()

    gpu_info = {
        "CUDA Available": cuda_available,
        "Device Count": torch.cuda.device_count() if cuda_available else 0,
    }
    if cuda_available:
        gpu_info["Device Name"] = torch.cuda.get_device_name(0)
        total_bytes = torch.cuda.get_device_properties(0).total_memory
        gpu_info["Memory (GB)"] = round(total_bytes / (1024**3), 2)

    return {
        "System": {
            "Platform": sys.platform,
            "Python Version": sys.version.split()[0],
        },
        "GPU": gpu_info,
        "Environment Variables": {
            name: bool(os.environ.get(name))
            for name in ("HF_TOKEN", "HF_USERNAME", "HF_SPACE_NAME")
        },
    }
def run_training_process():
    """Run ``run_transformers_training.py`` and relay its output.

    The script is looked up next to this module and executed with the current
    interpreter. Its stdout and stderr are merged and echoed line by line to
    this process's stdout while the child runs.

    Returns:
        int: The child's exit code, or ``1`` if launching or streaming raised.
    """
    try:
        current_dir = os.path.dirname(os.path.abspath(__file__))
        training_script = os.path.join(current_dir, "run_transformers_training.py")

        # The context manager closes the pipe and waits for the child on exit,
        # even if relaying the output is interrupted by an exception.
        with subprocess.Popen(
            [sys.executable, training_script],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,  # line-buffered so output is relayed as it is produced
        ) as process:
            for line in process.stdout:
                # Drop only the trailing newline/whitespace so indentation in
                # the child's output (e.g. tracebacks) is preserved.
                print(line.rstrip())

        return process.returncode
    except Exception as e:
        # Boundary of a background worker: report the failure as an exit code.
        print(f"Error in training process: {e}")
        return 1
def start_training(learning_rate, num_train_epochs, per_device_train_batch_size,
                   gradient_accumulation_steps):
    """Persist the chosen hyper-parameters and launch training in the background.

    The function rewrites ``transformers_config.json`` (next to this module)
    with the given values under its ``"training"`` section, sets
    ``hub_model_id`` under ``"huggingface_hub"`` when ``HF_USERNAME`` is
    available, and then starts ``run_training_process`` on a daemon thread.

    Args:
        learning_rate: Value stored as ``training.learning_rate``.
        num_train_epochs: Value stored as ``training.num_train_epochs``.
        per_device_train_batch_size: Value stored as
            ``training.per_device_train_batch_size``.
        gradient_accumulation_steps: Value stored as
            ``training.gradient_accumulation_steps``.

    Returns:
        str: A status message - either a confirmation that training was
        started or ``"Error starting training: ..."`` describing the failure.
    """
    try:
        load_env_variables()

        current_dir = os.path.dirname(os.path.abspath(__file__))
        config_path = os.path.join(current_dir, "transformers_config.json")

        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)

        # A missing "training" section is treated as a broken config and is
        # reported through the error message below.
        config["training"].update({
            "num_train_epochs": num_train_epochs,
            "learning_rate": learning_rate,
            "per_device_train_batch_size": per_device_train_batch_size,
            "gradient_accumulation_steps": gradient_accumulation_steps,
        })

        # The hub section is optional: create it on demand so its absence
        # does not abort the run just because a username happens to be set.
        hf_username = os.environ.get("HF_USERNAME")
        if hf_username:
            config.setdefault("huggingface_hub", {}).update({
                "hub_model_id": f"{hf_username}/Phi4-Cognitive-Science"
            })

        with open(config_path, "w", encoding="utf-8") as f:
            json.dump(config, f, indent=4)

        # Daemon thread: the UI callback returns immediately and the worker
        # does not keep the interpreter alive on shutdown.
        thread = threading.Thread(target=run_training_process, daemon=True)
        thread.start()

        return "Training started! Check the Hugging Face Space logs for progress."
    except Exception as e:
        # UI boundary: surface any failure as text in the output box.
        return f"Error starting training: {str(e)}"
# Gradio UI: a "Training" tab with the hyper-parameter sliders and a start
# button, and an "Environment" tab that shows the check_environment() report.
# NOTE(review): the nesting below is reconstructed from the order of the
# `with` statements - confirm the layout against the deployed Space.
with gr.Blocks(title="Phi-4 Training Interface") as demo:
    gr.Markdown("# Phi-4 Unsupervised Training for Cognitive Science")

    with gr.Tab("Training"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("## Model Configuration")
                gr.Markdown("**Model**: unsloth/phi-4-unsloth-bnb-4bit")
                gr.Markdown("**Dataset**: George-API/cognitive-data")

                gr.Markdown("## Training Parameters")
                learning_rate = gr.Slider(minimum=1e-6, maximum=1e-4, value=2e-5, step=1e-6,
                                          label="Learning Rate")
                num_train_epochs = gr.Slider(minimum=1, maximum=5, value=3, step=1,
                                             label="Number of Epochs")
                per_device_train_batch_size = gr.Slider(minimum=4, maximum=24, value=12, step=4,
                                                        label="Per Device Train Batch Size (Unsloth Optimized)")
                gradient_accumulation_steps = gr.Slider(minimum=1, maximum=8, value=4, step=1,
                                                        label="Gradient Accumulation Steps")

                start_btn = gr.Button("Start Training", variant="primary")
                training_output = gr.Textbox(label="Training Output", interactive=False)

    with gr.Tab("Environment"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("## Environment Information")
                env_info = gr.JSON(label="Environment Info")
                check_env_btn = gr.Button("Check Environment")

    # Set up event handlers (registered while the Blocks context is open).
    start_btn.click(
        fn=start_training,
        inputs=[learning_rate, num_train_epochs, per_device_train_batch_size, gradient_accumulation_steps],
        outputs=training_output
    )
    check_env_btn.click(
        fn=check_environment,
        inputs=[],
        outputs=env_info
    )
# Script entry point: prepare the environment first, then serve the UI.
if __name__ == "__main__":
    load_env_variables()
    demo.launch()