phi4training / app.py

import json
import os
import subprocess
import sys
import threading

import gradio as gr
import torch


def load_env_variables():
    """Load environment variables from system or .env file."""
    if os.environ.get("SPACE_ID"):
        print("Running in Hugging Face Space")
        if "/" in os.environ.get("SPACE_ID", ""):
            username = os.environ.get("SPACE_ID").split("/")[0]
            os.environ["HF_USERNAME"] = username
            print(f"Set HF_USERNAME from SPACE_ID: {username}")
    else:
        try:
            from dotenv import load_dotenv

            env_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), ".env")
            if os.path.exists(env_path):
                load_dotenv(env_path)
                print(f"Loaded environment variables from {env_path}")
        except ImportError:
            print("python-dotenv not installed, skipping .env loading")


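# For local runs, load_env_variables() can pick up a .env file next to app.py via
# python-dotenv. A minimal sketch of such a file (placeholder values; the variable
# names match the ones reported by check_environment() below):
#
#   HF_TOKEN=hf_xxxxxxxxxxxxxxxx
#   HF_USERNAME=your-username
#   HF_SPACE_NAME=your-space-name

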
def check_environment():
    """Check the environment for GPU availability and other requirements."""
    env_info = {
        "System": {
            "Platform": sys.platform,
            "Python Version": sys.version.split()[0]
        },
        "GPU": {
            "CUDA Available": torch.cuda.is_available(),
            "Device Count": torch.cuda.device_count() if torch.cuda.is_available() else 0
        },
        "Environment Variables": {
            "HF_TOKEN": bool(os.environ.get("HF_TOKEN")),
            "HF_USERNAME": bool(os.environ.get("HF_USERNAME")),
            "HF_SPACE_NAME": bool(os.environ.get("HF_SPACE_NAME"))
        }
    }
    if torch.cuda.is_available():
        env_info["GPU"]["Device Name"] = torch.cuda.get_device_name(0)
        env_info["GPU"]["Memory (GB)"] = round(torch.cuda.get_device_properties(0).total_memory / (1024**3), 2)
    return env_info


def run_training_process():
    """Run the training process using the configuration files."""
    try:
        current_dir = os.path.dirname(os.path.abspath(__file__))
        training_script = os.path.join(current_dir, "run_transformers_training.py")

        # Start the training process, merging stderr into stdout
        process = subprocess.Popen(
            [sys.executable, training_script],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1
        )

        # Process the output line by line
        for line in process.stdout:
            print(line.strip())

        process.wait()
        return process.returncode
    except Exception as e:
        print(f"Error in training process: {e}")
        return 1


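# Note: run_transformers_training.py is expected to sit in the same directory as app.py;
# its combined stdout/stderr is streamed line by line and printed here, which is what
# shows up in the Hugging Face Space logs while training runs.

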
def start_training(learning_rate, num_train_epochs, per_device_train_batch_size,
                   gradient_accumulation_steps):
    """Start the training process with the specified parameters."""
    try:
        load_env_variables()
        current_dir = os.path.dirname(os.path.abspath(__file__))

        # Load and update transformers config
        with open(os.path.join(current_dir, "transformers_config.json"), "r") as f:
            config = json.load(f)

        # Update training parameters
        config["training"].update({
            "num_train_epochs": num_train_epochs,
            "learning_rate": learning_rate,
            "per_device_train_batch_size": per_device_train_batch_size,
            "gradient_accumulation_steps": gradient_accumulation_steps
        })

        # Update hub settings if username is available
        if os.environ.get("HF_USERNAME"):
            config["huggingface_hub"].update({
                "hub_model_id": f"{os.environ['HF_USERNAME']}/Phi4-Cognitive-Science"
            })

        # Save updated config
        with open(os.path.join(current_dir, "transformers_config.json"), "w") as f:
            json.dump(config, f, indent=4)

        # Start training in a separate thread
        thread = threading.Thread(target=run_training_process)
        thread.daemon = True
        thread.start()

        return "Training started! Check the Hugging Face Space logs for progress."
    except Exception as e:
        return f"Error starting training: {str(e)}"


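# start_training() assumes transformers_config.json lives next to app.py and already
# contains "training" and "huggingface_hub" sections. A minimal sketch of that assumed
# shape (illustrative values only, not taken from the actual config file):
#
#   {
#       "training": {
#           "num_train_epochs": 3,
#           "learning_rate": 2e-5,
#           "per_device_train_batch_size": 12,
#           "gradient_accumulation_steps": 4
#       },
#       "huggingface_hub": {
#           "hub_model_id": "<HF_USERNAME>/Phi4-Cognitive-Science"
#       }
#   }

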
with gr.Blocks(title="Phi-4 Training Interface") as demo:
    gr.Markdown("# Phi-4 Unsupervised Training for Cognitive Science")

    with gr.Tab("Training"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("## Model Configuration")
                gr.Markdown("**Model**: unsloth/phi-4-unsloth-bnb-4bit")
                gr.Markdown("**Dataset**: George-API/cognitive-data")

                gr.Markdown("## Training Parameters")
                learning_rate = gr.Slider(minimum=1e-6, maximum=1e-4, value=2e-5, step=1e-6,
                                          label="Learning Rate")
                num_train_epochs = gr.Slider(minimum=1, maximum=5, value=3, step=1,
                                             label="Number of Epochs")
                per_device_train_batch_size = gr.Slider(minimum=4, maximum=24, value=12, step=4,
                                                        label="Per Device Train Batch Size (Unsloth Optimized)")
                gradient_accumulation_steps = gr.Slider(minimum=1, maximum=8, value=4, step=1,
                                                        label="Gradient Accumulation Steps")

                start_btn = gr.Button("Start Training", variant="primary")
                training_output = gr.Textbox(label="Training Output", interactive=False)

    with gr.Tab("Environment"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("## Environment Information")
                env_info = gr.JSON(label="Environment Info")
                check_env_btn = gr.Button("Check Environment")

    # Set up event handlers
    start_btn.click(
        fn=start_training,
        inputs=[learning_rate, num_train_epochs, per_device_train_batch_size, gradient_accumulation_steps],
        outputs=training_output
    )
    check_env_btn.click(
        fn=check_environment,
        inputs=[],
        outputs=env_info
    )


if __name__ == "__main__":
    load_env_variables()
    demo.launch()
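
# To run locally (assuming gradio, torch and, optionally, python-dotenv are installed):
#   python app.py
# On Hugging Face Spaces, the runtime starts the app from this file automatically.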