# Hugging Face Space: George-API / qwen4bit (status: Running)
# File size: 5,549 Bytes

import json
import logging
import os
import subprocess
import sys

import gradio as gr
import torch
from dotenv import load_dotenv

# Logging setup: INFO-and-above records are sent to a stream handler and to app.log
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
_stream_handler = logging.StreamHandler()
_file_handler = logging.FileHandler("app.log")

logging.basicConfig(
    level=logging.INFO,
    format=_LOG_FORMAT,
    handlers=[_stream_handler, _file_handler],
)

# Module-level logger shared by the functions in this file
logger = logging.getLogger(__name__)

# Load environment variables (python-dotenv)
load_dotenv()

# Load config file
def load_config(config_path: str = "transformers_config.json") -> dict:
    """Read the JSON configuration file and return it as a dict.

    Args:
        config_path: Path of the JSON file to read.

    Returns:
        The parsed top-level JSON object. An empty dict is returned (and the
        problem is logged) when the file cannot be read, is not valid JSON,
        or its top-level value is not a JSON object, so callers can always
        chain ``.get`` on the result.
    """
    try:
        # Decode explicitly as UTF-8 instead of relying on the locale default.
        with open(config_path, 'r', encoding="utf-8") as f:
            loaded = json.load(f)
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file. ValueError: covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        logger.error(f"Error loading config: {str(e)}")
        return {}

    if not isinstance(loaded, dict):
        # A list/string/number at the top level would break every later
        # ``config.get(...)`` call, so treat it like an unusable config.
        logger.error(
            f"Error loading config: expected a JSON object at the top level, "
            f"got {type(loaded).__name__}"
        )
        return {}

    return loaded

# Read the configuration once, at import time
config = load_config()
model_config = config.get("model_config", {})

# Settings taken from the config file, the environment and the working directory
MODEL_NAME = model_config.get(
    "model_name_or_path",
    "unsloth/DeepSeek-R1-Distill-Qwen-14B-bnb-4bit",
)
SPACE_NAME = os.environ.get("HF_SPACE_NAME", "phi4training")
# The presence of this marker file is what the dashboard reports as "training in progress"
TRAINING_ACTIVE = os.path.exists("TRAINING_ACTIVE")

# Function to start the training process
def start_training():
    """Write the training marker file and spawn the training script.

    Creates the ``TRAINING_ACTIVE`` marker file, then starts
    ``run_cloud_training.py`` as a child process without waiting for it to
    finish. If the process cannot be started, the marker file is removed
    again so the dashboard does not report a training run that never began.

    Returns:
        A status string for the UI: a success message when the process was
        spawned, otherwise an error message containing the exception text.
    """
    marker_path = "TRAINING_ACTIVE"
    try:
        # Create TRAINING_ACTIVE file
        with open(marker_path, "w", encoding="utf-8") as f:
            f.write("Training in progress")

        try:
            # Run the training script in the background with the interpreter
            # that is running this app. Output is discarded rather than piped:
            # nothing ever reads the pipes, so a verbose child would block
            # once the OS pipe buffer filled up.
            subprocess.Popen(
                [sys.executable or "python", "run_cloud_training.py"],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except Exception:
            # The launch failed, so roll the marker back before reporting.
            try:
                os.remove(marker_path)
            except OSError as cleanup_error:
                logger.warning(
                    f"Could not remove {marker_path}: {str(cleanup_error)}"
                )
            raise

        return "✅ Training started! Check status below for updates."
    except Exception as e:
        # UI callback boundary: report the failure instead of raising.
        logger.error(f"Error starting training: {str(e)}")
        return f"❌ Error starting training: {str(e)}"

# Create Gradio interface - training status only, no model outputs
def _render_status_markdown(training_active):
    """Build the Markdown text shown in the status panel.

    Single source for the status text, used for both the initial render and
    the "Refresh Status" button. The text is assembled line by line so it
    carries no source-code indentation.

    Args:
        training_active: Whether the TRAINING_ACTIVE marker file was found.

    Returns:
        The Markdown string with the model name, the training settings read
        from ``config["training_config"]`` (with fallback defaults) and the
        current training status line.
    """
    training_cfg = config.get("training_config", {})
    if training_active:
        status_line = "🟢 Training in progress"
    else:
        status_line = "⚪ Training not currently active"

    return "\n".join([
        "## Research Training Phase Active",
        "",
        f"**Model**: {MODEL_NAME}",
        "**Dataset**: phi4-cognitive-dataset",
        "",
        "This is a multidisciplinary research training phase. The model is not available for interactive use.",
        "",
        "### Training Configuration:",
        f"- **Epochs**: {training_cfg.get('num_train_epochs', 3)}",
        f"- **Batch Size**: {training_cfg.get('per_device_train_batch_size', 2)}",
        f"- **Gradient Accumulation Steps**: {training_cfg.get('gradient_accumulation_steps', 4)}",
        f"- **Learning Rate**: {training_cfg.get('learning_rate', 2e-5)}",
        f"- **Max Sequence Length**: {training_cfg.get('max_seq_length', 2048)}",
        "",
        "### Training Status:",
        status_line,
        "",
        "⚠️ **NOTE**: This space does not provide model outputs during the research training phase.",
    ])


def refresh_status():
    """Re-check the marker file and return freshly rendered status Markdown."""
    return _render_status_markdown(os.path.exists("TRAINING_ACTIVE"))


with gr.Blocks(css="footer {visibility: hidden}") as demo:
    gr.Markdown(f"# {SPACE_NAME}: Training Status Dashboard")

    with gr.Row():
        with gr.Column():
            # Initial render uses the marker state captured at import time;
            # the refresh button re-checks it.
            status = gr.Markdown(_render_status_markdown(TRAINING_ACTIVE))

    with gr.Row():
        # Add buttons for starting training and refreshing status
        start_btn = gr.Button("Start Training", variant="primary")
        refresh_btn = gr.Button("Refresh Status")

    # Output area for training start messages
    training_output = gr.Markdown("")

    # Connect button clicks to functions
    start_btn.click(start_training, outputs=training_output)
    refresh_btn.click(refresh_status, outputs=status)

    gr.Markdown("""
    ### Research Training Information
    This model is being fine-tuned on research-focused datasets and is not available for interactive querying.
    Training logs are available to authorized researchers only.
    
    ### Instructions
    1. Click "Start Training" to begin the fine-tuning process
    2. Use "Refresh Status" to check training progress
    3. Training logs are saved to the output directory
    """)

# Launch the interface
def _launch_dashboard():
    """Log the startup message, then launch the Gradio app with share=False."""
    logger.info("Starting training status dashboard")
    # Start Gradio with minimal features.
    # enable_queue is deliberately not passed: it is no longer supported in Gradio 5.x.
    demo.launch(share=False)


if __name__ == "__main__":
    _launch_dashboard()