hf-train-frontend

Runtime error

File size: 12,559 Bytes

#!/usr/bin/env python

"""
Quick script to update your Hugging Face Space for phi-4-unsloth-bnb-4bit training.
This script handles the specific requirements for the 4-bit quantized Phi-4 model training,
including proper configuration and dependency management.
"""

import os
import sys
import json
import subprocess
import argparse
import logging
from pathlib import Path
from huggingface_hub import HfApi, login
import getpass

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[logging.StreamHandler(sys.stdout)]
)
logger = logging.getLogger(__name__)

def load_env_variables():
    """Load environment variables from system or .env file."""
    # Define default values that should be used
    required_vars = {
        "HF_USERNAME": os.environ.get("HF_USERNAME", "George-API"),
        "HF_SPACE_NAME": "phi4training"  # Hardcode the correct space name
    }
    
    # First try to load from local .env file
    try:
        from dotenv import load_dotenv
        env_path = Path(__file__).parent / ".env"
        if env_path.exists():
            # Load and explicitly set environment variables
            with open(env_path) as f:
                for line in f:
                    if line.strip() and not line.startswith('#'):
                        key, value = line.strip().split('=', 1)
                        os.environ[key] = value.strip()
            logger.info(f"Loaded environment variables from {env_path}")
        else:
            logger.warning(f"No .env file found at {env_path}")
    except ImportError:
        logger.warning("python-dotenv not installed, skipping .env loading")
    
    # Check if we're running in a Hugging Face Space
    if os.environ.get("SPACE_ID"):
        logger.info("Running in Hugging Face Space")
        if "/" in os.environ.get("SPACE_ID", ""):
            username = os.environ.get("SPACE_ID").split("/")[0]
            os.environ["HF_USERNAME"] = username
            logger.info(f"Set HF_USERNAME from SPACE_ID: {username}")
    
    # Always ensure we have the required variables
    # And override HF_SPACE_NAME to ensure we use phi4training
    result = {
        "HF_TOKEN": os.environ.get("HF_TOKEN", ""),
        "HF_USERNAME": os.environ.get("HF_USERNAME", required_vars["HF_USERNAME"]),
        "HF_SPACE_NAME": required_vars["HF_SPACE_NAME"]  # Always use phi4training
    }
    
    # Ensure the space name is set correctly in environment
    os.environ["HF_SPACE_NAME"] = required_vars["HF_SPACE_NAME"]
    
    logger.info(f"Using environment variables: USERNAME={result['HF_USERNAME']}, SPACE_NAME={result['HF_SPACE_NAME']}")
    return result

def verify_configs():
    """Verify that all necessary configuration files exist and are valid."""
    current_dir = Path(__file__).parent
    required_files = [
        "transformers_config.json",
        "requirements.txt",
        "run_transformers_training.py"
    ]
    
    missing_files = []
    for file in required_files:
        if not (current_dir / file).exists():
            missing_files.append(file)
    
    if missing_files:
        raise FileNotFoundError(f"Missing required files: {', '.join(missing_files)}")
    
    # Verify JSON configs
    json_files = [f for f in required_files if f.endswith('.json')]
    for json_file in json_files:
        try:
            with open(current_dir / json_file) as f:
                json.load(f)
            logger.info(f"Verified {json_file} is valid JSON")
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON in {json_file}: {e}")

def update_requirements():
    """Update consolidated requirements.txt with all necessary packages in the correct order."""
    logger.info("Setting up consolidated requirements file...")
    current_dir = Path(__file__).parent
    req_path = current_dir / "requirements.txt"
    
    # All required packages in the correct installation order
    required_packages = [
        # Base requirements (install first)
        "torch>=2.0.0",
        "accelerate>=0.27.0",
        "bitsandbytes>=0.41.0", 
        "transformers>=4.36.0",
        "datasets>=2.15.0",
        "huggingface-hub>=0.19.0",
        "tensorboard>=2.15.0",
        
        # Main requirements (install second)
        "einops>=0.7.0",
        "filelock>=3.13.1",
        "gradio>=5.17.0",
        "matplotlib>=3.7.0",
        "numpy>=1.24.0",
        "packaging>=23.0",
        "peft>=0.9.0",
        "psutil>=5.9.0",
        "python-dotenv>=1.0.0",
        "pyyaml>=6.0.1",
        "regex>=2023.0.0",
        "requests>=2.31.0",
        "safetensors>=0.4.1",
        "sentencepiece>=0.1.99",
        "tqdm>=4.65.0",
        "typing-extensions>=4.8.0",
        "unsloth>=2024.3"
    ]
    
    # Optional packages (commented out by default)
    optional_packages = [
        "flash-attn==2.5.2"
    ]
    
    # Create consolidated requirements file
    with open(req_path, 'w') as f:
        f.write("# BASE REQUIREMENTS - Install these critical dependencies first\n")
        f.write("# ---------------------------------------------------------------------\n")
        
        # Write base dependencies first
        for i, req in enumerate(required_packages):
            if i == 7:  # After base requirements
                f.write("\n# MAIN REQUIREMENTS - Install these after base dependencies\n")
                f.write("# ---------------------------------------------------------------------\n")
            f.write(f"{req}\n")
        
        # Add optional dependencies section
        f.write("\n# OPTIONAL DEPENDENCIES - Install these last (if needed)\n")
        f.write("# ---------------------------------------------------------------------\n")
        for opt_pkg in optional_packages:
            f.write(f"# {opt_pkg}\n")
    
    logger.info(f"Updated consolidated requirements file at {req_path}")
    logger.info("Requirements are ordered for proper dependency installation")
    
    # Remove old requirements files if they exist
    old_files = ["requirements-base.txt", "requirements-flash.txt"]
    for old_file in old_files:
        old_path = current_dir / old_file
        if old_path.exists():
            old_path.unlink()
            logger.info(f"Removed old requirements file: {old_file}")
    
    return True

def create_space(username, space_name):
    """Create or get a Hugging Face Space."""
    # Override with the correct values regardless of what's passed
    username = "George-API"
    space_name = "phi4training"
    
    try:
        api = HfApi()
        space_id = f"{username}/{space_name}"
        logger.info(f"Checking Space {space_id}...")
        
        # First try to get the space
        try:
            space_info = api.space_info(repo_id=space_id)
            logger.info(f"Space {space_id} already exists")
            return space_info
        except Exception:
            logger.info(f"Space {space_id} does not exist, creating new space...")
            
            # Create new space
            api.create_repo(
                repo_id=space_id,
                private=False,
                repo_type="space",
                space_sdk="gradio"
            )
            logger.info(f"Created new space: {space_id}")
            return api.space_info(repo_id=space_id)
    except Exception as e:
        logger.error(f"Failed to create space: {str(e)}")
        
        # Don't proceed if we can't create/access the space
        raise RuntimeError(f"Error with Space {space_id}: {str(e)}")

def main():
    """Main function to update the Space."""
    try:
        # Parse command line arguments
        parser = argparse.ArgumentParser(description='Update Hugging Face Space for Phi-4 training')
        parser.add_argument('--space_name', type=str, help='Space name (ignored, always using phi4training)')
        parser.add_argument('--force', action='store_true', help='Skip confirmation when updating Space')
        args = parser.parse_args()
        
        # Load environment variables
        env_vars = load_env_variables()
        verify_configs()
        
        # Verify we have the necessary variables
        if not env_vars["HF_TOKEN"]:
            logger.error("Missing HF_TOKEN. Please set it in your .env file or environment variables.")
            return False
            
        logger.info(f"Environment variables loaded: USERNAME={env_vars['HF_USERNAME']}, SPACE_NAME={env_vars['HF_SPACE_NAME']}")
        
        # Ask for confirmation unless forced
        if not args.force:
            print("\nWARNING: Updating the Space will INTERRUPT any ongoing training!")
            confirm = input("Are you sure you want to update the Space? Type 'yes' to confirm: ")
            if confirm.lower() != 'yes':
                logger.info("Update cancelled by user")
                return False
            
            # Additional password check for safety
            password = getpass.getpass("Enter your password to confirm update: ")
            if password.strip() == "":
                logger.info("No password entered. Update cancelled.")
                return False
        else:
            logger.info("Skipping confirmation due to --force flag")
        
        # Update requirements
        update_requirements()
        logger.info("Requirements updated successfully")
        
        # Always use phi4training as the space name regardless of arguments
        space_name = "phi4training"
        logger.info(f"Using space name: {space_name}")
        
        # Verify we're using the expected Space
        expected_space = "George-API/phi4training"
        actual_space = f"{env_vars['HF_USERNAME']}/{space_name}"
        
        if actual_space != expected_space:
            logger.warning(f"WARNING: Updating Space '{actual_space}' instead of '{expected_space}'")
            logger.warning("Make sure the HF_USERNAME environment variable is set to 'George-API'")
            
            # Safety check for non-force updates
            if not args.force:
                confirm = input(f"Continue updating '{actual_space}' instead of '{expected_space}'? (yes/no): ")
                if confirm.lower() != "yes":
                    logger.info("Update cancelled by user")
                    return False
        else:
            logger.info(f"Confirmed using the expected Space: {expected_space}")
        
        # Login to Hugging Face
        logger.info("Logging in to Hugging Face...")
        try:
            login(token=env_vars["HF_TOKEN"])
            logger.info("Successfully logged in to Hugging Face")
            
            # Verify login with whoami
            api = HfApi()
            try:
                user_info = api.whoami()
                logger.info(f"Authenticated as: {user_info['name']}")
            except Exception as e:
                logger.error(f"Authentication verification failed: {str(e)}")
                logger.error("Your HF_TOKEN may be invalid or expired.")
                return False
        except Exception as e:
            logger.error(f"Login failed: {str(e)}")
            logger.error("Make sure your HF_TOKEN is valid and not expired.")
            return False
        
        # Create/get space
        space_info = create_space(env_vars["HF_USERNAME"], space_name)
        logger.info(f"Space info: {space_info}")
        
        # Upload files
        current_dir = Path(__file__).parent
        logger.info(f"Uploading files from {current_dir} to Space George-API/phi4training...")
        
        # Create .gitignore
        with open(current_dir / ".gitignore", "w") as f:
            f.write(".env\n*.pyc\n__pycache__\n")
        logger.info("Created .gitignore file")
        
        api = HfApi()
        api.upload_folder(
            folder_path=str(current_dir),
            repo_id="George-API/phi4training",  # Hardcoded repo ID
            repo_type="space",
            ignore_patterns=[".env", "*.pyc", "__pycache__", "TRAINING_IN_PROGRESS.lock"]
        )
        
        logger.info(f"Files uploaded successfully")
        space_url = "https://huggingface.co/spaces/George-API/phi4training"
        logger.info(f"Space URL: {space_url}")
        print(f"\nSpace created successfully! You can view it at:\n{space_url}")
        return True
        
    except Exception as e:
        logger.error(f"Error updating Space: {str(e)}")
        return False

if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)