#!/usr/bin/env python
"""
Quick script to update your Hugging Face Space for phi-4-unsloth-bnb-4bit training.
This script handles the specific requirements for the 4-bit quantized Phi-4 model training,
including proper configuration and dependency management.
"""

import argparse
import getpass
import json
import logging
import os
import subprocess
import sys
from pathlib import Path

from huggingface_hub import HfApi, login

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[logging.StreamHandler(sys.stdout)]
)
logger = logging.getLogger(__name__)

# The Space this script manages. create_space() and the upload step in main()
# always target this repository, regardless of environment or CLI arguments.
DEFAULT_HF_USERNAME = "George-API"
SPACE_NAME = "phi4training"
EXPECTED_SPACE_ID = f"{DEFAULT_HF_USERNAME}/{SPACE_NAME}"
SPACE_URL = f"https://huggingface.co/spaces/{EXPECTED_SPACE_ID}"


def _load_dotenv_file(env_path):
    """Copy ``KEY=VALUE`` pairs from *env_path* into ``os.environ``.

    Blank lines and lines whose first non-blank character is ``#`` are
    ignored. Lines without ``=`` or with an empty key are skipped with a
    warning instead of aborting the run. Values are whitespace-stripped but
    otherwise used verbatim (surrounding quotes are NOT removed). Existing
    environment variables with the same name are overwritten.
    """
    with open(env_path, encoding="utf-8") as f:
        for line_number, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            key, separator, value = line.partition("=")
            key = key.strip()
            if not separator or not key:
                # Report only the line number: the content may be a secret.
                logger.warning(
                    f"Skipping malformed line {line_number} in {env_path}"
                )
                continue
            os.environ[key] = value.strip()


def load_env_variables():
    """Load environment variables from system or .env file.

    Reads a ``.env`` file next to this script (if present) into
    ``os.environ``, derives ``HF_USERNAME`` from ``SPACE_ID`` when running
    inside a Hugging Face Space, and forces ``HF_SPACE_NAME`` to the fixed
    space name.

    Returns:
        dict with keys ``HF_TOKEN`` (empty string when unset),
        ``HF_USERNAME`` and ``HF_SPACE_NAME``.
    """
    # Default username is captured BEFORE the .env file is read, matching the
    # original precedence: environment/.env value first, then this fallback.
    required_vars = {
        "HF_USERNAME": os.environ.get("HF_USERNAME", DEFAULT_HF_USERNAME),
        "HF_SPACE_NAME": SPACE_NAME  # Hardcode the correct space name
    }

    # The .env file is parsed by hand, so python-dotenv is not needed here.
    env_path = Path(__file__).parent / ".env"
    if env_path.exists():
        _load_dotenv_file(env_path)
        logger.info(f"Loaded environment variables from {env_path}")
    else:
        logger.warning(f"No .env file found at {env_path}")

    # Inside a Space, SPACE_ID looks like "<owner>/<name>"; use the owner.
    space_id = os.environ.get("SPACE_ID")
    if space_id:
        logger.info("Running in Hugging Face Space")
        if "/" in space_id:
            username = space_id.split("/")[0]
            os.environ["HF_USERNAME"] = username
            logger.info(f"Set HF_USERNAME from SPACE_ID: {username}")

    # Always ensure we have the required variables
    # And override HF_SPACE_NAME to ensure we use phi4training
    result = {
        "HF_TOKEN": os.environ.get("HF_TOKEN", ""),
        "HF_USERNAME": os.environ.get("HF_USERNAME", required_vars["HF_USERNAME"]),
        "HF_SPACE_NAME": required_vars["HF_SPACE_NAME"]  # Always use phi4training
    }

    # Ensure the space name is set correctly in environment
    os.environ["HF_SPACE_NAME"] = required_vars["HF_SPACE_NAME"]

    logger.info(f"Using environment variables: USERNAME={result['HF_USERNAME']}, SPACE_NAME={result['HF_SPACE_NAME']}")
    return result


def verify_configs():
    """Verify that all necessary configuration files exist and are valid.

    Raises:
        FileNotFoundError: if any required file is missing next to this script.
        ValueError: if a required ``.json`` file does not parse as JSON.
    """
    current_dir = Path(__file__).parent
    required_files = [
        "transformers_config.json",
        "requirements.txt",
        "run_transformers_training.py"
    ]

    missing_files = [
        name for name in required_files if not (current_dir / name).exists()
    ]
    if missing_files:
        raise FileNotFoundError(f"Missing required files: {', '.join(missing_files)}")

    # Verify JSON configs
    for json_file in (name for name in required_files if name.endswith('.json')):
        try:
            with open(current_dir / json_file, encoding="utf-8") as f:
                json.load(f)
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON in {json_file}: {e}") from e
        logger.info(f"Verified {json_file} is valid JSON")


def update_requirements():
    """Update consolidated requirements.txt with all necessary packages in the correct order.

    Overwrites ``requirements.txt`` next to this script with three sections
    (base, main, commented-out optional) and deletes the legacy
    ``requirements-base.txt`` / ``requirements-flash.txt`` files if present.

    Returns:
        True once the file has been written.
    """
    logger.info("Setting up consolidated requirements file...")
    current_dir = Path(__file__).parent
    req_path = current_dir / "requirements.txt"

    # Base requirements (install first)
    base_packages = [
        "torch>=2.0.0",
        "accelerate>=0.27.0",
        "bitsandbytes>=0.41.0",
        "transformers>=4.36.0",
        "datasets>=2.15.0",
        "huggingface-hub>=0.19.0",
        "tensorboard>=2.15.0",
    ]

    # Main requirements (install second)
    main_packages = [
        "einops>=0.7.0",
        "filelock>=3.13.1",
        "gradio>=5.17.0",
        "matplotlib>=3.7.0",
        "numpy>=1.24.0",
        "packaging>=23.0",
        "peft>=0.9.0",
        "psutil>=5.9.0",
        "python-dotenv>=1.0.0",
        "pyyaml>=6.0.1",
        "regex>=2023.0.0",
        "requests>=2.31.0",
        "safetensors>=0.4.1",
        "sentencepiece>=0.1.99",
        "tqdm>=4.65.0",
        "typing-extensions>=4.8.0",
        "unsloth>=2024.3",
    ]

    # Optional packages (commented out by default)
    optional_packages = [
        "flash-attn==2.5.2"
    ]

    rule = "# ---------------------------------------------------------------------\n"

    # Create consolidated requirements file
    with open(req_path, 'w', encoding="utf-8") as f:
        f.write("# BASE REQUIREMENTS - Install these critical dependencies first\n")
        f.write(rule)
        f.writelines(f"{req}\n" for req in base_packages)

        f.write("\n# MAIN REQUIREMENTS - Install these after base dependencies\n")
        f.write(rule)
        f.writelines(f"{req}\n" for req in main_packages)

        # Optional dependencies are emitted as comments so pip ignores them.
        f.write("\n# OPTIONAL DEPENDENCIES - Install these last (if needed)\n")
        f.write(rule)
        f.writelines(f"# {opt_pkg}\n" for opt_pkg in optional_packages)

    logger.info(f"Updated consolidated requirements file at {req_path}")
    logger.info("Requirements are ordered for proper dependency installation")

    # Remove old requirements files if they exist
    for old_file in ("requirements-base.txt", "requirements-flash.txt"):
        old_path = current_dir / old_file
        if old_path.exists():
            old_path.unlink()
            logger.info(f"Removed old requirements file: {old_file}")

    return True


def create_space(username, space_name):
    """Create or get a Hugging Face Space.

    NOTE(review): both arguments are accepted for interface compatibility but
    are deliberately ignored; the Space is always ``George-API/phi4training``.

    Returns:
        The object returned by ``HfApi.space_info`` for the Space.

    Raises:
        RuntimeError: if the Space can be neither found nor created; the
            underlying exception is attached as ``__cause__``.
    """
    # Override with the correct values regardless of what's passed
    username = DEFAULT_HF_USERNAME
    space_name = SPACE_NAME
    # Built before the try block so the error handler below can always
    # reference it, even when HfApi() itself fails.
    space_id = f"{username}/{space_name}"

    try:
        api = HfApi()
        logger.info(f"Checking Space {space_id}...")

        # First try to get the space
        try:
            space_info = api.space_info(repo_id=space_id)
        except Exception:
            # Any lookup failure is treated as "does not exist"; a genuine
            # connectivity/auth problem will then surface from create_repo.
            logger.info(f"Space {space_id} does not exist, creating new space...")
        else:
            logger.info(f"Space {space_id} already exists")
            return space_info

        # Create new space
        api.create_repo(
            repo_id=space_id,
            private=False,
            repo_type="space",
            space_sdk="gradio"
        )
        logger.info(f"Created new space: {space_id}")
        return api.space_info(repo_id=space_id)

    except Exception as e:
        logger.error(f"Failed to create space: {str(e)}")
        # Don't proceed if we can't create/access the space
        raise RuntimeError(f"Error with Space {space_id}: {str(e)}") from e


def _confirm_update():
    """Interactively confirm the update; return True only if the user agrees."""
    print("\nWARNING: Updating the Space will INTERRUPT any ongoing training!")
    confirm = input("Are you sure you want to update the Space? Type 'yes' to confirm: ")
    if confirm.lower() != 'yes':
        logger.info("Update cancelled by user")
        return False

    # Additional password check for safety.
    # NOTE(review): the password is not verified against anything; any
    # non-blank input passes. It only acts as a second "are you sure" step.
    password = getpass.getpass("Enter your password to confirm update: ")
    if password.strip() == "":
        logger.info("No password entered. Update cancelled.")
        return False
    return True


def _authenticate(token):
    """Log in with *token* and verify it via whoami.

    Returns an ``HfApi`` client on success, or None after logging the failure.
    """
    logger.info("Logging in to Hugging Face...")
    try:
        login(token=token)
        logger.info("Successfully logged in to Hugging Face")

        # Verify login with whoami
        api = HfApi()
        try:
            user_info = api.whoami()
            logger.info(f"Authenticated as: {user_info['name']}")
        except Exception as e:
            logger.error(f"Authentication verification failed: {str(e)}")
            logger.error("Your HF_TOKEN may be invalid or expired.")
            return None
    except Exception as e:
        logger.error(f"Login failed: {str(e)}")
        logger.error("Make sure your HF_TOKEN is valid and not expired.")
        return None
    return api


def main():
    """Main function to update the Space.

    Returns:
        True when the Space was updated, False when the run was cancelled or
        failed (all exceptions are logged and converted to False).
    """
    try:
        # Parse command line arguments
        parser = argparse.ArgumentParser(description='Update Hugging Face Space for Phi-4 training')
        parser.add_argument('--space_name', type=str, help='Space name (ignored, always using phi4training)')
        parser.add_argument('--force', action='store_true', help='Skip confirmation when updating Space')
        args = parser.parse_args()

        # Load environment variables
        env_vars = load_env_variables()
        verify_configs()

        # Verify we have the necessary variables
        if not env_vars["HF_TOKEN"]:
            logger.error("Missing HF_TOKEN. Please set it in your .env file or environment variables.")
            return False

        logger.info(f"Environment variables loaded: USERNAME={env_vars['HF_USERNAME']}, SPACE_NAME={env_vars['HF_SPACE_NAME']}")

        # Ask for confirmation unless forced
        if args.force:
            logger.info("Skipping confirmation due to --force flag")
        elif not _confirm_update():
            return False

        # Update requirements
        update_requirements()
        logger.info("Requirements updated successfully")

        # Always use phi4training as the space name regardless of arguments
        space_name = SPACE_NAME
        logger.info(f"Using space name: {space_name}")

        # Verify we're using the expected Space
        expected_space = EXPECTED_SPACE_ID
        actual_space = f"{env_vars['HF_USERNAME']}/{space_name}"

        if actual_space != expected_space:
            # NOTE(review): create_space() and the upload below always target
            # expected_space, so this warning/prompt overstates what happens.
            logger.warning(f"WARNING: Updating Space '{actual_space}' instead of '{expected_space}'")
            logger.warning("Make sure the HF_USERNAME environment variable is set to 'George-API'")

            # Safety check for non-force updates
            if not args.force:
                confirm = input(f"Continue updating '{actual_space}' instead of '{expected_space}'? (yes/no): ")
                if confirm.lower() != "yes":
                    logger.info("Update cancelled by user")
                    return False
        else:
            logger.info(f"Confirmed using the expected Space: {expected_space}")

        # Login to Hugging Face
        api = _authenticate(env_vars["HF_TOKEN"])
        if api is None:
            return False

        # Create/get space
        space_info = create_space(env_vars["HF_USERNAME"], space_name)
        logger.info(f"Space info: {space_info}")

        # Upload files
        current_dir = Path(__file__).parent
        logger.info(f"Uploading files from {current_dir} to Space {EXPECTED_SPACE_ID}...")

        # Create .gitignore
        with open(current_dir / ".gitignore", "w") as f:
            f.write(".env\n*.pyc\n__pycache__\n")
        logger.info("Created .gitignore file")

        # Reuse the authenticated client; .env is excluded so the token is
        # never uploaded to the Space.
        api.upload_folder(
            folder_path=str(current_dir),
            repo_id=EXPECTED_SPACE_ID,  # Hardcoded repo ID
            repo_type="space",
            ignore_patterns=[".env", "*.pyc", "__pycache__", "TRAINING_IN_PROGRESS.lock"]
        )

        logger.info("Files uploaded successfully")
        space_url = SPACE_URL
        logger.info(f"Space URL: {space_url}")
        print(f"\nSpace created successfully! You can view it at:\n{space_url}")
        return True

    except Exception as e:
        logger.error(f"Error updating Space: {str(e)}")
        return False


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)