George-API committed on
Commit
1362b55
·
verified ·
1 Parent(s): 6ade52f

Upload run_cloud_training.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. run_cloud_training.py +44 -13
run_cloud_training.py CHANGED
@@ -24,12 +24,54 @@ from unsloth import FastLanguageModel
24
  # Disable flash attention globally
25
  os.environ["TRANSFORMERS_NO_FLASH_ATTENTION"] = "1"
26
 
27
- # Try to install flash-attention (for systems that support it)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  try:
29
  import subprocess
30
  import sys
31
 
32
- logger = logging.getLogger(__name__)
 
 
 
 
 
 
 
 
33
  logger.info("Attempting to install flash-attention...")
34
 
35
  # Try multiple installation approaches for flash-attention
@@ -74,17 +116,6 @@ except ImportError:
74
  TENSORBOARD_AVAILABLE = False
75
  print("Tensorboard not available. Will skip tensorboard logging.")
76
 
77
- # Configure logging
78
- logging.basicConfig(
79
- level=logging.INFO,
80
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
81
- handlers=[
82
- logging.StreamHandler(),
83
- logging.FileHandler("training.log")
84
- ]
85
- )
86
- logger = logging.getLogger(__name__)
87
-
88
  # Default dataset path - use the correct path with username
89
  DEFAULT_DATASET = "George-API/phi4-cognitive-dataset"
90
 
 
24
  # Disable flash attention globally
25
  os.environ["TRANSFORMERS_NO_FLASH_ATTENTION"] = "1"
26
 
27
+ # Configure logging first
28
+ logging.basicConfig(
29
+ level=logging.INFO,
30
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
31
+ handlers=[
32
+ logging.StreamHandler(),
33
+ logging.FileHandler("training.log")
34
+ ]
35
+ )
36
+ logger = logging.getLogger(__name__)
37
+
38
+ # Make sure torch is installed and available before proceeding
39
+ try:
40
+ logger.info("Importing torch...")
41
+ import torch
42
+ logger.info(f"PyTorch version: {torch.__version__}")
43
+ logger.info(f"CUDA available: {torch.cuda.is_available()}")
44
+ if torch.cuda.is_available():
45
+ logger.info(f"CUDA version: {torch.version.cuda}")
46
+ logger.info(f"GPU: {torch.cuda.get_device_name(0)}")
47
+ except ImportError:
48
+ logger.error("PyTorch not found. Installing torch first...")
49
+ try:
50
+ import subprocess
51
+ import sys
52
+ subprocess.check_call([sys.executable, "-m", "pip", "install", "torch"])
53
+ logger.info("PyTorch installed successfully. Importing...")
54
+ import torch
55
+ logger.info(f"PyTorch version: {torch.__version__}")
56
+ except Exception as e:
57
+ logger.error(f"Failed to install PyTorch: {e}")
58
+ logger.error("Cannot proceed without PyTorch. Exiting.")
59
+ raise
60
+
61
+ # Now try to install flash-attention (for systems that support it)
62
  try:
63
  import subprocess
64
  import sys
65
 
66
+ # Make sure torch is installed before attempting flash-attn
67
+ try:
68
+ logger.info("Ensuring PyTorch is installed before flash-attention...")
69
+ subprocess.check_call([sys.executable, "-m", "pip", "install", "torch", "--quiet"])
70
+ logger.info("PyTorch installation verified")
71
+ except Exception as torch_error:
72
+ logger.warning(f"PyTorch installation check failed: {torch_error}")
73
+ logger.info("Will continue with flash-attention installation anyway")
74
+
75
  logger.info("Attempting to install flash-attention...")
76
 
77
  # Try multiple installation approaches for flash-attention
 
116
  TENSORBOARD_AVAILABLE = False
117
  print("Tensorboard not available. Will skip tensorboard logging.")
118
 
 
 
 
 
 
 
 
 
 
 
 
119
  # Default dataset path - use the correct path with username
120
  DEFAULT_DATASET = "George-API/phi4-cognitive-dataset"
121