LPX committed
Commit f725d52 · 1 Parent(s): b971b27

Remove sequential CPU offload from model loading and simplify return value in safe_model_load function
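One likely motivation for the first hunk: in Diffusers, enable_sequential_cpu_offload() and pipe.to("cuda") are competing device-placement strategies, so disabling the offload when the whole pipeline is moved to CUDA avoids mixing the two. A minimal sketch of both options, assuming the diffusers library (the checkpoint name is illustrative, not from this repo):

# Sketch of the two device-placement options (checkpoint name is illustrative):
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
)
pipe.enable_attention_slicing()

# Option A (what this commit keeps): move the whole pipeline onto the GPU.
pipe.to("cuda")

# Option B (now commented out in the diff): stream each submodule to the
# GPU only while it runs. Lowest VRAM use but slower, and it manages
# device placement itself, so it should not be combined with pipe.to("cuda").
# pipe.enable_sequential_cpu_offload()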

Files changed (1)
  1. model_loader.py +2 -7
model_loader.py CHANGED
@@ -39,7 +39,7 @@ def safe_model_load():
             print(f"XFormers not available: {e}")
 
         pipe.enable_attention_slicing()
-        pipe.enable_sequential_cpu_offload()
+        # pipe.enable_sequential_cpu_offload()
         pipe.to("cuda")
 
         # For memory-sensitive environments
@@ -47,13 +47,8 @@ def safe_model_load():
             torch.multiprocessing.set_sharing_strategy('file_system')
         except Exception as e:
             print(f"Exception raised (torch.multiprocessing): {e}")
-        # Moondream
-        model = vl(api_key=md_api_key)
 
-        return {
-            "pipeline": pipe,
-            "captioner": model
-        }
+        return pipe
 
     except Exception as e:
         print(f"Model loading failed: {e}")