We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
--> 162 v_quant, quant_state = bnb.functional.quantize_nf4(v.cuda(), blocksize=64)
AssertionError Traceback (most recent call last) Cell In[1], line 7 3 MAX_LENGTH = 128 4 # could use hugging face model repo id: 5 #model = AutoModel.from_pretrained("abacusai/Smaug-72B-v0.1") ----> 7 model = AutoModel.from_pretrained("garage-bAInd/Platypus2-7B", compression='4bit') File ~/anaconda3/lib/python3.11/site-packages/airllm/auto_model.py:49, in AutoModel.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs) 45 @classmethod 46 def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs): 48 if is_on_mac_os: ---> 49 return AirLLMLlamaMlx(pretrained_model_name_or_path, *inputs, ** kwargs) 51 module, cls = AutoModel.get_module_class(pretrained_model_name_or_path, *inputs, **kwargs) 52 module = importlib.import_module(module) File ~/anaconda3/lib/python3.11/site-packages/airllm/airllm_llama_mlx.py:224, in AirLLMLlamaMlx.__init__(self, model_local_path_or_repo_id, device, dtype, max_seq_len, layer_shards_saving_path, profiling_mode, compression, hf_token, prefetching, test_nonlayered, show_memory_util, delete_original) 219 self.least_available = None 220 self.initial_available = psutil.virtual_memory().available / 1024 / 1024 --> 224 self.model_local_path, self.checkpoint_path = find_or_create_local_splitted_path(model_local_path_or_repo_id, 225 layer_shards_saving_path, 226 compression=compression, 227 layer_names=self.layer_names_dict, 228 hf_token=hf_token, 229 delete_original=delete_original) 230 if hf_token is not None: 231 self.config = AutoConfig.from_pretrained(self.model_local_path, token=hf_token, trust_remote_code=True) File ~/anaconda3/lib/python3.11/site-packages/airllm/utils.py:382, in find_or_create_local_splitted_path(model_local_path_or_repo_id, layer_shards_saving_path, compression, layer_names, hf_token, delete_original) 365 ''' 366 hf_cache_path = huggingface_hub.snapshot_download(model_local_path_or_repo_id, token=hf_token, allow_patterns="model.safetensors.index.json") 367 if len(glob(str(Path(hf_cache_path) 
/ "model.safetensors.index.json"))) > 0: (...) 374 token=hf_token) 375 ''' 377 #assert os.path.exists(Path(hf_cache_path) / 'pytorch_model.bin.index.json') or \ 378 # os.path.exists(Path(hf_cache_path) / 'model.safetensors.index.json'), \ 379 # f"{hf_cache_path}/pytorch_model.bin.index.json or {hf_cache_path}/model.safetensors.index.json should exists." 380 381 # if splitted_model subdir exists under cache use it, otherwise split and save --> 382 return Path(hf_cache_path), split_and_save_layers(hf_cache_path, layer_shards_saving_path, 383 compression=compression, layer_names=layer_names, 384 delete_original=delete_original, repo_id=model_local_path_or_repo_id, hf_token=hf_token) File ~/anaconda3/lib/python3.11/site-packages/airllm/utils.py:303, in split_and_save_layers(checkpoint_path, layer_shards_saving_path, splitted_model_dir_name, compression, layer_names, delete_original, repo_id, hf_token) 300 # Get layer state dict 301 layer_state_dict = dict([(k, v) for k, v in state_dict.items() if k.startswith(layer)]) --> 303 layer_state_dict = compress_layer_state_dict(layer_state_dict, compression) 306 # Save layer state dict as using safetensors 308 marker_exists = ModelPersister.get_model_persister().model_persist_exist(layer, saving_path) File ~/anaconda3/lib/python3.11/site-packages/airllm/utils.py:162, in compress_layer_state_dict(layer_state_dict, compression) 160 compressed_layer_state_dict = {} 161 for k, v in layer_state_dict.items(): --> 162 v_quant, quant_state = bnb.functional.quantize_nf4(v.cuda(), blocksize=64) 163 compressed_layer_state_dict[k] = v_quant 164 for quant_state_k, quant_state_v in save_quant_state_to_dict(quant_state).items(): File ~/anaconda3/lib/python3.11/site-packages/torch/cuda/__init__.py:289, in _lazy_init() 284 raise RuntimeError( 285 "Cannot re-initialize CUDA in forked subprocess. 
To use CUDA with " 286 "multiprocessing, you must use the 'spawn' start method" 287 ) 288 if not hasattr(torch._C, "_cuda_getDeviceCount"): --> 289 raise AssertionError("Torch not compiled with CUDA enabled") 290 if _cudart is None: 291 raise AssertionError( 292 "libcudart functions unavailable. It looks like you have a broken build?" 293 ) AssertionError: Torch not compiled with CUDA enabled
The text was updated successfully, but these errors were encountered:
No branches or pull requests
--> 162 v_quant, quant_state = bnb.functional.quantize_nf4(v.cuda(), blocksize=64)
The text was updated successfully, but these errors were encountered: