compression parameter on Mac doesn't work #119

Open
dnvs opened this issue Mar 19, 2024 · 0 comments

dnvs commented Mar 19, 2024

Loading a model on macOS with `compression='4bit'` fails because `compress_layer_state_dict` calls `bnb.functional.quantize_nf4(v.cuda(), blocksize=64)`, and the macOS PyTorch build has no CUDA support:

```
AssertionError                            Traceback (most recent call last)
Cell In[1], line 7
      3 MAX_LENGTH = 128
      4 # could use hugging face model repo id:
      5 #model = AutoModel.from_pretrained("abacusai/Smaug-72B-v0.1")
----> 7 model = AutoModel.from_pretrained("garage-bAInd/Platypus2-7B", compression='4bit')

File ~/anaconda3/lib/python3.11/site-packages/airllm/auto_model.py:49, in AutoModel.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
     45 @classmethod
     46 def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
     48     if is_on_mac_os:
---> 49         return AirLLMLlamaMlx(pretrained_model_name_or_path, *inputs, ** kwargs)
     51     module, cls = AutoModel.get_module_class(pretrained_model_name_or_path, *inputs, **kwargs)
     52     module = importlib.import_module(module)

File ~/anaconda3/lib/python3.11/site-packages/airllm/airllm_llama_mlx.py:224, in AirLLMLlamaMlx.__init__(self, model_local_path_or_repo_id, device, dtype, max_seq_len, layer_shards_saving_path, profiling_mode, compression, hf_token, prefetching, test_nonlayered, show_memory_util, delete_original)
    219 self.least_available = None
    220 self.initial_available = psutil.virtual_memory().available / 1024 / 1024
--> 224 self.model_local_path, self.checkpoint_path = find_or_create_local_splitted_path(model_local_path_or_repo_id,
    225                                                                                  layer_shards_saving_path,
    226                                                                                  compression=compression,
    227                                                                                  layer_names=self.layer_names_dict,
    228                                                                                  hf_token=hf_token,
    229                                                                                  delete_original=delete_original)
    230 if hf_token is not None:
    231     self.config = AutoConfig.from_pretrained(self.model_local_path, token=hf_token, trust_remote_code=True)

File ~/anaconda3/lib/python3.11/site-packages/airllm/utils.py:382, in find_or_create_local_splitted_path(model_local_path_or_repo_id, layer_shards_saving_path, compression, layer_names, hf_token, delete_original)
    365 '''
    366 hf_cache_path = huggingface_hub.snapshot_download(model_local_path_or_repo_id, token=hf_token, allow_patterns="model.safetensors.index.json")
    367 if len(glob(str(Path(hf_cache_path) / "model.safetensors.index.json"))) > 0:
   (...)
    374                                                       token=hf_token)
    375 '''
    377 #assert os.path.exists(Path(hf_cache_path) / 'pytorch_model.bin.index.json') or \
    378 #       os.path.exists(Path(hf_cache_path) / 'model.safetensors.index.json'), \
    379 #       f"{hf_cache_path}/pytorch_model.bin.index.json or {hf_cache_path}/model.safetensors.index.json should exists."
    380 
    381 # if splitted_model subdir exists under cache use it, otherwise split and save
--> 382 return Path(hf_cache_path), split_and_save_layers(hf_cache_path, layer_shards_saving_path,
    383                                                   compression=compression, layer_names=layer_names,
    384                                                   delete_original=delete_original, repo_id=model_local_path_or_repo_id, hf_token=hf_token)

File ~/anaconda3/lib/python3.11/site-packages/airllm/utils.py:303, in split_and_save_layers(checkpoint_path, layer_shards_saving_path, splitted_model_dir_name, compression, layer_names, delete_original, repo_id, hf_token)
    300 # Get layer state dict
    301 layer_state_dict = dict([(k, v) for k, v in state_dict.items() if k.startswith(layer)])
--> 303 layer_state_dict = compress_layer_state_dict(layer_state_dict, compression)
    306 # Save layer state dict as using safetensors
    308 marker_exists = ModelPersister.get_model_persister().model_persist_exist(layer, saving_path)

File ~/anaconda3/lib/python3.11/site-packages/airllm/utils.py:162, in compress_layer_state_dict(layer_state_dict, compression)
    160 compressed_layer_state_dict = {}
    161 for k, v in layer_state_dict.items():
--> 162     v_quant, quant_state = bnb.functional.quantize_nf4(v.cuda(), blocksize=64)
    163     compressed_layer_state_dict[k] = v_quant
    164     for quant_state_k, quant_state_v in save_quant_state_to_dict(quant_state).items():

File ~/anaconda3/lib/python3.11/site-packages/torch/cuda/__init__.py:289, in _lazy_init()
    284     raise RuntimeError(
    285         "Cannot re-initialize CUDA in forked subprocess. To use CUDA with "
    286         "multiprocessing, you must use the 'spawn' start method"
    287     )
    288 if not hasattr(torch._C, "_cuda_getDeviceCount"):
--> 289     raise AssertionError("Torch not compiled with CUDA enabled")
    290 if _cudart is None:
    291     raise AssertionError(
    292         "libcudart functions unavailable. It looks like you have a broken build?"
    293     )

AssertionError: Torch not compiled with CUDA enabled
```
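
Root cause: on macOS, `AutoModel.from_pretrained` routes to `AirLLMLlamaMlx` (`auto_model.py`, line 49), whose layer-splitting path reaches `compress_layer_state_dict` in `airllm/utils.py`. That function unconditionally moves each tensor to the GPU with `v.cuda()` before NF4 quantization, and `torch.cuda._lazy_init` then raises the `AssertionError` above. So the `compression` parameter currently hard-requires a CUDA device and cannot work on a Mac.

A minimal caller-side workaround sketch (assuming only the `AutoModel.from_pretrained` call visible in the traceback): pass `compression` only when CUDA is actually available.

```python
import torch
from airllm import AutoModel

# Workaround sketch: only request 4-bit compression when a CUDA device
# exists. AirLLM's NF4 path calls v.cuda(), which fails on CPU/MPS-only
# PyTorch builds such as the stock macOS install.
kwargs = {}
if torch.cuda.is_available():
    kwargs["compression"] = "4bit"

model = AutoModel.from_pretrained("garage-bAInd/Platypus2-7B", **kwargs)
```

On the library side, a guard at the top of `compress_layer_state_dict` could fail fast with a readable message instead of the bare `AssertionError` from deep inside `torch.cuda`. This is a hypothetical sketch, not the project's actual fix; `_require_cuda_for_compression` is an invented helper name.

```python
import torch

def _require_cuda_for_compression(compression):
    # Hypothetical helper (name invented for this sketch): call this before
    # any bnb.functional.quantize_nf4(v.cuda(), ...) so the user sees a
    # clear error rather than "Torch not compiled with CUDA enabled".
    if compression is not None and not torch.cuda.is_available():
        raise RuntimeError(
            f"compression={compression!r} requires a CUDA-enabled PyTorch "
            "build; bitsandbytes NF4 quantization is not supported on macOS."
        )
```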