We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
--> 162 v_quant, quant_state = bnb.functional.quantize_nf4(v.cuda(), blocksize=64)
AssertionError Traceback (most recent call last) Cell In[1], line 7 3 MAX_LENGTH = 128 4 # could use hugging face model repo id: 5 #model = AutoModel.from_pretrained("abacusai/Smaug-72B-v0.1") ----> 7 model = AutoModel.from_pretrained("garage-bAInd/Platypus2-7B", compression='4bit') File ~/anaconda3/lib/python3.11/site-packages/airllm/auto_model.py:49, in AutoModel.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs) 45 @classmethod 46 def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs): 48 if is_on_mac_os: ---> 49 return AirLLMLlamaMlx(pretrained_model_name_or_path, *inputs, ** kwargs) 51 module, cls = AutoModel.get_module_class(pretrained_model_name_or_path, *inputs, **kwargs) 52 module = importlib.import_module(module) File ~/anaconda3/lib/python3.11/site-packages/airllm/airllm_llama_mlx.py:224, in AirLLMLlamaMlx.__init__(self, model_local_path_or_repo_id, device, dtype, max_seq_len, layer_shards_saving_path, profiling_mode, compression, hf_token, prefetching, test_nonlayered, show_memory_util, delete_original) 219 self.least_available = None 220 self.initial_available = psutil.virtual_memory().available / 1024 / 1024 --> 224 self.model_local_path, self.checkpoint_path = find_or_create_local_splitted_path(model_local_path_or_repo_id, 225 layer_shards_saving_path, 226 compression=compression, 227 layer_names=self.layer_names_dict, 228 hf_token=hf_token, 229 delete_original=delete_original) 230 if hf_token is not None: 231 self.config = AutoConfig.from_pretrained(self.model_local_path, token=hf_token, trust_remote_code=True) File ~/anaconda3/lib/python3.11/site-packages/airllm/utils.py:382, in find_or_create_local_splitted_path(model_local_path_or_repo_id, layer_shards_saving_path, compression, layer_names, hf_token, delete_original) 365 ''' 366 hf_cache_path = huggingface_hub.snapshot_download(model_local_path_or_repo_id, token=hf_token, allow_patterns="model.safetensors.index.json") 367 if len(glob(str(Path(hf_cache_path) 
/ "model.safetensors.index.json"))) > 0: (...) 374 token=hf_token) 375 ''' 377 #assert os.path.exists(Path(hf_cache_path) / 'pytorch_model.bin.index.json') or \ 378 # os.path.exists(Path(hf_cache_path) / 'model.safetensors.index.json'), \ 379 # f"{hf_cache_path}/pytorch_model.bin.index.json or {hf_cache_path}/model.safetensors.index.json should exists." 380 381 # if splitted_model subdir exists under cache use it, otherwise split and save --> 382 return Path(hf_cache_path), split_and_save_layers(hf_cache_path, layer_shards_saving_path, 383 compression=compression, layer_names=layer_names, 384 delete_original=delete_original, repo_id=model_local_path_or_repo_id, hf_token=hf_token) File ~/anaconda3/lib/python3.11/site-packages/airllm/utils.py:303, in split_and_save_layers(checkpoint_path, layer_shards_saving_path, splitted_model_dir_name, compression, layer_names, delete_original, repo_id, hf_token) 300 # Get layer state dict 301 layer_state_dict = dict([(k, v) for k, v in state_dict.items() if k.startswith(layer)]) --> 303 layer_state_dict = compress_layer_state_dict(layer_state_dict, compression) 306 # Save layer state dict as using safetensors 308 marker_exists = ModelPersister.get_model_persister().model_persist_exist(layer, saving_path) File ~/anaconda3/lib/python3.11/site-packages/airllm/utils.py:162, in compress_layer_state_dict(layer_state_dict, compression) 160 compressed_layer_state_dict = {} 161 for k, v in layer_state_dict.items(): --> 162 v_quant, quant_state = bnb.functional.quantize_nf4(v.cuda(), blocksize=64) 163 compressed_layer_state_dict[k] = v_quant 164 for quant_state_k, quant_state_v in save_quant_state_to_dict(quant_state).items(): File ~/anaconda3/lib/python3.11/site-packages/torch/cuda/__init__.py:289, in _lazy_init() 284 raise RuntimeError( 285 "Cannot re-initialize CUDA in forked subprocess. 
To use CUDA with " 286 "multiprocessing, you must use the 'spawn' start method" 287 ) 288 if not hasattr(torch._C, "_cuda_getDeviceCount"): --> 289 raise AssertionError("Torch not compiled with CUDA enabled") 290 if _cudart is None: 291 raise AssertionError( 292 "libcudart functions unavailable. It looks like you have a broken build?" 293 ) AssertionError: Torch not compiled with CUDA enabled
The text was updated successfully, but these errors were encountered:
No branches or pull requests
--> 162 v_quant, quant_state = bnb.functional.quantize_nf4(v.cuda(), blocksize=64)
The text was updated successfully, but these errors were encountered: