>>> tokenizer = AutoTokenizer.from_pretrained("D:\\OneDrive\\Programs\\llm\\chatglm-6b-int4", trust_remote_code=True)
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
>>> model = AutoModel.from_pretrained("D:/OneDrive/Programs/llm/chatglm-6b-int4", trust_remote_code=True).half().cuda()
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Users\DataLearner\AppData\Local\Programs\Python\Python39\lib\site-packages\transformers\models\auto\auto_factory.py", line 441, in from_pretrained
config, kwargs = AutoConfig.from_pretrained(
File "C:\Users\DataLearner\AppData\Local\Programs\Python\Python39\lib\site-packages\transformers\models\auto\configuration_auto.py", line 911, in from_pretrained
config_class = get_class_from_dynamic_module(
File "C:\Users\DataLearner\AppData\Local\Programs\Python\Python39\lib\site-packages\transformers\dynamic_module_utils.py", line 388, in get_class_from_dynamic_module
final_module = get_cached_module_file(
File "C:\Users\DataLearner\AppData\Local\Programs\Python\Python39\lib\site-packages\transformers\dynamic_module_utils.py", line 273, in get_cached_module_file
create_dynamic_module(full_submodule)
File "C:\Users\DataLearner\AppData\Local\Programs\Python\Python39\lib\site-packages\transformers\dynamic_module_utils.py", line 59, in create_dynamic_module
os.makedirs(dynamic_module_path, exist_ok=True)
File "C:\Users\DataLearner\AppData\Local\Programs\Python\Python39\lib\os.py", line 215, in makedirs
makedirs(head, exist_ok=exist_ok)
File "C:\Users\DataLearner\AppData\Local\Programs\Python\Python39\lib\os.py", line 215, in makedirs
makedirs(head, exist_ok=exist_ok)
File "C:\Users\DataLearner\AppData\Local\Programs\Python\Python39\lib\os.py", line 215, in makedirs
makedirs(head, exist_ok=exist_ok)
[Previous line repeated 1 more time]
File "C:\Users\DataLearner\AppData\Local\Programs\Python\Python39\lib\os.py", line 225, in makedirs
mkdir(name, mode)
OSError: [WinError 123] The filename, directory name, or volume label syntax is incorrect: 'C:\\Users\\DataLearner\\.cache\\huggingface\\modules\\transformers_modules\\D:'
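The root cause is visible in the last line: transformers mirrors a model's custom code under ~/.cache/huggingface/modules/transformers_modules/<model path>, so the drive-letter colon in "D:" ends up inside a directory name, which Windows rejects. A minimal workaround sketch, assuming the model sits at D:\OneDrive\Programs\llm\chatglm-6b-int4: run Python from the model's parent directory and pass a relative path, so no drive letter leaks into the cache path (upgrading transformers is the other commonly reported fix, since later releases changed this local-path handling):

import os
from transformers import AutoTokenizer, AutoModel

# Work from the model's parent directory so the name passed to
# from_pretrained() contains no "D:" that could become a folder name.
os.chdir("D:/OneDrive/Programs/llm")
tokenizer = AutoTokenizer.from_pretrained("chatglm-6b-int4", trust_remote_code=True)
model = AutoModel.from_pretrained("chatglm-6b-int4", trust_remote_code=True).half().cuda()

The working CPU script below additionally passes revision="", which silences the "Explicitly passing a revision" warning seen above.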
from transformers import AutoTokenizer, AutoModel
tokenizer = AutoTokenizer.from_pretrained("D:\\data\\llm\\chatglm-6b-int4", trust_remote_code=True, revision="")
model = AutoModel.from_pretrained("D:\\data\\llm\\chatglm-6b-int4",trust_remote_code=True, revision="").float()
model = model.eval()
response, history = model.chat(tokenizer, "你好", history=[])
print(response)
Note that the only difference is the trailing .float() at the end of the third line of code:
model = AutoModel.from_pretrained("D:\\data\\llm\\chatglm-6b-int4", trust_remote_code=True, revision="").float()
The GPU version ends with .half().cuda(), whereas the CPU version here uses .float().
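For comparison, the two load calls differ only in that trailing suffix (same arguments otherwise; paths as in the examples above):

# CPU inference: run in fp32 on the CPU
model = AutoModel.from_pretrained("D:\\data\\llm\\chatglm-6b-int4", trust_remote_code=True, revision="").float()
# GPU inference: run in fp16 on CUDA
model = AutoModel.from_pretrained("D:\\data\\llm\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()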
If running the code above produces the following error:
The dtype of attention mask (torch.int64) is not bool
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Users\DuFei\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\utils\_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "C:\Users\DuFei/.cache\huggingface\modules\transformers_modules\chatglm-6b-int4\modeling_chatglm.py", line 1255, in chat
outputs = self.generate(**inputs, **gen_kwargs)
File "C:\Users\DuFei\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\utils\_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "C:\Users\DuFei\AppData\Local\Programs\Python\Python39\lib\site-packages\transformers\generation\utils.py", line 1452, in generate
return self.sample(
File "C:\Users\DuFei\AppData\Local\Programs\Python\Python39\lib\site-packages\transformers\generation\utils.py", line 2468, in sample
outputs = self(
File "C:\Users\DuFei\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\DuFei/.cache\huggingface\modules\transformers_modules\chatglm-6b-int4\modeling_chatglm.py", line 1160, in forward
transformer_outputs = self.transformer(
File "C:\Users\DuFei\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\DuFei/.cache\huggingface\modules\transformers_modules\chatglm-6b-int4\modeling_chatglm.py", line 973, in forward
layer_ret = layer(
File "C:\Users\DuFei\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\DuFei/.cache\huggingface\modules\transformers_modules\chatglm-6b-int4\modeling_chatglm.py", line 614, in forward
attention_outputs = self.attention(
File "C:\Users\DuFei\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\DuFei/.cache\huggingface\modules\transformers_modules\chatglm-6b-int4\modeling_chatglm.py", line 439, in forward
mixed_raw_layer = self.query_key_value(hidden_states)
File "C:\Users\DuFei\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\DuFei/.cache\huggingface\modules\transformers_modules\chatglm-6b-int4\quantization.py", line 338, in forward
output = W8A16LinearCPU.apply(input, self.weight, self.weight_scale, self.weight_bit_width, self.quantization_cache)
File "C:\Users\DuFei\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\autograd\function.py", line 506, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "C:\Users\DuFei/.cache\huggingface\modules\transformers_modules\chatglm-6b-int4\quantization.py", line 76, in forward
weight = extract_weight_to_float(quant_w, scale_w, weight_bit_width, quantization_cache=quantization_cache)
File "C:\Users\DuFei/.cache\huggingface\modules\transformers_modules\chatglm-6b-int4\quantization.py", line 260, in extract_weight_to_float
func = cpu_kernels.int4WeightExtractionFloat
AttributeError: 'NoneType' object has no attribute 'int4WeightExtractionFloat'
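Here cpu_kernels is None: quantization.py compiles the int4 CPU kernels on the fly when the model loads, and if that compilation fails (typically because no C compiler is installed) the later kernel lookup raises this AttributeError. A minimal sanity-check sketch, assuming the ChatGLM repo's recommendation of gcc with OpenMP (e.g. TDM-GCC on Windows):

import shutil

# The int4 CPU path needs a C compiler at model-load time; without one,
# the kernel library is never built and cpu_kernels stays None.
if shutil.which("gcc") is None:
    print("gcc not found on PATH - install it (e.g. TDM-GCC on Windows) and reload the model.")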