Skip to content
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions optimum/gptq/quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
from accelerate.hooks import remove_hook_from_module

if is_gptqmodel_available():
from gptqmodel import BACKEND, QuantizeConfig, exllama_set_max_input_length
from gptqmodel import BACKEND, QuantizeConfig
from gptqmodel.quantization import FORMAT, GPTQ, METHOD
from gptqmodel.utils.importer import hf_select_quant_linear_v2
from gptqmodel.utils.model import hf_convert_gptq_v1_to_v2_format, hf_convert_gptq_v2_to_v1_format
Expand Down Expand Up @@ -669,8 +669,7 @@ class StoreAttr(object):
model.quantize_config = StoreAttr()
model.quantize_config.desc_act = self.desc_act
model = gptq_post_init(model, use_act_order=self.desc_act)
if self.desc_act and self.backend == BACKEND.EXLLAMA_V1 and self.max_input_length is not None:
model = exllama_set_max_input_length(model, self.max_input_length)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jiqing-feng Can you check if anyone still uses `exllama_set_max_input_length`? If not, and it is only v1-specific, this helper method can be removed too.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Checked no use of this func, removed. Thanks for the review.


return model

def pack_model(
Expand Down