Merged
2 changes: 2 additions & 0 deletions .github/workflows/test_gptq.yml
@@ -44,7 +44,9 @@ jobs:
       - name: Install dependencies
         run: |
           pip install --upgrade pip uv
           uv pip install torch torchvision --index-url https://download.pytorch.org/whl/cu128
           uv pip install .[tests]
+          uv pip install pypcre "setuptools>=78.1.1,<82"
+          uv pip install "gptqmodel>=5.6.12" --no-build-isolation

       - name: Run tests

(IlyasMoutawwakil marked this conversation as resolved.)
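The `gptqmodel>=5.6.12` floor added here is what permits the import changes in `optimum/gptq/quantizer.py` below. As a hedged illustration (not code from this PR), a runtime check against that same floor could look like the following; `packaging` is an assumed dependency here:

```python
# Sketch only: fail early if the installed gptqmodel is older than the
# minimum the CI workflow pins. Mirrors "gptqmodel>=5.6.12" above.
from importlib.metadata import PackageNotFoundError, version

from packaging.version import Version  # assumed installed (common transitive dependency)

MIN_GPTQMODEL = Version("5.6.12")

def check_gptqmodel_version() -> None:
    try:
        installed = Version(version("gptqmodel"))
    except PackageNotFoundError:
        raise RuntimeError("gptqmodel is not installed") from None
    if installed < MIN_GPTQMODEL:
        raise RuntimeError(f"gptqmodel>={MIN_GPTQMODEL} is required, found {installed}")
```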
14 changes: 12 additions & 2 deletions optimum/gptq/quantizer.py
@@ -47,7 +47,7 @@
 from accelerate.hooks import remove_hook_from_module

 if is_gptqmodel_available():
-    from gptqmodel import BACKEND, QuantizeConfig, exllama_set_max_input_length
+    from gptqmodel import BACKEND, QuantizeConfig
     from gptqmodel.quantization import FORMAT, GPTQ, METHOD
     from gptqmodel.utils.importer import hf_select_quant_linear_v2
     from gptqmodel.utils.model import hf_convert_gptq_v1_to_v2_format, hf_convert_gptq_v2_to_v1_format
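`is_gptqmodel_available()` gates the whole import block, so optimum still imports cleanly when gptqmodel is absent. A minimal sketch of how such an availability check is typically implemented (a stand-in, not optimum's actual helper):

```python
# Stand-in sketch of an availability check like is_gptqmodel_available().
import importlib.util

def is_gptqmodel_available() -> bool:
    # find_spec returns None when the package cannot be located at all,
    # without actually importing it (so no heavy import side effects).
    return importlib.util.find_spec("gptqmodel") is not None
```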
@@ -669,8 +669,18 @@ class StoreAttr(object):
         model.quantize_config = StoreAttr()
         model.quantize_config.desc_act = self.desc_act
         model = gptq_post_init(model, use_act_order=self.desc_act)
-        if self.desc_act and self.backend == BACKEND.EXLLAMA_V1 and self.max_input_length is not None:
+        # Keep this compatibility guard for older gptqmodel versions where EXLLAMA_V1 still exists.
+        # This branch can be removed once we bump the minimum gptqmodel version and drop v1 support.
+        if (
+            hasattr(BACKEND, "EXLLAMA_V1")
+            and self.backend == BACKEND.EXLLAMA_V1
+            and self.desc_act
+            and self.max_input_length is not None
+        ):
+            from gptqmodel import exllama_set_max_input_length
+
             model = exllama_set_max_input_length(model, self.max_input_length)
Contributor:

@jiqing-feng Can you check if anyone still uses exllama_set_max_input_length? If it is unused and only v1-specific, this helper method can be removed too.

Contributor (Author):

Checked: there are no uses of this function, so it has been removed. Thanks for the review.

         return model

     def pack_model(
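The guard added in quantizer.py combines two defensive patterns: probing with `hasattr` for an enum member that newer gptqmodel releases drop, and deferring the helper import into the guarded branch so the module still loads when the symbol no longer exists. A self-contained sketch of the same idea, where `Backend` and `legacy_exllama_v1` are hypothetical stand-ins rather than gptqmodel APIs:

```python
from enum import Enum

class Backend(Enum):
    # EXLLAMA_V1 is deliberately absent, mimicking a newer gptqmodel release.
    EXLLAMA_V2 = "exllama_v2"

def post_init(model, backend, desc_act=False, max_input_length=None):
    if (
        hasattr(Backend, "EXLLAMA_V1")  # False here, so the chain short-circuits
        and backend == Backend.EXLLAMA_V1
        and desc_act
        and max_input_length is not None
    ):
        # Deferred import: only evaluated when the legacy branch actually runs,
        # so importing this module never fails on releases without the helper.
        from legacy_exllama_v1 import set_max_input_length  # hypothetical module

        model = set_max_input_length(model, max_input_length)
    return model

if __name__ == "__main__":
    # With EXLLAMA_V1 gone, the branch is skipped and nothing is imported.
    post_init(object(), Backend.EXLLAMA_V2, desc_act=True, max_input_length=2048)
    print("legacy branch safely skipped")
```

Because the condition short-circuits at `hasattr`, the deferred import line is never evaluated on newer releases, which is exactly why the unconditional top-level import of `exllama_set_max_input_length` could be dropped.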