Skip to content
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions optimum/gptq/quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
from accelerate.hooks import remove_hook_from_module

if is_gptqmodel_available():
from gptqmodel import BACKEND, QuantizeConfig, exllama_set_max_input_length
from gptqmodel import BACKEND, QuantizeConfig
from gptqmodel.quantization import FORMAT, GPTQ, METHOD
from gptqmodel.utils.importer import hf_select_quant_linear_v2
from gptqmodel.utils.model import hf_convert_gptq_v1_to_v2_format, hf_convert_gptq_v2_to_v1_format
Expand Down Expand Up @@ -669,8 +669,7 @@ class StoreAttr(object):
model.quantize_config = StoreAttr()
model.quantize_config.desc_act = self.desc_act
model = gptq_post_init(model, use_act_order=self.desc_act)
if self.desc_act and self.backend == BACKEND.EXLLAMA_V1 and self.max_input_length is not None:
model = exllama_set_max_input_length(model, self.max_input_length)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jiqing-feng Can you check if anyone still uses `exllama_set_max_input_length`? If not, and it is only v1-specific, this helper method can be removed too.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Checked no use of this func, removed. Thanks for the review.


return model

def pack_model(
Expand Down