From 0d4b53084b8481728470ff7633d9db7bd4d6fb4d Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Tue, 23 Dec 2025 21:45:08 +0100 Subject: [PATCH] Fix off-by-one in C4 sample length check (require length > seqlen) --- optimum/gptq/data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/optimum/gptq/data.py b/optimum/gptq/data.py index 7e5fc0b43d..7cb57d31bc 100644 --- a/optimum/gptq/data.py +++ b/optimum/gptq/data.py @@ -156,7 +156,7 @@ def get_c4(tokenizer: Any, seqlen: int, nsamples: int, split: str = "train"): while True: i = random.randint(0, len(data) - 1) enc = tokenizer(data[i]["text"], return_tensors="pt") - if enc.input_ids.shape[1] >= seqlen: + if enc.input_ids.shape[1] > seqlen: break i = random.randint(0, enc.input_ids.shape[1] - seqlen - 1) j = i + seqlen @@ -184,7 +184,7 @@ def get_c4_new(tokenizer: Any, seqlen: int, nsamples: int, split: str = "train") while True: i = random.randint(0, len(data) - 1) enc = tokenizer(data[i]["text"], return_tensors="pt") - if enc.input_ids.shape[1] >= seqlen: + if enc.input_ids.shape[1] > seqlen: break i = random.randint(0, enc.input_ids.shape[1] - seqlen - 1) j = i + seqlen