diff --git a/babeldoc/format/pdf/document_il/midend/il_translator_llm_only.py b/babeldoc/format/pdf/document_il/midend/il_translator_llm_only.py
index cef6e069..fe210a25 100644
--- a/babeldoc/format/pdf/document_il/midend/il_translator_llm_only.py
+++ b/babeldoc/format/pdf/document_il/midend/il_translator_llm_only.py
@@ -589,7 +589,7 @@ def process_page(
                     )
                 )
 
-            if total_token_count > 200 or len(paragraphs) > 5:
+            if total_token_count > self.translation_config.llm_batch_max_tokens or len(paragraphs) > self.translation_config.llm_batch_max_paragraphs:
                 self.mid += 1
                 executor.submit(
                     self.translate_paragraph,
diff --git a/babeldoc/format/pdf/translation_config.py b/babeldoc/format/pdf/translation_config.py
index 3270b591..14da500e 100644
--- a/babeldoc/format/pdf/translation_config.py
+++ b/babeldoc/format/pdf/translation_config.py
@@ -217,6 +217,8 @@ def __init__(
         metadata_extra_data: str | None = None,
         term_pool_max_workers: int | None = None,
         disable_same_text_fallback: bool = False,
+        llm_batch_max_tokens: int = 200,
+        llm_batch_max_paragraphs: int = 5,
     ):
         self.translator = translator
         self.term_extraction_translator = term_extraction_translator or translator
@@ -376,6 +378,8 @@ def __init__(
             "cache_hit_prompt_tokens": 0,
         }
         self.disable_same_text_fallback = disable_same_text_fallback
+        self.llm_batch_max_tokens = llm_batch_max_tokens
+        self.llm_batch_max_paragraphs = llm_batch_max_paragraphs
 
         if self.ocr_workaround:
             self.remove_non_formula_lines = False
diff --git a/babeldoc/main.py b/babeldoc/main.py
index 9e256e10..941332c8 100644
--- a/babeldoc/main.py
+++ b/babeldoc/main.py
@@ -291,6 +291,18 @@ def create_parser():
         type=int,
         help="Maximum number of worker threads dedicated to automatic term extraction. If not specified, defaults to --pool-max-workers (or QPS value when unset).",
     )
+    translation_group.add_argument(
+        "--llm-batch-max-tokens",
+        type=int,
+        default=200,
+        help="Maximum total tokens per LLM batch translation request. Paragraphs are packed until this limit is reached. (default: 200)",
+    )
+    translation_group.add_argument(
+        "--llm-batch-max-paragraphs",
+        type=int,
+        default=5,
+        help="Maximum number of paragraphs per LLM batch translation request. Paragraphs are packed until this limit is reached. (default: 5)",
+    )
     translation_group.add_argument(
         "--no-auto-extract-glossary",
         action="store_false",
@@ -729,6 +741,8 @@ async def main():
             skip_formula_offset_calculation=args.skip_formula_offset_calculation,
             metadata_extra_data=args.metadata_extra_data,
             term_pool_max_workers=args.term_pool_max_workers,
+            llm_batch_max_tokens=args.llm_batch_max_tokens,
+            llm_batch_max_paragraphs=args.llm_batch_max_paragraphs,
         )
 
         def nop(_x):