diff --git a/olive/cache.py b/olive/cache.py
index 7f27a50de..53699b285 100644
--- a/olive/cache.py
+++ b/olive/cache.py
@@ -541,9 +541,11 @@ def _save_model(
                 output_file = output_dir
                 actual_output_dir = output_dir.parent
             else:
-                # Otherwise, create model.onnx in the directory
+                # Otherwise, create model.onnx in the directory.
+                # Preserve the source onnx_file_name stem (e.g. model_ctx) so the output
+                # filename matches what genai_config.json references.
                 actual_output_dir = output_dir
-                model_file_name = "model"
+                model_file_name = Path(onnx_file_name).stem if has_additional_files and onnx_file_name else "model"
                 if path_prefix:
                     model_file_name = f"{path_prefix}_{model_file_name}"
                 output_file = output_dir / f"{model_file_name}.onnx"
diff --git a/olive/passes/onnx/common.py b/olive/passes/onnx/common.py
index 14daffa7c..ba880ae5c 100644
--- a/olive/passes/onnx/common.py
+++ b/olive/passes/onnx/common.py
@@ -5,6 +5,7 @@
 import json
 import logging
 import re
+from collections.abc import Iterable
 from copy import deepcopy
 from pathlib import Path
 from typing import Any, Callable, Optional, Union
@@ -792,40 +793,46 @@ def update_llm_pipeline_genai_config(
 
 
 def update_llm_pipeline_genai_config_gpu(
-    model: ONNXModelHandler,
+    model: Union[ONNXModelHandler, CompositeModelHandler],
     output_model_dir: Union[str, Path],
-    input_model_path: Union[str, Path],
     decoder_config_extra: Optional[dict[str, Any]] = None,
-) -> ONNXModelHandler:
+    composite_components: Optional[Iterable[tuple[str, ONNXModelHandler]]] = None,
+) -> Union[ONNXModelHandler, CompositeModelHandler]:
     """Update the LLM pipeline in the model's genai_config.json file.
 
-    :param model: The  model to update.
+    :param model: The model (single or composite) to update.
+    :param output_model_dir: Directory where the updated genai_config.json should be written.
     :param decoder_config_extra: Extra configuration for the decoder.
+    :param composite_components: Optional iterable of (component_name, ONNXModelHandler)
+                                 used to build a multi-component pipeline.
+    :return: The same `model`; if a genai_config.json was found, its additional_files entry points to the new copy.
     """
     output_model_dir = Path(output_model_dir)
 
-    # update genai_config if it exists
+    additional_files = model.model_attributes["additional_files"]
     genai_config_path = None
-    genai_config_path = Path(input_model_path).parent / "genai_config.json"
+    for file_path in additional_files:
+        if Path(file_path).name == "genai_config.json":
+            genai_config_path = file_path
+            break
 
-    if genai_config_path.exists():
-        genai_config_path = str(genai_config_path.resolve())
-    else:
+    if not genai_config_path:
         return model
 
     with open(genai_config_path) as f:
         genai_config = json.load(f)
-
     # update model_type
     genai_config["model"]["type"] = "decoder-pipeline"
 
-    # Update the provider_options list
     provider_option = {"qnn": {"backend_type": "gpu"}}
-    genai_config["model"]["decoder"]["session_options"]["provider_options"] = [provider_option]
+    decoder = genai_config["model"].setdefault("decoder", {})
+    session_opts = decoder.setdefault("session_options", {})
+    session_opts["provider_options"] = [provider_option]
 
     # update decoder config
     decoder_config = genai_config["model"]["decoder"]
     decoder_config.get("sliding_window", {}).pop("slide_inputs", None)
+
     for key, value in (decoder_config_extra or {}).items():
         exisiting_value = decoder_config.get(key)
         if isinstance(exisiting_value, dict):
@@ -835,13 +842,39 @@ def update_llm_pipeline_genai_config_gpu(
         else:
             decoder_config[key] = value
 
-    pipeline_config = {}
-    component_io_config = model.io_config
-    pipeline_config["model_onnx"] = {
-        "filename": Path(model.model_path).name,
-        "inputs": component_io_config["input_names"],
-        "outputs": component_io_config["output_names"],
-    }
+    # --- Build pipeline_config ---
+    pipeline_config: dict[str, Any] = {}
+
+    if composite_components is None:
+        if not isinstance(model, ONNXModelHandler):
+            handlers = list(model.get_model_components())
+            if not handlers:
+                return model
+            _, single_handler = handlers[0]
+        else:
+            single_handler = model
+
+        component_io_config = single_handler.io_config
+        component_key = Path(single_handler.model_path).stem
+        pipeline_config[component_key] = {
+            "filename": Path(single_handler.model_path).name,
+            "inputs": component_io_config["input_names"],
+            "outputs": component_io_config["output_names"],
+        }
+
+    else:
+        # Composite case: one entry per component
+        for comp_name, comp_handler in composite_components:
+            component_io_config = comp_handler.io_config
+            pipeline_config[comp_name] = {
+                "filename": Path(comp_handler.model_path).name,
+                "inputs": component_io_config["input_names"],
+                "outputs": component_io_config["output_names"],
+            }
+            if comp_name.endswith("iterator"):
+                pipeline_config[comp_name]["run_on_prompt"] = False
+            else:
+                pipeline_config[comp_name]["run_on_token_gen"] = False
 
     decoder_config["pipeline"] = [pipeline_config]
 
@@ -849,40 +882,65 @@ def update_llm_pipeline_genai_config_gpu(
     new_genai_config_path = output_model_dir / "genai_config.json"
     with new_genai_config_path.open("w") as f:
         json.dump(genai_config, f, indent=4)
+    additional_files.remove(genai_config_path)
+    additional_files.append(str(new_genai_config_path))
 
     return model
 
 
 def update_llm_pipeline_genai_config_gpu_ctxbin(
-    model_path: Union[str, Path],
+    model: Optional[Union[ONNXModelHandler, CompositeModelHandler]],
+    output_model_path: Union[str, Path],
 ) -> None:
-    """Update the filename fields in the model's genai_config.json file from 'model' to 'model_ctx'.
+    """Point genai_config.json at one ``<stem>_ctx.onnx`` context binary and write it next to that file.
 
-    The genai_config.json file is updated in place in the model's directory.
-    :param model_path: Path to the model file.
+    :param model: Model whose ``additional_files`` attribute lists genai_config.json; None or no entry is a no-op.
+    :param output_model_path: Context binary output file; a stem not ending in "_ctx" leaves the config untouched.
     """
-    # Find genai_config in the model's directory
-    model_dir = Path(model_path).parent
-    genai_config_path = model_dir / "genai_config.json"
+    output_model_path = Path(output_model_path)
+
+    # Tolerate a missing model / model_attributes / key: there is then no genai_config.json to update.
+    additional_files = (getattr(model, "model_attributes", None) or {}).get("additional_files") or []
+    genai_config_path = None
+    for file_path in additional_files:
+        if Path(file_path).name == "genai_config.json":
+            genai_config_path = file_path
+            break
+
+    if not genai_config_path:
+        return
 
-    if not genai_config_path.exists():
+    ctx_stem = output_model_path.stem
+    if not ctx_stem.endswith("_ctx"):
         return
+    src_stem = ctx_stem[: -len("_ctx")]
+    src_filename = f"{src_stem}.onnx"
+    ctx_filename = f"{ctx_stem}.onnx"
 
     with open(genai_config_path) as f:
         genai_config = json.load(f)
 
-    # Update decoder filename to 'model_ctx'
-    if "decoder" in genai_config.get("model", {}):
-        if "filename" in genai_config["model"]["decoder"]:
-            genai_config["model"]["decoder"]["filename"] = "model/model_ctx.onnx"
+    decoder = genai_config.get("model", {}).get("decoder", {})
 
-        # Update filename in pipeline configuration
-        decoder_config = genai_config["model"]["decoder"]
-        if "pipeline" in decoder_config and isinstance(decoder_config["pipeline"], list):
-            for pipeline_item in decoder_config["pipeline"]:
-                if "model_onnx" in pipeline_item and "filename" in pipeline_item["model_onnx"]:
-                    pipeline_item["model_onnx"]["filename"] = "model/model_ctx.onnx"
+    # Update top-level decoder.filename only if it references the pre-context-binary file of this component
+    if decoder.get("filename") == src_filename:
+        decoder["filename"] = ctx_filename
 
-    # Save the updated genai_config back to the same location
-    with genai_config_path.open("w") as f:
+    # Update the pipeline entry whose filename is this component's source file (other components are left alone)
+    for pipeline_item in decoder.get("pipeline", []):
+        if not isinstance(pipeline_item, dict):
+            continue
+        for comp_name in list(pipeline_item.keys()):
+            comp = pipeline_item[comp_name]
+            if isinstance(comp, dict) and comp.get("filename") == src_filename:
+                comp["filename"] = ctx_filename
+                if comp_name == src_stem:
+                    pipeline_item[ctx_stem] = pipeline_item.pop(comp_name)
+                break  # only one entry matches per call
+
+    # Save next to the context binary and repoint additional_files from the old config to the new one.
+    new_genai_config_path = output_model_path.parent / "genai_config.json"
+    with new_genai_config_path.open("w") as f:
         json.dump(genai_config, f, indent=4)
+    additional_files.remove(genai_config_path)
+    additional_files.append(str(new_genai_config_path))
diff --git a/olive/passes/onnx/context_binary.py b/olive/passes/onnx/context_binary.py
index ba7fc433b..928b91189 100644
--- a/olive/passes/onnx/context_binary.py
+++ b/olive/passes/onnx/context_binary.py
@@ -117,6 +117,7 @@ def _run_single_target(
             "session_options": config.session_options,
             "embed_context": config.embed_context,
             "disable_cpu_fallback": config.disable_cpu_fallback,
+            "model": model,
         }
 
         if isinstance(model, ONNXModelHandler):
@@ -243,6 +244,7 @@ def _generate_context_binary(
         share_ep_contexts: bool = False,
         stop_share_ep_contexts: bool = False,
         ignore_missing_cb_bin: bool = False,
+        model: Optional[Union[ONNXModelHandler, CompositeModelHandler]] = None,
     ) -> ONNXModelHandler:
         """Generate context binary for the model.
 
@@ -271,7 +273,7 @@ def _generate_context_binary(
         if execution_provider == ExecutionProvider.QNNExecutionProvider:
             if str(device).lower() == "gpu":
                 provider_options["backend_path"] = "libQnnGpu.so" if platform.system() == "Linux" else "QnnGpu.dll"
-                update_llm_pipeline_genai_config_gpu_ctxbin(model_path)
+                update_llm_pipeline_genai_config_gpu_ctxbin(model, Path(output_model_path))
             else:
                 provider_options["backend_path"] = "libQnnHtp.so" if platform.system() == "Linux" else "QnnHtp.dll"
                 if share_ep_contexts:
diff --git a/olive/passes/onnx/static_llm.py b/olive/passes/onnx/static_llm.py
index b12ca5c34..294759aff 100644
--- a/olive/passes/onnx/static_llm.py
+++ b/olive/passes/onnx/static_llm.py
@@ -3,6 +3,7 @@
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
 import logging
+from copy import deepcopy
 from pathlib import Path
 
 import onnx
@@ -56,6 +57,11 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassCon
                 default_value=64,
                 description="Input length of the context model.",
             ),
+            "context_iterator_models": PassConfigParam(
+                type_=bool,
+                default_value=True,
+                description=("To generate context and iterative models. Specifically for QNN GPU"),
+            ),
             "group_session_options": PassConfigParam(
                 type_=dict,
                 description=(
@@ -182,57 +188,146 @@ def process_context_iterator(component_models, llm_pipeline, output_dir):
         )
 
     def _run_qnn_gpu(self, model: ONNXModelHandler, config: type[BasePassConfig], output_model_path: Path):
+        """Generate static-shape ONNX model(s) for QNN GPU and refresh genai_config.json accordingly.
+
+        - context_iterator_models false, or context_length == 1: one ``model.onnx``; returns the single handler.
+        - Otherwise: ``context.onnx`` (AR-N) and ``iterator.onnx`` (AR-1); returns a CompositeModelHandler.
+        """
         output_model_dir = Path(output_model_path).with_suffix("")
         model_path = Path(model.model_path)
 
         # --- Step 1: Load model (handle both single and external data) ---
         try:
-            model_proto = onnx.load(model_path, load_external_data=True)
+            base_model_proto = onnx.load(model_path, load_external_data=True)
         except Exception as e:
             raise RuntimeError(f"Failed to load ONNX model: {e}") from e
 
-        # --- Step 2: Fix symbolic dimensions ---
-        batch_size, sequence_length = OnnxDAG(model_proto).get_io_shape("input_ids")
+        # --- Step 2: Get symbolic batch and sequence dims once ---
+        batch_size, sequence_length = OnnxDAG(base_model_proto).get_io_shape("input_ids")
         if not (isinstance(batch_size, str) and isinstance(sequence_length, str)):
             raise ValueError("Input dimensions must be symbolic before static shape fixing.")
 
-        param_mapping = {batch_size: config.batch_size, sequence_length: config.context_length}
-        self.fix_shape(model_proto, param_mapping)
+        context_iterator_models = getattr(config, "context_iterator_models", True)
 
-        # --- Step 3: Save model as external-data format ---
-        output_model_file = Path(output_model_dir) / "model.onnx"
-        external_data_file = Path(output_model_dir) / "model.onnx.data"
+        if not context_iterator_models or int(config.context_length) == 1:
+            # Single model mode (also when AR-N would equal AR-1, which would otherwise yield two "iterator"s)
+            ctx_lengths_list = [int(config.context_length)]
+        else:
+            # Composite model mode → AR1 + AR-N
+            n = int(config.context_length)
+            ctx_lengths_list = [n, 1]
+
+        multiple = len(ctx_lengths_list) > 1
+
+        generated_handlers: dict[int, ONNXModelHandler] = {}
+        generated_names: dict[int, str] = {}
+
+        for ctx_len in ctx_lengths_list:
+            # --- Clone base model proto for this variant ---
+            model_proto = onnx.ModelProto()
+            model_proto.CopyFrom(base_model_proto)
+
+            # --- Step 3: Fix symbolic dimensions for this context length ---
+            param_mapping = {batch_size: config.batch_size, sequence_length: ctx_len}
+            self.fix_shape(model_proto, param_mapping)
+
+            add_version_metadata_to_model_proto(model_proto)
+
+            # --- Step 4: Save as external-data ONNX ---
+            # single model: "model", composite: "context" (AR-N) or "iterator" (AR-1)
+            if not multiple:
+                logical_name = "model"
+            elif ctx_len == 1:
+                logical_name = "iterator"
+            else:
+                logical_name = "context"
+            onnx_file_name = f"{logical_name}.onnx"
+            output_model_file = Path(output_model_dir) / onnx_file_name
+            external_data_file = Path(output_model_dir) / f"{onnx_file_name}.data"
+
+            output_model_dir.mkdir(parents=True, exist_ok=True)
+            onnx.save(
+                model_proto,
+                str(output_model_file),
+                save_as_external_data=True,
+                all_tensors_to_one_file=True,
+                location=external_data_file.name,
+                convert_attribute=False,
+            )
+
+            # Build handler for this static model
+            new_model_attributes = deepcopy(model.model_attributes) or {}
+            handler = ONNXModelHandler(
+                model_path=output_model_dir,
+                onnx_file_name=output_model_file.name,
+                model_attributes=new_model_attributes,
+            )
+
+            # Store handler + logical component name
+            generated_handlers[ctx_len] = handler
+            generated_names[ctx_len] = logical_name
+
+        # --- Step 5: Update genai_config.json ---
+        # For single model: pipeline with one component.
+        # For multiple models: pipeline with multiple components (composite).
+        if not multiple:
+            # Single context length
+            ctx_len = ctx_lengths_list[0]
+            handler = generated_handlers[ctx_len]
+
+            decoder_config_extra = {
+                "inputs": {
+                    "past_sequence_length": "past_seq_len",
+                    "total_sequence_length": "total_seq_len",
+                },
+                "sliding_window": {
+                    "window_size": ctx_len,
+                    "pad_value": 0,
+                    "alignment": "left",
+                    "slide_key_value_cache": False,
+                },
+            }
 
-        onnx.save(
-            model_proto,
-            str(output_model_file),
-            save_as_external_data=True,
-            all_tensors_to_one_file=True,
-            location=external_data_file.name,
-            convert_attribute=False,
+            return update_llm_pipeline_genai_config_gpu(
+                model=handler,
+                output_model_dir=output_model_dir,
+                decoder_config_extra=decoder_config_extra,
+                composite_components=None,
+            )
+
+        # Multiple context lengths -> wrap in CompositeModelHandler and create composite pipeline
+        components = []
+        component_names = []
+
+        for ctx_len, handler in generated_handlers.items():
+            components.append(handler)
+            component_names.append(generated_names[ctx_len])
+
+        new_model_attributes = deepcopy(model.model_attributes) or {}
+
+        composite = CompositeModelHandler(
+            model_components=components, model_component_names=component_names, model_attributes=new_model_attributes
         )
 
-        decoder_config_extra = {
+        # Decoder settings shared by all components; the sliding window spans the largest context length
+        composite_decoder_extra = {
             "inputs": {
                 "past_sequence_length": "past_seq_len",
                 "total_sequence_length": "total_seq_len",
             },
             "sliding_window": {
-                "window_size": config.context_length,
+                "window_size": max(ctx_lengths_list),
                 "pad_value": 0,
                 "alignment": "left",
                 "slide_key_value_cache": False,
             },
         }
 
-        input_model_path = model.model_path
-        model_static = ONNXModelHandler(model_path=output_model_dir, onnx_file_name=output_model_file.name)
-
         return update_llm_pipeline_genai_config_gpu(
-            model_static,
-            output_model_dir,
-            input_model_path,
-            decoder_config_extra,
+            model=composite,
+            output_model_dir=output_model_dir,
+            decoder_config_extra=composite_decoder_extra,
+            composite_components=list(zip(component_names, components)),
         )
 
     @staticmethod