Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ manifests: controller-gen yq
echo '{{- if .Values.kserve.localmodel.enabled }}'> charts/kserve-resources/templates/localmodelnode/role.yaml
cat config/rbac/localmodelnode/role.yaml >> charts/kserve-resources/templates/localmodelnode/role.yaml
echo '{{- end }}' >> charts/kserve-resources/templates/localmodelnode/role.yaml
# Copy the llmisvc templates
cp config/llmisvc/* charts/llmisvc-resources/templates/
rm charts/llmisvc-resources/templates/kustomization.yaml

@$(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths=./pkg/apis/serving/v1alpha1
@$(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths=./pkg/apis/serving/v1beta1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,22 +85,44 @@ spec:
type: object
parallelism:
properties:
data:
format: int32
type: integer
dataLocal:
format: int32
type: integer
dataRPCPort:
format: int32
type: integer
expert:
Comment thread
mholder6 marked this conversation as resolved.
type: boolean
pipeline:
Comment thread
mholder6 marked this conversation as resolved.
format: int64
format: int32
type: integer
tensor:
format: int64
format: int32
type: integer
type: object
prefill:
properties:
parallelism:
properties:
data:
format: int32
type: integer
dataLocal:
format: int32
type: integer
dataRPCPort:
format: int32
type: integer
expert:
type: boolean
Comment thread
mholder6 marked this conversation as resolved.
pipeline:
format: int64
format: int32
type: integer
tensor:
format: int64
format: int32
type: integer
type: object
replicas:
Expand Down Expand Up @@ -19538,8 +19560,6 @@ spec:
required:
- containers
type: object
required:
- model
type: object
type: object
served: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,22 +104,44 @@ spec:
type: object
parallelism:
properties:
data:
format: int32
type: integer
dataLocal:
format: int32
type: integer
dataRPCPort:
format: int32
type: integer
expert:
Comment thread
mholder6 marked this conversation as resolved.
type: boolean
pipeline:
format: int64
format: int32
type: integer
tensor:
format: int64
format: int32
type: integer
type: object
prefill:
properties:
parallelism:
properties:
data:
format: int32
type: integer
dataLocal:
format: int32
type: integer
dataRPCPort:
format: int32
type: integer
expert:
type: boolean
pipeline:
format: int64
format: int32
type: integer
tensor:
format: int64
format: int32
type: integer
type: object
replicas:
Expand Down Expand Up @@ -19557,8 +19579,6 @@ spec:
required:
- containers
type: object
required:
- model
type: object
status:
properties:
Expand Down
131 changes: 131 additions & 0 deletions charts/llmisvc-resources/templates/config-llm-decode-template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# LLMInferenceServiceConfig preset for the "decode" workload.
#
# The pod template below runs two containers:
#   * main                  - `vllm serve` listening on port 8001 with TLS
#                             (cert and key are read from /etc/ssl/certs).
#   * llm-d-routing-sidecar - declared under initContainers with
#                             restartPolicy Always; it listens on port 8000
#                             and is told the vLLM port via --vllm-port=8001.
#
# NOTE(review): the quoted placeholders for the served model name and the
# TLS secret name are Go-template expressions that look like they are meant
# to be resolved by the KServe controller, not by Helm. This file is placed
# under a Helm chart templates directory, where Helm also evaluates such
# expressions - confirm whether they need escaping for chart installs.
apiVersion: serving.kserve.io/v1alpha1
kind: LLMInferenceServiceConfig
metadata:
  name: kserve-config-llm-decode-template
spec:
  template:
    containers:
      - image: ghcr.io/llm-d/llm-d:v0.2.0
        imagePullPolicy: IfNotPresent
        name: main
        ports:
          - containerPort: 8001
            protocol: TCP
        command:
          - vllm
          - serve
        args:
          - --served-model-name
          - "{{ .Spec.Model.Name }}"
          - --port
          - "8001"
          - --disable-log-requests
          # TLS material comes from the read-only tls-certs volume mounted
          # at /etc/ssl/certs (see volumeMounts below).
          - --enable-ssl-refresh
          - --ssl-certfile
          - /etc/ssl/certs/tls.crt
          - --ssl-keyfile
          - /etc/ssl/certs/tls.key
        env:
          # HOME and HF_HUB_CACHE point at the emptyDir-backed mounts
          # declared in volumeMounts (/home and /models).
          - name: HOME
            value: /home
          - name: VLLM_LOGGING_LEVEL
            value: INFO
          - name: HF_HUB_CACHE
            value: /models
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            drop:
              - MKNOD
        terminationMessagePath: /dev/termination-log
        terminationMessagePolicy: FallbackToLogsOnError
        # Probes use HTTPS because the server is started with a TLS
        # certificate and key on the same port.
        livenessProbe:
          httpGet:
            path: /health
            port: 8001
            scheme: HTTPS
          # Liveness checks start 120s after container start.
          initialDelaySeconds: 120
          periodSeconds: 10
          timeoutSeconds: 10
          failureThreshold: 3
        readinessProbe:
          httpGet:
            path: /health
            port: 8001
            scheme: HTTPS
          initialDelaySeconds: 10
          periodSeconds: 10
          timeoutSeconds: 5
          # Up to 60 consecutive failures at 10s intervals are tolerated.
          failureThreshold: 60
        volumeMounts:
          - mountPath: /home
            name: home
          - mountPath: /dev/shm
            name: dshm
          - mountPath: /models
            name: model-cache
          - mountPath: /etc/ssl/certs
            name: tls-certs
            readOnly: true
    initContainers:
      - name: llm-d-routing-sidecar
        imagePullPolicy: IfNotPresent
        image: ghcr.io/llm-d/llm-d-routing-sidecar:v0.2.0
        # An init container with restartPolicy Always is a Kubernetes
        # sidecar container: it keeps running next to the main container
        # instead of having to exit before the main container starts.
        restartPolicy: Always
        ports:
          - containerPort: 8000
            protocol: TCP
        resources: {}
        securityContext: {}
        terminationMessagePath: /dev/termination-log
        terminationMessagePolicy: FallbackToLogsOnError
        livenessProbe:
          httpGet:
            path: /health
            port: 8000
            scheme: HTTPS
          initialDelaySeconds: 10
          periodSeconds: 10
          timeoutSeconds: 10
          failureThreshold: 3
        readinessProbe:
          httpGet:
            path: /health
            port: 8000
            scheme: HTTPS
          initialDelaySeconds: 10
          periodSeconds: 10
          timeoutSeconds: 5
          failureThreshold: 10
        args:
          - "--port=8000"
          - "--vllm-port=8001"
          - "--secure-proxy=true"
          - "--cert-path=/etc/ssl/certs"
          # NOTE(review): certificate verification is skipped for both the
          # decoder and prefiller connections - presumably because the
          # mounted certificates are self-signed (see the secret name on the
          # tls-certs volume); confirm this is intended.
          - "--decoder-use-tls=true"
          - "--decoder-tls-insecure-skip-verify=true"
          - "--prefiller-use-tls=true"
          - "--prefiller-tls-insecure-skip-verify=true"
          - "--enable-ssrf-protection=true"
        volumeMounts:
          - mountPath: /etc/ssl/certs
            name: tls-certs
            readOnly: true
        env:
          # Exposes the pod's own namespace to the sidecar via the
          # downward API.
          - name: INFERENCE_POOL_NAMESPACE
            valueFrom:
              fieldRef:
                fieldPath: metadata.namespace
    terminationGracePeriodSeconds: 30
    volumes:
      - emptyDir: {}
        name: home
      # Memory-backed emptyDir mounted at /dev/shm, capped at 1Gi.
      - emptyDir:
          medium: Memory
          sizeLimit: 1Gi
        name: dshm
      - emptyDir: {}
        name: model-cache
      - name: tls-certs
        secret:
          secretName: "{{ ChildName .ObjectMeta.Name `-kserve-self-signed-certs` }}"
Loading
Loading