跳到内容

源文件 examples/online_serving/chart-helm

Helm Charts

此目录包含用于部署 vllm 应用程序的 Helm chart。该 chart 包括部署、自动扩缩、资源管理等配置。

文件

  • Chart.yaml: 定义 chart 元数据,包括名称、版本和维护者。
  • ct.yaml: 用于 chart 测试的配置。
  • lintconf.yaml: YAML 文件的 Linting 规则。
  • values.schema.json: 用于验证 values.yaml 的 JSON 模式。
  • values.yaml: Helm chart 的默认值。
  • templates/_helpers.tpl: 用于定义常用配置的辅助模板。
  • templates/configmap.yaml: 用于创建 ConfigMap 的模板。
  • templates/custom-objects.yaml: 用于自定义 Kubernetes 对象的模板。
  • templates/deployment.yaml: 用于创建 Deployments 的模板。
  • templates/hpa.yaml: 用于 Horizontal Pod Autoscaler 的模板。
  • templates/job.yaml: 用于 Kubernetes Jobs 的模板。
  • templates/poddisruptionbudget.yaml: 用于 Pod Disruption Budget 的模板。
  • templates/pvc.yaml: 用于 Persistent Volume Claims 的模板。
  • templates/secrets.yaml: 用于 Kubernetes Secrets 的模板。
  • templates/service.yaml: 用于创建 Services 的模板。

示例材料

.helmignore
# Patterns of files Helm should ignore for this chart.

# Version control and CI
.git/
/workflows

# Chart testing, linting and schema files
ct.yaml
lintconf.yaml
values.schema.json

# Images
*.png
Chart.yaml
# Chart metadata for the vllm Helm chart.
apiVersion: v2
name: chart-vllm
description: Chart vllm

# Kind of chart: 'application' or 'library'.
#
# An application chart is a set of templates that can be packaged into
# versioned archives and deployed.
#
# A library chart only supplies utilities and functions that application
# charts pull in as a dependency and inject into their rendering pipeline.
# It defines no templates and therefore cannot be deployed.
type: application

# Chart version, following Semantic Versioning (https://semver.org/).
# Increment it on every change to the chart or its templates, including
# changes to the app version.
version: 0.0.1

maintainers:
  - name: mfournioux
ct.yaml
# Configuration used for chart testing.

# Do not validate the maintainers listed in Chart.yaml.
validate-maintainers: false

# Directories that contain charts.
chart-dirs:
  - charts
lintconf.yaml
---
# Linting rules for the chart's YAML files.
rules:
  # Flow mappings: no padding inside non-empty braces.
  braces:
    min-spaces-inside: 0
    max-spaces-inside: 0
    min-spaces-inside-empty: -1
    max-spaces-inside-empty: -1

  # Flow sequences: no padding inside non-empty brackets.
  brackets:
    min-spaces-inside: 0
    max-spaces-inside: 0
    min-spaces-inside-empty: -1
    max-spaces-inside-empty: -1

  # "key: value" - nothing before the colon, at most one space after it.
  colons:
    max-spaces-before: 0
    max-spaces-after: 1

  # "a, b" - nothing before the comma, exactly one space after it.
  commas:
    max-spaces-before: 0
    min-spaces-after: 1
    max-spaces-after: 1

  comments:
    require-starting-space: true
    min-spaces-from-content: 2

  document-end: disable

  # No --- to start a file
  document-start: disable

  empty-lines:
    max: 2
    max-start: 0
    max-end: 0

  hyphens:
    max-spaces-after: 1

  indentation:
    spaces: consistent
    # - list indentation will handle both indentation and without
    indent-sequences: whatever
    check-multi-line-strings: false

  key-duplicates: enable

  # Lines can be any length
  line-length: disable

  new-line-at-end-of-file: disable

  new-lines:
    type: unix

  trailing-spaces: enable

  truthy:
    level: warning
templates/_helpers.tpl
{{/*
Port exposed by the pods.
Renders .Values.containerPort, or "8000" when that value is empty.
*/}}
{{- define "chart.container-port" -}}
{{- .Values.containerPort | default "8000" }}
{{- end }}

{{/*
Service name.
Renders .Values.serviceName (lower-cased and trimmed) when it is set,
otherwise "<release name>-service".
Both branches produce a double-quoted scalar with no leading or trailing
whitespace, so the result can be placed inline after a "name:" key.
(The else branch previously started with a newline and only the default
name was quoted.)
*/}}
{{- define "chart.service-name" -}}
{{-  if .Values.serviceName -}}
{{-    .Values.serviceName | lower | trim | quote -}}
{{-  else -}}
{{-    printf "%s-service" .Release.Name | quote -}}
{{-  end -}}
{{- end }}

{{/*
Port exposed by the Service.
Renders .Values.servicePort, or the container port ("chart.container-port")
when no service port is set.
*/}}
{{- define "chart.service-port" -}}
{{- .Values.servicePort | default (include "chart.container-port" .) }}
{{- end }}

{{/*
Name of the Service port (a fixed, double-quoted string).
*/}}
{{- define "chart.service-port-name" -}}
{{- "service-port" | quote }}
{{- end }}

{{/*
Name of the container port (a fixed, double-quoted string).
*/}}
{{- define "chart.container-port-name" -}}
{{- "container-port" | quote }}
{{- end }}

{{/*
Deployment strategy.
Renders a "strategy:" key holding .Values.deploymentStrategy when it is set;
otherwise a rolling update with maxSurge 100% and maxUnavailable 0.
*/}}
{{- define "chart.strategy" -}}
strategy:
{{-   with .Values.deploymentStrategy }}
{{-     toYaml . | nindent 2 }}
{{-   else }}
  rollingUpdate:
    maxSurge: 100%
    maxUnavailable: 0
{{-   end }}
{{- end }}

{{/*
Additional container ports.
Renders .Values.extraPorts as YAML, or nothing when the list is empty.
*/}}
{{- define "chart.extraPorts" }}
{{-   if .Values.extraPorts }}
{{      toYaml .Values.extraPorts }}
{{-   end }}
{{- end }}

{{/*
External ConfigMaps and Secrets.
Renders .Values.externalConfigs as YAML, or nothing when the list is empty.
*/}}
{{- define "chart.externalConfigs" -}}
{{-   if .Values.externalConfigs -}}
{{      toYaml .Values.externalConfigs }}
{{-   end }}
{{- end }}


{{/*
Liveness and readiness probes.
Each probe block is rendered only when the matching value
(.Values.readinessProbe / .Values.livenessProbe) is set.
*/}}
{{- define "chart.probes" -}}
{{-   with .Values.readinessProbe }}
readinessProbe:
{{-     toYaml . | nindent 2 }}
{{-   end }}
{{-   with .Values.livenessProbe }}
livenessProbe:
{{-     toYaml . | nindent 2 }}
{{-   end }}
{{- end }}

{{/*
Container resources (the body of a "resources:" key).
Memory and CPU are mandatory for both requests and limits. The
nvidia.com/gpu entries are rendered only when the GPU count is greater than
zero in both requests and limits.
*/}}
{{- define "chart.resources" -}}
{{- $gpuKey := "nvidia.com/gpu" -}}
requests:
  memory: {{ required "Value 'resources.requests.memory' must be defined !" .Values.resources.requests.memory | quote }}
  cpu: {{ required "Value 'resources.requests.cpu' must be defined !" .Values.resources.requests.cpu | quote }}
  {{- /* Evaluated once here and reused for the limits section below. */}}
  {{- $withGpu := and (gt (int (index .Values.resources.requests $gpuKey)) 0) (gt (int (index .Values.resources.limits $gpuKey)) 0) }}
  {{- if $withGpu }}
  nvidia.com/gpu: {{ required "Value 'resources.requests.nvidia.com/gpu' must be defined !" (index .Values.resources.requests $gpuKey) | quote }}
  {{- end }}
limits:
  memory: {{ required "Value 'resources.limits.memory' must be defined !" .Values.resources.limits.memory | quote }}
  cpu: {{ required "Value 'resources.limits.cpu' must be defined !" .Values.resources.limits.cpu | quote }}
  {{- if $withGpu }}
  nvidia.com/gpu: {{ required "Value 'resources.limits.nvidia.com/gpu' must be defined !" (index .Values.resources.limits $gpuKey) | quote }}
  {{- end }}
{{- end }}


{{/*
User the main container runs as.
Renders "runAsUser: <uid>" when .Values.image.runAsUser is set and nothing
otherwise. The value is read from the same key that is tested; previously
the test used .Values.image.runAsUser while the rendered value came from
.Values.runAsUser, so the configured UID was never emitted.
int64 keeps large UIDs from being printed in scientific notation.
*/}}
{{- define "chart.user" }}
{{-   with .Values.image.runAsUser }}
runAsUser: {{ int64 . }}
{{-   end }}
{{- end }}

{{/*
Image used by the model-download init container and Job
(a fixed, double-quoted string).
*/}}
{{- define "chart.extraInitImage" -}}
{{- "amazon/aws-cli:2.6.4" | quote }}
{{- end }}

{{/*
Environment variables for the model-download containers.
The S3 endpoint, bucket name and AWS credentials are read from the
"<release name>-secrets" Secret; the S3 path and the EC2 metadata switch
come from .Values.extraInit.
*/}}
{{- define "chart.extraInitEnv" -}}
{{- $secretName := printf "%s-secrets" .Release.Name -}}
- name: S3_ENDPOINT_URL
  valueFrom:
    secretKeyRef:
      name: {{ $secretName }}
      key: s3endpoint
- name: S3_BUCKET_NAME
  valueFrom:
    secretKeyRef:
      name: {{ $secretName }}
      key: s3bucketname
- name: AWS_ACCESS_KEY_ID
  valueFrom:
    secretKeyRef:
      name: {{ $secretName }}
      key: s3accesskeyid
- name: AWS_SECRET_ACCESS_KEY
  valueFrom:
    secretKeyRef:
      name: {{ $secretName }}
      key: s3accesskey
- name: S3_PATH
  value: "{{ .Values.extraInit.s3modelpath }}"
- name: AWS_EC2_METADATA_DISABLED
  value: "{{ .Values.extraInit.awsEc2MetadataDisabled }}"
{{- end }}

{{/*
Chart labels.
Renders .Values.labels as YAML, or nothing when no labels are set.
*/}}
{{- define "chart.labels" -}}
{{-   if .Values.labels -}}
{{      toYaml .Values.labels }}
{{-   end }}
{{- end }}
templates/configmap.yaml
{{- /*
ConfigMap "<release name>-configs" holding .Values.configs.
Nothing is rendered when .Values.configs is empty.
*/ -}}
{{- with .Values.configs -}}
apiVersion: v1
kind: ConfigMap
metadata:
  name: "{{ $.Release.Name }}-configs"
  namespace: {{ $.Release.Namespace }}
data:
  {{- toYaml . | nindent 2 }}
{{- end -}}
templates/custom-objects.yaml
{{- /*
Renders every entry of .Values.customObjects as its own YAML document.
Each entry is serialised with toYaml and passed through tpl, so template
expressions inside an entry are evaluated against the root context.
The "---" separator is written on its own line BEFORE each object; it used to
follow the object and ended up glued to the first line of the next one
("---apiVersion: ..."). Ranging over an empty list renders nothing, so no
surrounding "if" is needed.
*/ -}}
{{- range .Values.customObjects }}
---
{{ tpl (toYaml .) $ }}
{{- end }}
templates/deployment.yaml
{{- /*
Deployment "<release name>-deployment-vllm" running the vllm container.

The selector and the pod template take their labels from the "chart.labels"
helper (.Values.labels), the same helper used for the Deployment metadata and
for the Service selector. They used to be hard-coded to environment/release
"test", so overriding .Values.labels left the Service selecting no pods.
With the default values the rendered labels are unchanged.

When .Values.extraInit is set, an init container keeps looping while a
dry-run "aws s3 sync" into /data still reports files to download.
*/ -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: "{{ .Release.Name }}-deployment-vllm"
  namespace: {{ .Release.Namespace }}
  labels:
  {{- include "chart.labels" . | nindent 4 }}
spec:
  replicas: {{ .Values.replicaCount }}
  {{- include "chart.strategy" . | nindent 2 }}
  selector:
    # Must stay identical to the pod template labels below.
    matchLabels:
    {{- include "chart.labels" . | nindent 6 }}
  progressDeadlineSeconds: 1200
  template:
    metadata:
      labels:
      {{- include "chart.labels" . | nindent 8 }}
    spec:
      containers:
        - name: "vllm"
          image: "{{ required "Required value 'image.repository' must be defined !" .Values.image.repository }}:{{ required "Required value 'image.tag' must be defined !" .Values.image.tag }}"
          {{- with .Values.image.command }}
          command:
            {{- toYaml . | nindent 10 }}
          {{- end }}
          securityContext:
            {{- with .Values.image.securityContext }}
              {{- toYaml . | nindent 12 }}
            {{- else }}
            runAsNonRoot: false
              {{- include "chart.user" . | indent 12 }}
            {{- end }}
          imagePullPolicy: IfNotPresent
          {{- with .Values.image.env }}
          env:
            {{- toYaml . | nindent 10 }}
          {{- else }}
          env: []
          {{- end }}
          {{- if or .Values.externalConfigs .Values.configs .Values.secrets }}
          envFrom:
            {{- if .Values.configs }}
            - configMapRef:
                name: "{{ .Release.Name }}-configs"
            {{- end }}
            {{- if .Values.secrets }}
            - secretRef:
                name: "{{ .Release.Name }}-secrets"
            {{- end }}
            {{- include "chart.externalConfigs" . | nindent 12 }}
          {{- end }}
          ports:
            - name: {{ include "chart.container-port-name" . }}
              containerPort: {{ include "chart.container-port" . }}
            {{- include "chart.extraPorts" . | nindent 12 }}
          {{- include "chart.probes" . | indent 10 }}
          resources: {{- include "chart.resources" . | nindent 12 }}
          volumeMounts:
          - name: {{ .Release.Name }}-storage
            mountPath: /data

        {{- with .Values.extraContainers }}
        {{ toYaml . | nindent 8 }}
        {{- end }}

      {{- if .Values.extraInit }}
      initContainers:
      - name: wait-download-model
        image: {{ include "chart.extraInitImage" . }}
        command:
          - /bin/bash
        args:
          - -eucx
          - while aws --endpoint-url $S3_ENDPOINT_URL s3 sync --dryrun s3://$S3_BUCKET_NAME/$S3_PATH /data | grep -q download; do sleep 10; done
        env: {{- include "chart.extraInitEnv" . | nindent 10 }}
        resources:
          requests:
            cpu: 200m
            memory: 1Gi
          limits:
            cpu: 500m
            memory: 2Gi
        volumeMounts:
        - name: {{ .Release.Name }}-storage
          mountPath: /data
      {{- end }}
      volumes:
        - name: {{ .Release.Name }}-storage
          persistentVolumeClaim:
            claimName: {{ .Release.Name }}-storage-claim

      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- /* GPU scheduling is added only when GPUs are requested in both requests and limits. */}}
      {{- if and (gt (int (index .Values.resources.requests "nvidia.com/gpu")) 0) (gt (int (index .Values.resources.limits "nvidia.com/gpu")) 0) }}
      runtimeClassName: nvidia
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                - key: nvidia.com/gpu.product
                  operator: In
                  {{- with .Values.gpuModels }}
                  values:
                    {{- toYaml . | nindent 20 }}
                  {{- end }}
      {{- end }}
templates/hpa.yaml
{{- /*
HorizontalPodAutoscaler, rendered only when .Values.autoscaling.enabled is true.
CPU and memory utilization metrics are each added only when the matching
target percentage is set.
*/ -}}
{{- if .Values.autoscaling.enabled }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: "{{ .Release.Name }}-hpa"
  namespace: {{ .Release.Namespace }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    # Must be the name of the chart's Deployment; the previous fixed value
    # "vllm" did not match it, so the autoscaler had no target to scale.
    name: "{{ .Release.Name }}-deployment-vllm"
  minReplicas: {{ .Values.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
  metrics:
    {{- with .Values.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ . }}
    {{- end }}
    {{- with .Values.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ . }}
    {{- end }}
{{- end }}
templates/job.yaml
{{- /*
Job "<release name>-init-vllm", rendered only when .Values.extraInit is set.
It runs "aws s3 sync" from the configured S3 bucket/path into /data on the
"<release name>-storage-claim" volume.
*/ -}}
{{- if .Values.extraInit }}
apiVersion: batch/v1
kind: Job
metadata:
  name: "{{ .Release.Name }}-init-vllm"
  namespace: {{ .Release.Namespace }}
spec:
  ttlSecondsAfterFinished: 100
  template:
    metadata:
      name: init-vllm
    spec:
      restartPolicy: OnFailure
      containers:
        - name: job-download-model
          image: {{ include "chart.extraInitImage" . }}
          command:
            - /bin/bash
          args:
            - -eucx
            - aws --endpoint-url $S3_ENDPOINT_URL s3 sync s3://$S3_BUCKET_NAME/$S3_PATH /data
          env: {{- include "chart.extraInitEnv" . | nindent 12 }}
          volumeMounts:
            - name: {{ .Release.Name }}-storage
              mountPath: /data
          resources:
            requests:
              cpu: 200m
              memory: 1Gi
            limits:
              cpu: 500m
              memory: 2Gi
      volumes:
        - name: {{ .Release.Name }}-storage
          persistentVolumeClaim:
            claimName: "{{ .Release.Name }}-storage-claim"
{{- end }}
templates/poddisruptionbudget.yaml
{{- /*
PodDisruptionBudget "<release name>-pdb".
maxUnavailable comes from .Values.maxUnavailablePodDisruptionBudget and falls
back to 1 when that value is empty.
*/ -}}
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: "{{ .Release.Name }}-pdb"
  namespace: {{ .Release.Namespace }}
spec:
  maxUnavailable: {{ default 1 .Values.maxUnavailablePodDisruptionBudget }}
  # A budget without a selector matches no pods. Select the pods carrying the
  # labels from .Values.labels, the same helper the Service selector uses.
  selector:
    matchLabels:
    {{- include "chart.labels" . | nindent 6 }}
templates/pvc.yaml
{{- /*
PersistentVolumeClaim "<release name>-storage-claim", rendered only when
.Values.extraInit is set. The requested size is .Values.extraInit.pvcStorage.
*/ -}}
{{- with .Values.extraInit }}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: "{{ $.Release.Name }}-storage-claim"
  namespace: {{ $.Release.Namespace }}
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: {{ .pvcStorage }}
{{- end }}
templates/secrets.yaml
{{- /*
Secret "<release name>-secrets".
Every key/value pair of .Values.secrets is stored base64-encoded under "data".
*/ -}}
apiVersion: v1
kind: Secret
metadata:
  name: "{{ .Release.Name }}-secrets"
  namespace: {{ .Release.Namespace }}
type: Opaque
data:
  {{- range $name, $plain := .Values.secrets }}
  {{ $name }}: {{ b64enc $plain | quote }}
  {{- end }}
templates/service.yaml
{{- /*
ClusterIP Service in front of the vllm pods.
The name comes from the "chart.service-name" helper, so .Values.serviceName
is honoured; when it is unset the name is "<release name>-service", as before.
The port forwards to the container port by name, and the selector uses the
labels from .Values.labels.
*/ -}}
apiVersion: v1
kind: Service
metadata:
  {{- /* trim removes any whitespace surrounding the helper output. */}}
  name: {{ include "chart.service-name" . | trim }}
  namespace: {{ .Release.Namespace }}
spec:
  type: ClusterIP
  ports:
    - name: {{ include "chart.service-port-name" . }}
      port: {{ include "chart.service-port" . }}
      targetPort: {{ include "chart.container-port-name" . }}
      protocol: TCP
  selector:
  {{- include "chart.labels" . | nindent 4 }}
values.schema.json
{
    "$schema": "http://json-schema.org/schema#",
    "type": "object",
    "properties": {
        "image": {
            "type": "object",
            "properties": {
                "repository": {
                    "type": "string"
                },
                "tag": {
                    "type": "string"
                },
                "command": {
                    "type": "array",
                    "items": {
                        "type": "string"
                    }
                }
            },
            "required": [
                "command",
                "repository",
                "tag"
            ]
        },
        "containerPort": {
            "type": "integer"
        },
        "serviceName": {
            "description": "Optional service name; null means not set.",
            "type": [
                "string",
                "null"
            ]
        },
        "servicePort": {
            "type": "integer"
        },
        "extraPorts": {
            "type": "array"
        },
        "replicaCount": {
            "type": "integer"
        },
        "deploymentStrategy": {
            "type": "object"
        },
        "resources": {
            "type": "object",
            "properties": {
                "requests": {
                    "type": "object",
                    "properties": {
                        "cpu": {
                            "type": "integer"
                        },
                        "memory": {
                            "type": "string"
                        },
                        "nvidia.com/gpu": {
                            "type": "integer"
                        }
                    },
                    "required": [
                        "cpu",
                        "memory",
                        "nvidia.com/gpu"
                    ]
                },
                "limits": {
                    "type": "object",
                    "properties": {
                        "cpu": {
                            "type": "integer"
                        },
                        "memory": {
                            "type": "string"
                        },
                        "nvidia.com/gpu": {
                            "type": "integer"
                        }
                    },
                    "required": [
                        "cpu",
                        "memory",
                        "nvidia.com/gpu"
                    ]
                }
            },
            "required": [
                "limits",
                "requests"
            ]
        },
        "gpuModels": {
            "type": "array",
            "items": {
                "type": "string"
            }
        },
        "autoscaling": {
            "type": "object",
            "properties": {
                "enabled": {
                    "type": "boolean"
                },
                "minReplicas": {
                    "type": "integer"
                },
                "maxReplicas": {
                    "type": "integer"
                },
                "targetCPUUtilizationPercentage": {
                    "type": "integer"
                }
            },
            "required": [
                "enabled",
                "maxReplicas",
                "minReplicas",
                "targetCPUUtilizationPercentage"
            ]
        },
        "configs": {
            "type": "object"
        },
        "secrets": {
            "type": "object"
        },
        "externalConfigs": {
            "type": "array"
        },
        "customObjects": {
            "type": "array"
        },
        "maxUnavailablePodDisruptionBudget": {
            "type": "string"
        },
        "extraInit": {
            "type": "object",
            "properties": {
                "s3modelpath": {
                    "type": "string"
                },
                "pvcStorage": {
                    "type": "string"
                },
                "awsEc2MetadataDisabled": {
                    "type": "boolean"
                }
            },
            "required": [
                "pvcStorage",
                "s3modelpath",
                "awsEc2MetadataDisabled"
            ]
        },
        "extraContainers": {
            "type": "array"
        },
        "readinessProbe": {
            "type": "object",
            "properties": {
                "initialDelaySeconds": {
                    "type": "integer"
                },
                "periodSeconds": {
                    "type": "integer"
                },
                "failureThreshold": {
                    "type": "integer"
                },
                "httpGet": {
                    "type": "object",
                    "properties": {
                        "path": {
                            "type": "string"
                        },
                        "port": {
                            "type": "integer"
                        }
                    },
                    "required": [
                        "path",
                        "port"
                    ]
                }
            },
            "required": [
                "failureThreshold",
                "httpGet",
                "initialDelaySeconds",
                "periodSeconds"
            ]
        },
        "livenessProbe": {
            "type": "object",
            "properties": {
                "initialDelaySeconds": {
                    "type": "integer"
                },
                "failureThreshold": {
                    "type": "integer"
                },
                "periodSeconds": {
                    "type": "integer"
                },
                "httpGet": {
                    "type": "object",
                    "properties": {
                        "path": {
                            "type": "string"
                        },
                        "port": {
                            "type": "integer"
                        }
                    },
                    "required": [
                        "path",
                        "port"
                    ]
                }
            },
            "required": [
                "failureThreshold",
                "httpGet",
                "initialDelaySeconds",
                "periodSeconds"
            ]
        },
        "labels": {
            "type": "object",
            "properties": {
                "environment": {
                    "type": "string"
                },
                "release": {
                    "type": "string"
                }
            },
            "required": [
                "environment",
                "release"
            ]
        }
    },
    "required": [
        "autoscaling",
        "configs",
        "containerPort",
        "customObjects",
        "deploymentStrategy",
        "externalConfigs",
        "extraContainers",
        "extraInit",
        "extraPorts",
        "gpuModels",
        "image",
        "labels",
        "livenessProbe",
        "maxUnavailablePodDisruptionBudget",
        "readinessProbe",
        "replicaCount",
        "resources",
        "secrets",
        "servicePort"
    ]
}
values.yaml
# -- Default values for chart vllm
# -- Declare variables to be passed into your templates.

# -- Image configuration
image:
  # -- Image repository
  repository: "vllm/vllm-openai"
  # -- Image tag
  tag: "latest"
  # -- Container launch command, one argument per list entry
  command:
    - "vllm"
    - "serve"
    - "/data/"
    - "--served-model-name"
    - "opt-125m"
    - "--dtype"
    - "float32"
    - "--block-size"
    - "16"
    - "--host"
    - "0.0.0.0"
    - "--port"
    - "8000"

# -- Port the container listens on
containerPort: 8000
# -- Service name (null: not set)
serviceName: null
# -- Port exposed by the Service
servicePort: 80
# -- Additional container ports, appended after the main container port
extraPorts: []

# -- Number of pod replicas
replicaCount: 1

# -- Deployment strategy; when empty, a rolling update with
# maxSurge 100% and maxUnavailable 0 is used
deploymentStrategy: {}

# -- Resources of the main container
resources:
  requests:
    # -- Number of CPUs requested
    cpu: 4
    # -- Amount of memory requested
    memory: "16Gi"
    # -- Number of GPUs requested
    nvidia.com/gpu: 1
  limits:
    # -- CPU limit
    cpu: 4
    # -- Memory limit
    memory: "16Gi"
    # -- GPU limit
    nvidia.com/gpu: 1

# -- GPU models the pods may be scheduled on
# (matched against the nvidia.com/gpu.product node label)
gpuModels:
  - "TYPE_GPU_USED"

# -- Horizontal pod autoscaling
autoscaling:
  # -- Render the HorizontalPodAutoscaler
  enabled: false
  # -- Lower bound on the number of replicas
  minReplicas: 1
  # -- Upper bound on the number of replicas
  maxReplicas: 100
  # -- Target average CPU utilization, in percent
  targetCPUUtilizationPercentage: 80
  # targetMemoryUtilizationPercentage: 80

# -- Entries stored in the chart's ConfigMap
configs: {}

# -- Entries stored (base64-encoded) in the chart's Secret
secrets: {}

# -- External configuration
externalConfigs: []

# -- Custom Kubernetes objects rendered alongside the chart
customObjects: []

# -- maxUnavailable of the PodDisruptionBudget; 1 is used when empty
maxUnavailablePodDisruptionBudget: ""

# -- Additional configuration for the init container
extraInit:
  # -- Path of the model on the s3 which hosts model weights and config files
  s3modelpath: "relative_s3_model_path/opt-125m"
  # -- Storage size requested by the PersistentVolumeClaim
  pvcStorage: "1Gi"
  # -- Value of the AWS_EC2_METADATA_DISABLED environment variable
  awsEc2MetadataDisabled: true

# -- Additional containers configuration
extraContainers: []

# -- Readiness probe configuration
readinessProbe:
  # -- Seconds to wait after the container starts before the first readiness probe
  initialDelaySeconds: 5
  # -- Interval, in seconds, between readiness probes
  periodSeconds: 5
  # -- Consecutive probe failures after which the container is considered not ready
  failureThreshold: 3
  # -- HTTP request the kubelet sends to the server
  httpGet:
    # -- Path to access on the HTTP server
    path: /health
    # -- Name or number of the container port the server listens on
    port: 8000

# -- Liveness probe configuration
livenessProbe:
  # -- Seconds to wait after the container starts before the first liveness probe
  initialDelaySeconds: 15
  # -- Consecutive probe failures after which the container is considered not alive
  failureThreshold: 3
  # -- Interval, in seconds, between liveness probes
  periodSeconds: 10
  # -- HTTP request the kubelet sends to the server
  httpGet:
    # -- Path to access on the HTTP server
    path: /health
    # -- Name or number of the container port the server listens on
    port: 8000

# -- Labels set on the Deployment and used as the Service selector
labels:
  environment: "test"
  release: "test"