|
# Namespace holding the KFServing control-plane objects defined in this
# manifest (ConfigMap, Secret, Services and the controller StatefulSet).
# The istio-injection label is set to "disabled" for this namespace.
apiVersion: v1
kind: Namespace
metadata:
  name: kfserving-system
  labels:
    control-plane: kfserving-controller-manager
    controller-tools.k8s.io: "1.0"
    istio-injection: disabled
---
# Namespace labelled for InferenceService use
# (serving.kubeflow.org/inferenceservice: enabled) with istio-injection
# set to "disabled".
# NOTE(review): presumably this label is what the webhook namespace selector
# (ENABLE_WEBHOOK_NAMESPACE_SELECTOR on the manager) matches - confirm.
apiVersion: v1
kind: Namespace
metadata:
  name: kfserving-pod
  labels:
    istio-injection: disabled
    serving.kubeflow.org/inferenceservice: enabled
---
# CustomResourceDefinition for the namespaced InferenceService kind
# (group serving.kubeflow.org, version v1alpha2).
#
# spec.default (required) and spec.canary (optional) share the same endpoint
# schema: a required predictor plus optional explainer and transformer.
# The trailing top-level "status" stanza is empty generator output.
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  creationTimestamp: null
  labels:
    controller-tools.k8s.io: "1.0"
  name: inferenceservices.serving.kubeflow.org
spec:
  # Extra columns printed for this resource: URL, readiness, traffic split, age.
  additionalPrinterColumns:
    - JSONPath: .status.url
      name: URL
      type: string
    - JSONPath: .status.conditions[?(@.type=='Ready')].status
      name: Ready
      type: string
    - JSONPath: .status.traffic
      name: Default Traffic
      type: integer
    - JSONPath: .status.canaryTraffic
      name: Canary Traffic
      type: integer
    - JSONPath: .metadata.creationTimestamp
      name: Age
      type: date
  group: serving.kubeflow.org
  names:
    kind: InferenceService
    plural: inferenceservices
    shortNames:
      - inferenceservice
  scope: Namespaced
  validation:
    openAPIV3Schema:
      properties:
        apiVersion:
          description: 'APIVersion defines the versioned schema of this representation
            of an object. Servers should convert recognized schemas to the latest
            internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
          type: string
        kind:
          description: 'Kind is a string value representing the REST resource this
            object represents. Servers may infer this from the endpoint the client
            submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
          type: string
        metadata:
          type: object
        spec:
          properties:
            # ---- canary endpoint (optional) ----
            canary:
              description: Canary defines an alternate endpoints to route a percentage
                of traffic.
              properties:
                explainer:
                  description: Explainer defines the model explanation service spec,
                    explainer service calls to predictor or transformer if it is specified.
                  properties:
                    alibi:
                      description: Spec for alibi explainer
                      properties:
                        config:
                          description: Inline custom parameter settings for explainer
                          type: object
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Defaults to latest Alibi Version
                          type: string
                        storageUri:
                          description: The location of a trained explanation model
                          type: string
                        type:
                          description: The type of Alibi explainer
                          type: string
                      required:
                        - type
                      type: object
                    custom:
                      description: Spec for a custom explainer
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        container:
                          type: object
                      required:
                        - container
                      type: object
                    logger:
                      description: Activate request/response logging
                      properties:
                        mode:
                          description: What payloads to log
                          type: string
                        url:
                          description: URL to send request logging CloudEvents
                          type: string
                      type: object
                    maxReplicas:
                      description: This is the up bound for autoscaler to scale to
                      format: int64
                      type: integer
                    minReplicas:
                      description: Minimum number of replicas, pods won't scale down
                        to 0 in case of no traffic
                      format: int64
                      type: integer
                    parallelism:
                      description: Parallelism, how many requests can be processed
                        concurrently
                      format: int64
                      type: integer
                    serviceAccountName:
                      description: ServiceAccountName is the name of the ServiceAccount
                        to use to run the service
                      type: string
                  type: object
                predictor:
                  description: Predictor defines the model serving spec +required
                  properties:
                    custom:
                      description: Spec for a custom predictor
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        container:
                          type: object
                      required:
                        - container
                      type: object
                    logger:
                      description: Activate request/response logging
                      properties:
                        mode:
                          description: What payloads to log
                          type: string
                        url:
                          description: URL to send request logging CloudEvents
                          type: string
                      type: object
                    maxReplicas:
                      description: This is the up bound for autoscaler to scale to
                      format: int64
                      type: integer
                    minReplicas:
                      description: Minimum number of replicas, pods won't scale down
                        to 0 in case of no traffic
                      format: int64
                      type: integer
                    onnx:
                      description: Spec for ONNX runtime (https://github.com/microsoft/onnxruntime)
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                        - storageUri
                      type: object
                    parallelism:
                      description: Parallelism, how many requests can be processed
                        concurrently
                      format: int64
                      type: integer
                    pytorch:
                      description: Spec for PyTorch predictor
                      properties:
                        modelClassName:
                          description: Defaults PyTorch model class name to 'PyTorchModel'
                          type: string
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                        - storageUri
                      type: object
                    serviceAccountName:
                      description: ServiceAccountName is the name of the ServiceAccount
                        to use to run the service
                      type: string
                    sklearn:
                      description: Spec for SKLearn predictor
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                        - storageUri
                      type: object
                    tensorflow:
                      description: Spec for Tensorflow Serving (https://github.com/tensorflow/serving)
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map.
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                        - storageUri
                      type: object
                    tensorrt:
                      description: Spec for TensorRT Inference Server (https://github.com/NVIDIA/triton-inference-server)
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                        - storageUri
                      type: object
                    xgboost:
                      description: Spec for XGBoost predictor
                      properties:
                        nthread:
                          description: Number of thread to be used by XGBoost
                          format: int64
                          type: integer
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                        - storageUri
                      type: object
                  type: object
                transformer:
                  description: Transformer defines the pre/post processing before
                    and after the predictor call, transformer service calls to predictor
                    service.
                  properties:
                    custom:
                      description: Spec for a custom transformer
                      properties:
                        container:
                          type: object
                      required:
                        - container
                      type: object
                    logger:
                      description: Activate request/response logging
                      properties:
                        mode:
                          description: What payloads to log
                          type: string
                        url:
                          description: URL to send request logging CloudEvents
                          type: string
                      type: object
                    maxReplicas:
                      description: This is the up bound for autoscaler to scale to
                      format: int64
                      type: integer
                    minReplicas:
                      description: Minimum number of replicas, pods won't scale down
                        to 0 in case of no traffic
                      format: int64
                      type: integer
                    parallelism:
                      description: Parallelism, how many requests can be processed
                        concurrently
                      format: int64
                      type: integer
                    serviceAccountName:
                      description: ServiceAccountName is the name of the ServiceAccount
                        to use to run the service
                      type: string
                  type: object
              required:
                - predictor
              type: object
            canaryTrafficPercent:
              description: CanaryTrafficPercent defines the percentage of traffic
                going to canary InferenceService endpoints
              format: int64
              type: integer
            # ---- default endpoint (required) ----
            default:
              description: Default defines default InferenceService endpoints +required
              properties:
                explainer:
                  description: Explainer defines the model explanation service spec,
                    explainer service calls to predictor or transformer if it is specified.
                  properties:
                    alibi:
                      description: Spec for alibi explainer
                      properties:
                        config:
                          description: Inline custom parameter settings for explainer
                          type: object
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Defaults to latest Alibi Version
                          type: string
                        storageUri:
                          description: The location of a trained explanation model
                          type: string
                        type:
                          description: The type of Alibi explainer
                          type: string
                      required:
                        - type
                      type: object
                    custom:
                      description: Spec for a custom explainer
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        container:
                          type: object
                      required:
                        - container
                      type: object
                    logger:
                      description: Activate request/response logging
                      properties:
                        mode:
                          description: What payloads to log
                          type: string
                        url:
                          description: URL to send request logging CloudEvents
                          type: string
                      type: object
                    maxReplicas:
                      description: This is the up bound for autoscaler to scale to
                      format: int64
                      type: integer
                    minReplicas:
                      description: Minimum number of replicas, pods won't scale down
                        to 0 in case of no traffic
                      format: int64
                      type: integer
                    parallelism:
                      description: Parallelism, how many requests can be processed
                        concurrently
                      format: int64
                      type: integer
                    serviceAccountName:
                      description: ServiceAccountName is the name of the ServiceAccount
                        to use to run the service
                      type: string
                  type: object
                predictor:
                  description: Predictor defines the model serving spec +required
                  properties:
                    custom:
                      description: Spec for a custom predictor
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        container:
                          type: object
                      required:
                        - container
                      type: object
                    logger:
                      description: Activate request/response logging
                      properties:
                        mode:
                          description: What payloads to log
                          type: string
                        url:
                          description: URL to send request logging CloudEvents
                          type: string
                      type: object
                    maxReplicas:
                      description: This is the up bound for autoscaler to scale to
                      format: int64
                      type: integer
                    minReplicas:
                      description: Minimum number of replicas, pods won't scale down
                        to 0 in case of no traffic
                      format: int64
                      type: integer
                    onnx:
                      description: Spec for ONNX runtime (https://github.com/microsoft/onnxruntime)
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                        - storageUri
                      type: object
                    parallelism:
                      description: Parallelism, how many requests can be processed
                        concurrently
                      format: int64
                      type: integer
                    pytorch:
                      description: Spec for PyTorch predictor
                      properties:
                        modelClassName:
                          description: Defaults PyTorch model class name to 'PyTorchModel'
                          type: string
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                        - storageUri
                      type: object
                    serviceAccountName:
                      description: ServiceAccountName is the name of the ServiceAccount
                        to use to run the service
                      type: string
                    sklearn:
                      description: Spec for SKLearn predictor
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                        - storageUri
                      type: object
                    tensorflow:
                      description: Spec for Tensorflow Serving (https://github.com/tensorflow/serving)
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map.
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                        - storageUri
                      type: object
                    tensorrt:
                      description: Spec for TensorRT Inference Server (https://github.com/NVIDIA/triton-inference-server)
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                        - storageUri
                      type: object
                    xgboost:
                      description: Spec for XGBoost predictor
                      properties:
                        nthread:
                          description: Number of thread to be used by XGBoost
                          format: int64
                          type: integer
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                        - storageUri
                      type: object
                  type: object
                transformer:
                  description: Transformer defines the pre/post processing before
                    and after the predictor call, transformer service calls to predictor
                    service.
                  properties:
                    custom:
                      description: Spec for a custom transformer
                      properties:
                        container:
                          type: object
                      required:
                        - container
                      type: object
                    logger:
                      description: Activate request/response logging
                      properties:
                        mode:
                          description: What payloads to log
                          type: string
                        url:
                          description: URL to send request logging CloudEvents
                          type: string
                      type: object
                    maxReplicas:
                      description: This is the up bound for autoscaler to scale to
                      format: int64
                      type: integer
                    minReplicas:
                      description: Minimum number of replicas, pods won't scale down
                        to 0 in case of no traffic
                      format: int64
                      type: integer
                    parallelism:
                      description: Parallelism, how many requests can be processed
                        concurrently
                      format: int64
                      type: integer
                    serviceAccountName:
                      description: ServiceAccountName is the name of the ServiceAccount
                        to use to run the service
                      type: string
                  type: object
              required:
                - predictor
              type: object
          required:
            - default
          type: object
        status:
          properties:
            canary:
              description: Statuses for the canary endpoints of the InferenceService
              type: object
            canaryTraffic:
              description: Traffic percentage that goes to canary services
              format: int64
              type: integer
            conditions:
              description: Conditions the latest available observations of a resource's
                current state. +patchMergeKey=type +patchStrategy=merge
              items:
                properties:
                  lastTransitionTime:
                    description: LastTransitionTime is the last time the condition
                      transitioned from one status to another. We use VolatileTime
                      in place of metav1.Time to exclude this from creating equality.Semantic
                      differences (all other things held constant).
                    type: string
                  message:
                    description: A human readable message indicating details about
                      the transition.
                    type: string
                  reason:
                    description: The reason for the condition's last transition.
                    type: string
                  severity:
                    description: Severity with which to treat failures of this type
                      of condition. When this is not specified, it defaults to Error.
                    type: string
                  status:
                    description: Status of the condition, one of True, False, Unknown.
                      +required
                    type: string
                  type:
                    description: Type of condition. +required
                    type: string
                required:
                  - type
                  - status
                type: object
              type: array
            default:
              description: Statuses for the default endpoints of the InferenceService
              type: object
            observedGeneration:
              description: ObservedGeneration is the 'Generation' of the Service that
                was last processed by the controller.
              format: int64
              type: integer
            traffic:
              description: Traffic percentage that goes to default services
              format: int64
              type: integer
            url:
              description: URL of the InferenceService
              type: string
          type: object
  version: v1alpha2
status:
  acceptedNames:
    kind: ""
    plural: ""
  conditions: []
  storedVersions: []
---
# ClusterRole that allows creating TokenReviews and SubjectAccessReviews.
# It is bound to the default ServiceAccount of kfserving-system by
# kfserving-proxy-rolebinding.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kfserving-proxy-role
rules:
  - apiGroups:
      - authentication.k8s.io
    resources:
      - tokenreviews
    verbs:
      - create
  - apiGroups:
      - authorization.k8s.io
    resources:
      - subjectaccessreviews
    verbs:
      - create
---
# ClusterRole bound to the default ServiceAccount of kfserving-system by
# manager-rolebinding.
#
# RBAC rules are purely additive, so rules sharing the same apiGroups and verbs
# are merged below. The former read-only rule on core "secrets" was dropped
# because the full-access rule on the same resource already grants get, list
# and watch. The effective permission set is unchanged.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  creationTimestamp: null
  name: manager-role
rules:
  # Knative Services: full management, plus status updates.
  - apiGroups:
      - serving.knative.dev
    resources:
      - services
    verbs:
      - get
      - list
      - watch
      - create
      - update
      - patch
      - delete
  - apiGroups:
      - serving.knative.dev
    resources:
      - services/status
    verbs:
      - get
      - update
      - patch
  # Istio VirtualServices: full management, plus status updates.
  - apiGroups:
      - networking.istio.io
    resources:
      - virtualservices
    verbs:
      - get
      - list
      - watch
      - create
      - update
      - patch
      - delete
  - apiGroups:
      - networking.istio.io
    resources:
      - virtualservices/status
    verbs:
      - get
      - update
      - patch
  # InferenceService custom resources: full management, plus status updates.
  - apiGroups:
      - serving.kubeflow.org
    resources:
      - inferenceservices
    verbs:
      - get
      - list
      - watch
      - create
      - update
      - patch
      - delete
  - apiGroups:
      - serving.kubeflow.org
    resources:
      - inferenceservices/status
    verbs:
      - get
      - update
      - patch
  # Core resources that are only read.
  - apiGroups:
      - ""
    resources:
      - serviceaccounts
      - configmaps
      - namespaces
    verbs:
      - get
      - list
      - watch
  # Core resources that are fully managed.
  - apiGroups:
      - ""
    resources:
      - secrets
      - services
    verbs:
      - get
      - list
      - watch
      - create
      - update
      - patch
      - delete
  # Admission webhook configurations: full management.
  - apiGroups:
      - admissionregistration.k8s.io
    resources:
      - mutatingwebhookconfigurations
      - validatingwebhookconfigurations
    verbs:
      - get
      - list
      - watch
      - create
      - update
      - patch
      - delete
---
# Grants kfserving-proxy-role to the "default" ServiceAccount in the
# kfserving-system namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kfserving-proxy-rolebinding
subjects:
  - kind: ServiceAccount
    name: default
    namespace: kfserving-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kfserving-proxy-role
---
# Grants manager-role to the "default" ServiceAccount in the
# kfserving-system namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  creationTimestamp: null
  name: manager-rolebinding
subjects:
  - kind: ServiceAccount
    name: default
    namespace: kfserving-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: manager-role
---
# KFServing settings. Every value under "data" is a JSON document stored as a
# string, so comments may only appear outside the block scalars.
apiVersion: v1
kind: ConfigMap
metadata:
  name: inferenceservice-config
  namespace: kfserving-system
data:
  # Names of the secret entries that hold GCS / S3 credentials.
  credentials: |-
    {
        "gcs": {
            "gcsCredentialFileName": "gcloud-application-credentials.json"
        },
        "s3": {
            "s3AccessKeyIDName": "awsAccessKeyID",
            "s3SecretAccessKeyName": "awsSecretAccessKey"
        }
    }
  # NOTE(review): the alibi "image" is only a registry prefix (it ends in "/"
  # and names no repository); combined with a version tag it cannot be pulled.
  # The intended repository is not visible in this file - fill it in.
  explainers: |-
    {
        "alibi": {
            "image" : "harbor.test.cn:8443/18test/",
            "defaultImageVersion": "0.2.2",
            "allowedImageVersions": [
                "0.2.2"
            ]
        }
    }
  # "ingressGateway" previously held a container-registry prefix
  # ("harbor.test.cn:8443/18test/"), which is not a gateway reference.
  # It is restored to the KFServing upstream default
  # (<gateway-name>.<namespace>) - confirm it matches the Istio Gateway
  # actually deployed in this cluster.
  ingress: |-
    {
        "ingressGateway" : "knative-ingress-gateway.knative-serving",
        "ingressService" : "istio-ingressgateway.istio-system.svc.cluster.local"
    }
  # NOTE(review): the logger "image" is only a registry prefix (ends in "/"),
  # same problem as the alibi image above - fill in the repository and tag.
  logger: |-
    {
        "image": "harbor.test.cn:8443/18test/",
        "memoryRequest": "100Mi",
        "memoryLimit": "1Gi",
        "cpuRequest": "100m",
        "cpuLimit": "1"
    }
  # Serving images per predictor type, with default and allowed version tags.
  predictors: |-
    {
        "tensorflow": {
            "image": "harbor.test.cn:8443/18test/apulistech/tensorflow-serving",
            "defaultImageVersion": "2.2.0",
            "defaultGpuImageVersion": "2.2.0-gpu",
            "allowedImageVersions": [
                "1.15.0",
                "1.15.0-arm64_910",
                "1.15.0-gpu",
                "2.2.0",
                "2.2.0-arm64_910",
                "2.2.0-gpu"
            ]
        },
        "apulisVision": {
            "image": "harbor.test.cn:8443/18test/apulistech/apulisvision",
            "defaultImageVersion": "1.0.0",
            "allowedImageVersions": [
                "1.0.0"
            ]
        },
        "mindspore-serving": {
            "image": "harbor.test.cn:8443/18test/apulistech/mindspore-serving",
            "defaultImageVersion": "centerface-amd64_310",
            "allowedImageVersions": [ "yolov3-arm64_910","centerface-amd64_310","yolov3-amd64_910" ]
        }
    }
  # Image and resource settings for the storage initializer.
  storageInitializer: |-
    {
        "image": "harbor.test.cn:8443/18test/apulistech/kfserving-storage-initializer",
        "memoryRequest": "100Mi",
        "memoryLimit": "1Gi",
        "cpuRequest": "100m",
        "cpuLimit": "1"
    }
  # No transformer images are configured.
  transformers: |-
    {
    }
---
# Secret declared without any data in this manifest. The controller
# StatefulSet mounts it at /tmp/cert and passes its name to the manager via
# the SECRET_NAME environment variable.
# NOTE(review): presumably the manager fills in the webhook serving
# certificate at start-up - confirm.
apiVersion: v1
kind: Secret
metadata:
  name: kfserving-webhook-server-secret
  namespace: kfserving-system
---
# Service exposing port 8443 and forwarding to the pod port named "https"
# (declared by the kube-rbac-proxy container of the controller StatefulSet).
# The prometheus.io annotations advertise it as an HTTPS scrape target.
apiVersion: v1
kind: Service
metadata:
  name: kfserving-controller-manager-metrics-service
  namespace: kfserving-system
  annotations:
    prometheus.io/port: "8443"
    prometheus.io/scheme: https
    prometheus.io/scrape: "true"
  labels:
    control-plane: controller-manager
    controller-tools.k8s.io: "1.0"
spec:
  ports:
    - name: https
      port: 8443
      targetPort: https
  selector:
    # Must equal the pod-template labels of the controller StatefulSet.
    # The previous value "controller-manager" matched no pod in this manifest,
    # which left the Service without endpoints.
    control-plane: kfserving-controller-manager
    controller-tools.k8s.io: "1.0"
---
# Service on port 443 selecting the controller-manager pods.
# NOTE(review): no targetPort is set, so traffic goes to pod port 443, while
# the manager container in this file declares containerPort 9876
# ("webhook-server"). Confirm whether the manager rewrites this Service at
# start-up; otherwise a targetPort is needed.
apiVersion: v1
kind: Service
metadata:
  name: kfserving-controller-manager-service
  namespace: kfserving-system
  labels:
    control-plane: kfserving-controller-manager
    controller-tools.k8s.io: "1.0"
spec:
  selector:
    control-plane: kfserving-controller-manager
    controller-tools.k8s.io: "1.0"
  ports:
    - port: 443
---
# StatefulSet for the KFServing controller manager, pinned to nodes labelled
# restfulapi=active and archType=amd64. Each pod runs two containers:
#   - kube-rbac-proxy: serves HTTPS on 8443 and proxies to 127.0.0.1:8080
#   - manager: runs /manager with its metrics address on 127.0.0.1:8080 and a
#     webhook-server port on 9876
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: kfserving-controller-manager-amd64
  namespace: kfserving-system
  labels:
    control-plane: kfserving-controller-manager
    controller-tools.k8s.io: "1.0"
spec:
  # NOTE(review): no Service named "controller-manager-service" is defined in
  # this manifest (the closest is kfserving-controller-manager-service).
  # serviceName cannot be changed on an existing StatefulSet, so correcting it
  # requires re-creating the object.
  serviceName: controller-manager-service
  selector:
    matchLabels:
      control-plane: kfserving-controller-manager
      controller-tools.k8s.io: "1.0"
  template:
    metadata:
      labels:
        control-plane: kfserving-controller-manager
        controller-tools.k8s.io: "1.0"
    spec:
      nodeSelector:
        restfulapi: active
        archType: amd64
      tolerations:
        - key: CriticalAddonsOnly
          operator: Exists
        - key: node-role.kubernetes.io/master
          effect: NoSchedule
      terminationGracePeriodSeconds: 10
      containers:
        - name: kube-rbac-proxy
          # NOTE(review): a ":latest" tag combined with IfNotPresent means a
          # node keeps whichever image it pulled first.
          image: harbor.apulis.cn:8443/aiarts_v1.6.0_rc4_single/apulistech/kfserving-kube-rbac-proxy:latest
          imagePullPolicy: IfNotPresent
          args:
            - --secure-listen-address=0.0.0.0:8443
            - --upstream=http://127.0.0.1:8080/
            - --logtostderr=true
            - --v=10
          ports:
            - containerPort: 8443
              name: https
        - name: manager
          image: harbor.apulis.cn:8443/aiarts_v1.6.0_rc4_single/apulistech/kfserving-manager:latest
          imagePullPolicy: IfNotPresent
          command:
            - /manager
          args:
            - --metrics-addr=127.0.0.1:8080
          env:
            # Namespace the pod runs in, taken from the downward API.
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: SECRET_NAME
              value: kfserving-webhook-server-secret
            - name: ENABLE_WEBHOOK_NAMESPACE_SELECTOR
              value: enabled
          ports:
            - containerPort: 9876
              name: webhook-server
              protocol: TCP
          resources:
            limits:
              cpu: 100m
              memory: 300Mi
            requests:
              cpu: 100m
              memory: 200Mi
          volumeMounts:
            - mountPath: /tmp/cert
              name: cert
              readOnly: true
      volumes:
        # Backed by the webhook server secret; 420 decimal is file mode 0644.
        - name: cert
          secret:
            defaultMode: 420
            secretName: kfserving-webhook-server-secret
|