NVIDIA TensorRT MNIST Example with Triton Inference Server — seldon-core documentation (original) (raw)
This example shows how you can deploy a TensorRT model with NVIDIA Triton Server. In this case we use a prebuilt TensorRT model for NVIDIA v100 GPUs.
Note: this example requires some advanced setup and is intended for those with TensorRT experience.
Prerequisites¶
- Install the requirements listed in `requirements.txt`.
- An authorized Kubernetes cluster with V100 GPUs installed and configured.
- For GKE see GKE GPU Documentation
- Install Seldon Core and install Ambassador and port-forward to Ambassador on localhost:8003
This example uses the KFServing protocol supported by Triton Inference Server, which Seldon also supports.
%matplotlib inline import json
import numpy as np import tensorflow as tf import tensorflow_datasets as tfds from matplotlib import pyplot as plt
def gen_image(arr):
    """Draw a single sample as a 28x28 grayscale image.

    The input is reshaped to 28x28, multiplied by 255 and cast to
    ``uint8`` before being handed to ``plt.imshow`` with the reversed
    gray colormap.

    Args:
        arr: Array-like holding 784 values (anything ``np.reshape`` can
            turn into a 28x28 grid).
            NOTE(review): the ``* 255`` scaling presumably expects values
            in [0, 1] -- confirm against the caller.

    Returns:
        The ``matplotlib.pyplot`` module, so the caller can chain further
        plotting calls.
    """
    grid = np.reshape(arr, (28, 28))
    pixels = (grid * 255).astype(np.uint8)
    plt.imshow(pixels, cmap=plt.cm.gray_r, interpolation="nearest")
    return plt
# Load the MNIST train/test splits as (image, label) pairs, together with
# the dataset metadata object.
(ds_train, ds_test), ds_info = tfds.load(
    "mnist",
    split=["train", "test"],
    shuffle_files=True,
    as_supervised=True,
    with_info=True,
)
def normalize_img(image, label):
    """Cast an image to ``float32`` and scale it by 255.

    Args:
        image: Image tensor to convert (``uint8`` according to the
            original docstring).
        label: Label belonging to ``image``; passed through unchanged.

    Returns:
        A ``(scaled_image, label)`` tuple.
    """
    # NOTE(review): the original docstring says "Normalizes images", yet the
    # code multiplies by 255 instead of dividing -- confirm the intended
    # scale against the later `255 - x` preprocessing step.
    scaled = tf.cast(image, tf.float32) * 255
    return scaled, label
# Apply normalize_img to every training example, letting tf.data pick the
# degree of parallelism.
ds_train = ds_train.map(
    normalize_img,
    num_parallel_calls=tf.data.experimental.AUTOTUNE,
)

# Expose the mapped dataset as NumPy values; samples are pulled from it
# later with next(npX).
npX = tfds.as_numpy(ds_train, graph=None)
MEANS = np.array( [ 255.0, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 254, 254, 254, 253, 252, 252, 251, 251, 252, 252, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 254, 254, 253, 251, 249, 248, 245, 243, 242, 242, 243, 246, 248, 251, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 254, 253, 250, 247, 242, 235, 228, 220, 213, 210, 211, 216, 224, 232, 240, 246, 251, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, 254, 251, 248, 242, 234, 223, 211, 196, 181, 170, 164, 166, 175, 189, 205, 221, 233, 243, 248, 252, 254, 255, 255, 255, 255, 255, 255, 254, 252, 248, 241, 231, 217, 202, 184, 166, 149, 136, 131, 134, 143, 159, 180, 201, 220, 234, 243, 249, 253, 255, 255, 255, 255, 255, 254, 253, 249, 243, 233, 219, 201, 181, 161, 143, 130, 122, 120, 122, 129, 141, 161, 185, 208, 227, 240, 248, 252, 254, 255, 255, 255, 255, 254, 251, 246, 238, 226, 208, 187, 164, 146, 135, 131, 132, 133, 132, 133, 139, 154, 178, 202, 223, 239, 248, 252, 255, 255, 255, 255, 254, 253, 251, 245, 236, 221, 200, 177, 156, 144, 144, 150, 156, 156, 151, 144, 144, 156, 178, 202, 224, 240, 249, 253, 255, 255, 255, 255, 254, 253, 251, 245, 235, 218, 195, 172, 155, 152, 161, 172, 176, 170, 161, 150, 149, 161, 183, 207, 227, 242, 250, 254, 255, 255, 255, 255, 255, 254, 251, 246, 234, 215, 191, 168, 156, 160, 173, 182, 179, 169, 157, 147, 149, 166, 190, 213, 230, 243, 251, 254, 255, 255, 255, 255, 255, 254, 252, 246, 233, 212, 186, 165, 157, 164, 175, 176, 165, 153, 142, 137, 147, 170, 196, 217, 231, 242, 251, 255, 255, 255, 255, 255, 255, 254, 252, 245, 230, 207, 182, 163, 158, 164, 168, 158, 143, 131, 125, 128, 146, 174, 200, 218, 231, 241, 250, 254, 255, 255, 255, 255, 255, 
255, 252, 243, 227, 205, 181, 164, 159, 161, 157, 139, 124, 115, 118, 127, 148, 176, 199, 216, 230, 240, 249, 254, 255, 255, 255, 255, 255, 254, 251, 241, 224, 204, 184, 169, 163, 160, 150, 132, 119, 116, 123, 133, 153, 177, 197, 214, 228, 240, 249, 254, 255, 255, 255, 255, 255, 254, 251, 239, 222, 205, 189, 177, 171, 166, 154, 139, 129, 128, 134, 144, 159, 177, 195, 213, 228, 241, 249, 254, 255, 255, 255, 255, 255, 254, 249, 237, 222, 207, 195, 186, 180, 175, 166, 153, 143, 140, 142, 150, 162, 178, 195, 214, 230, 242, 250, 254, 255, 255, 255, 255, 255, 253, 247, 235, 220, 207, 197, 189, 183, 179, 172, 160, 148, 142, 143, 150, 161, 178, 198, 217, 233, 244, 250, 254, 255, 255, 255, 255, 255, 253, 246, 233, 218, 204, 192, 184, 177, 172, 165, 153, 142, 137, 139, 148, 163, 183, 204, 222, 236, 246, 251, 254, 255, 255, 255, 255, 255, 253, 247, 234, 218, 201, 186, 174, 165, 157, 148, 137, 130, 129, 137, 151, 171, 194, 214, 230, 242, 248, 252, 254, 255, 255, 255, 255, 255, 253, 249, 238, 222, 203, 184, 168, 154, 143, 132, 124, 123, 130, 145, 165, 188, 209, 227, 239, 247, 251, 253, 255, 255, 255, 255, 255, 255, 254, 251, 244, 232, 214, 194, 174, 156, 142, 132, 130, 134, 148, 167, 189, 210, 226, 238, 246, 250, 253, 254, 255, 255, 255, 255, 255, 255, 255, 253, 250, 243, 231, 215, 196, 178, 163, 155, 156, 164, 179, 197, 215, 230, 240, 247, 251, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, 254, 253, 251, 246, 238, 228, 217, 208, 203, 204, 210, 218, 228, 236, 243, 248, 251, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 254, 252, 249, 245, 241, 238, 237, 237, 239, 242, 245, 247, 250, 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 254, 254, 253, 252, 250, 249, 248, 249, 249, 250, 252, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 254, 254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ] )
%%writefile model.yaml
apiVersion: machinelearning.seldon.io/v1alpha2
kind: SeldonDeployment
metadata:
  name: mnist
spec:
  protocol: kfserving
  transport: rest
  predictors:
  - graph:
      children: []
      implementation: TRITON_SERVER
      modelUri: gs://seldon-models/tensorrt/v100_mnist
      name: mnist
    componentSpecs:
    - spec:
        containers:
        - name: mnist
          resources:
            limits:
              nvidia.com/gpu: 1
    name: tensorrt
    replicas: 1
!kubectl apply -f model.yaml
seldondeployment.machinelearning.seldon.io/mnist created
!kubectl rollout status deploy/$(kubectl get deploy -l seldon-deployment-id=mnist -o jsonpath='{.items[0].metadata.name}')
deployment "mnist-tensorrt-0-mnist" successfully rolled out
Check metadata of model
!curl http://0.0.0.0:8003/seldon/default/mnist/v2/models/mnist
{"name":"mnist","versions":["1"],"platform":"tensorrt_plan","inputs":[{"name":"data","datatype":"FP32","shape":[-1,1,28,28]}],"outputs":[{"name":"prob","datatype":"FP32","shape":[-1,10,1,1]}]}
Test prediction on random digit.
# Pull the next (image, label) pair from the training iterator.
x, y = next(npX)

# Invert the image and subtract the MEANS array element-wise
# (presumably per-pixel means the model was trained with -- confirm).
X = 255 - x
X = X.reshape(784) - MEANS
gen_image(x)

# X is already a flat 784-vector, so no expand_dims/reshape round trip is
# needed: the request carries the values flattened and declares the
# [1, 1, 28, 28] shape separately.
values = X.flatten().tolist()

# Serialize with json.dumps rather than string concatenation so the request
# body is always well-formed JSON.
cmd = json.dumps(
    {
        "inputs": [
            {
                "name": "data",
                "data": values,
                "datatype": "FP32",
                "shape": [1, 1, 28, 28],
            }
        ]
    }
)
with open("input.json", "w") as f:
    f.write(cmd)
res=!curl -s -d @./input.json
-X POST http://0.0.0.0:8003/seldon/default/mnist/v2/models/mnist/infer
-H "Content-Type: application/json"
d=json.loads(res[0])
print(d)
predicted = np.array(d["outputs"][0]["data"]).argmax()
print("Truth",y,"predicted",predicted)
{'model_name': 'mnist', 'model_version': '1', 'outputs': [{'name': 'prob', 'datatype': 'FP32', 'shape': [1, 10, 1, 1], 'data': [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]}]} Truth 4 predicted 4