Encountering 0 bytes input in asynchronous internal model call in Business Logic Scripting even though image is non-empty (original) (raw)

Please provide the following information when requesting support.

Hardware - NVIDIA RTX A2000 8GB Laptop GPU
Operating System: Ubuntu 22.04
Riva Version: N/A
TLT Version (if relevant): N/A
How to reproduce the issue ? (This is for errors. Please share the command and the detailed log here)

I am trying to use Business Logic Scripting to get custom dictionary output from my model. The structure of the model repository is given below.

model_repository/
├── parseq
│   ├── 1
│   │   └── model.onnx
│   └── config.pbtxt
├── ppe_kit_detection
│   ├── 1
│   │   └── model.onnx
│   └── config.pbtxt
└── ppe_kit_detection_main
    ├── 1
    │   └── model.py
    └── config.pbtxt

All models load successfully and are in READY state.

triton | +------------------------+---------+--------+
triton | | Model | Version | Status |
triton | +------------------------+---------+--------+
triton | | parseq | 1 | READY |
triton | | ppe_kit_detection | 1 | READY |
triton | | ppe_kit_detection_main | 1 | READY |
triton | +------------------------+---------+--------+

The model.py file is given below.

import json
import numpy as np
import cv2
import asyncio
import triton_python_backend_utils as pb_utils
from PIL import Image

class TritonPythonModel:
    """Business Logic Scripting (BLS) wrapper around the inner PPE detector.

    For every incoming request the first image of ``person_input`` is resized
    and normalised, sent to the inner model through an asynchronous BLS call,
    and the returned detections are condensed into a small JSON document that
    is emitted in the ``output`` string tensor.
    """

    def initialize(self, args):
        """Read the model configuration and set up the fixed parameters.

        Args:
            args: Dictionary supplied by Triton; ``args["model_config"]`` is
                the JSON-encoded model configuration.
        """
        model_config = json.loads(args["model_config"])
        self.model_name = model_config["name"]

        # inner model params
        self.im_w = 736
        self.im_h = 736
        self.batch_sz = 16
        self.inner_input_name = "images"
        self.inner_output_name = "detections"
        self.inner_model_name = "ppe_kit_detection"

        # get model output configuration
        self.output_name = "output"
        output_config = pb_utils.get_output_config_by_name(
            model_config, self.output_name
        )
        self.output_dtype = pb_utils.triton_string_to_numpy(
            output_config["data_type"]
        )

        # post-process params
        self.conf_thresh = 0.25
        print(f"{self.model_name.upper()} initialized!", flush=True)

    def _preprocess(self, image):
        """Turn one HxWxC image into a contiguous (1, C, H, W) float32 batch.

        The image is resized to ``(self.im_w, self.im_h)``, scaled to [0, 1],
        clipped, and then mapped to [-1, 1] via ``(x - 0.5) / 0.5``.
        """
        resized = cv2.resize(image, (self.im_w, self.im_h))
        chw = resized.transpose(2, 0, 1).astype(np.float32) / 255.0
        chw = np.clip(chw, a_min=0, a_max=1)
        chw = (chw - 0.5) / 0.5
        # transpose() only changes strides, so the array reaching this point is
        # not C-contiguous; hand the inner request a plain contiguous buffer.
        return np.ascontiguousarray(np.expand_dims(chw, axis=0), dtype=np.float32)

    def _summarize_detections(self, detections):
        """Condense one detection matrix into the JSON-serialisable result.

        Column 4 is read as the score and column 5 as the class id; class 0 is
        reported as helmet and class 2 as shoe. Rows whose score does not
        exceed ``self.conf_thresh`` are ignored.
        """
        scores = detections[:, 4]
        classes = detections[:, 5]
        keep = scores > self.conf_thresh
        kept_scores, kept_classes = scores[keep], classes[keep]
        helmets = kept_scores[kept_classes == 0]
        shoes = kept_scores[kept_classes == 2]

        return {
            "has_helmet": len(helmets) > 0,
            # NOTE(review): requires at least two shoe detections, whereas
            # shoe_confidence below is reported from one -- confirm intended.
            "has_shoes": len(shoes) > 1,
            "helmet_confidence": float(np.min(helmets)) if len(helmets) > 0 else -1.0,
            "shoe_confidence": float(np.min(shoes)) if len(shoes) > 0 else -1.0,
        }

    def _encode_payload(self, payload):
        """Serialise ``payload`` to JSON and wrap it in a one-element array."""
        encoded = json.dumps(payload).encode("utf-8")
        return np.array([encoded], dtype=self.output_dtype)

    async def execute(self, requests):
        """Run the inner detector for every request and return JSON responses.

        Args:
            requests: The ``pb_utils.InferenceRequest`` objects handed over by
                Triton for this execution.

        Returns:
            One ``pb_utils.InferenceResponse`` per request, each carrying a
            single JSON string in the ``output`` tensor. A failed inner call
            yields a ``[PARTIAL]`` error document for that request only; any
            other exception yields a ``[CRITICAL]`` error document for every
            request.
        """
        num_requests = len(requests)
        infer_awaits = []
        try:
            # dispatch inference requests
            for rq in requests:
                # NOTE(review): only the first image of each request is used;
                # additional images in a client-side batch are ignored, and the
                # output tensor is built with shape [1] -- confirm this matches
                # the batching declared in config.pbtxt.
                im_tensor = pb_utils.get_input_tensor_by_name(rq, "person_input").as_numpy()[0]
                print(im_tensor.shape, flush=True)
                im = self._preprocess(im_tensor)

                # call inner models
                print(f"DEBUG: Sample image: {im.shape}\n", im[0, :10, :10], flush=True)
                # NOTE(review): if the inner model still reports "got 0" bytes
                # for a populated tensor, presumably the NumPy version in the
                # Python backend environment is not supported by this Triton
                # release -- TODO confirm against the release notes.
                infer_req = pb_utils.InferenceRequest(
                    model_name=self.inner_model_name,
                    requested_output_names=[self.inner_output_name],
                    inputs=[pb_utils.Tensor(self.inner_input_name, im)]
                )

                infer_awaits.append(infer_req.async_exec())

            print(f"INFO: Number of images: {len(infer_awaits)}", flush=True)

            # gather responses
            infer_responses = await asyncio.gather(*infer_awaits)

            # parse and post-process
            responses = []
            for i, rs in enumerate(infer_responses):
                # handle errors
                if rs.has_error():
                    print(f"ERROR [{i}]: {rs.error().message()}", flush=True)
                    responses.append({
                        "error": True,
                        "message": f"[PARTIAL]: {rs.error().message()}"
                    })
                    continue

                detections = pb_utils.get_output_tensor_by_name(
                    rs, self.inner_output_name
                ).as_numpy()[0]
                responses.append(self._summarize_detections(detections))

        except Exception as err:
            # critical error -- send error message to all responses
            print(f"ERROR: {err}", flush=True)
            error_msg = {
                "error": True,
                "message": f"[CRITICAL]: {err}"
            }
            responses = [error_msg.copy() for __ in range(num_requests)]

        # convert all responses to the output type
        return [
            pb_utils.InferenceResponse(
                output_tensors=[
                    pb_utils.Tensor(self.output_name, self._encode_payload(rs))
                ]
            )
            for rs in responses
        ]

The ppe_kit_detection_main/config.pbtxt is given below.

name: "ppe_kit_detection_main"
backend: "python"
max_batch_size: 128

input [
{
name: "person_input"
data_type: TYPE_UINT8
dims: [-1, -1, 3 ]          # HxWxC
}
]

output [
{
name: "output"
data_type: TYPE_STRING
dims: [ -1 ]
}
]

dynamic_batching {
max_queue_delay_microseconds: 5000
}

The ppe_kit_detection/config.pbtxt is given below.

name: "ppe_kit_detection"
platform: "onnxruntime_onnx"
max_batch_size: 16

input [
{
name: "images"
data_type: TYPE_FP32
dims: [ 3, 736, 736 ]
}
]

output [
{
name: "detections"
data_type: TYPE_FP32
dims: [ 300, 6 ]
}
]

dynamic_batching {
max_queue_delay_microseconds: 1000
}

The script I am using to test my logic is given below.

from typing import List

import numpy as np
import tritonclient.http as httpclient
# from tritonclient.utils import InferenceServerClientException

def test_ppe_model():
    """Send one random image to ``ppe_kit_detection_main`` and print the reply.

    Builds a single 480x640 RGB ``uint8`` image with a leading batch
    dimension, posts it over HTTP to the Triton server at ``localhost:8000``
    and prints every JSON string found in the ``output`` tensor. Any failure
    is reported on stdout instead of being raised.
    """
    # 1. Connection Settings
    url = "localhost:8000"  # Update if Triton is running elsewhere
    model_name = "ppe_kit_detection_main"

    try:
        # 2. Initialize Client
        triton_client = httpclient.InferenceServerClient(url=url)

        # 3. Prepare Dummy Input
        # Dimensions: [Batch, H, W, C] -> batch size 1, random H/W
        height, width = 480, 640
        # randint's upper bound is exclusive; 256 covers the full uint8 range.
        dummy_image = np.random.randint(0, 256, (1, height, width, 3), dtype=np.uint8)

        # 4. Define Inputs and Outputs
        inputs: List[httpclient.InferInput] = []
        inputs.append(httpclient.InferInput("person_input", dummy_image.shape, "UINT8"))
        inputs[0].set_data_from_numpy(dummy_image)

        outputs: List[httpclient.InferRequestedOutput] = []
        outputs.append(httpclient.InferRequestedOutput("output"))

        # 5. Run Inference
        print(f"Sending request to model '{model_name}'...")
        results = triton_client.infer(
            model_name=model_name,
            inputs=inputs,
            outputs=outputs
        )

        # 6. Parse Result
        # Triton returns STRING types as bytes in a numpy array
        print(results.get_response())
        output_data = results.as_numpy("output")
        if output_data is None:
            print("No 'output' tensor found in the response.")
            return

        # Flatten so the loop yields bytes objects whatever the output rank is.
        for i, json_resp in enumerate(output_data.reshape(-1)):
            # Decode bytes to string
            decoded_json = json_resp.decode('utf-8')
            print(f"\nResult for input {i}:")
            print(decoded_json)

    except Exception as e:
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    test_ppe_model()

When I check the logs of Triton (deployed in a container), I get the following error.

triton  | I0616 05:43:41.505186 168 http_server.cc:4580] "HTTP request: 2 /v2/models/ppe_kit_detection_main/infer"
triton  | I0616 05:43:41.505227 168 model_lifecycle.cc:339] "GetModel() 'ppe_kit_detection_main' version -1"
triton  | I0616 05:43:41.505240 168 model_lifecycle.cc:339] "GetModel() 'ppe_kit_detection_main' version -1"
triton  | I0616 05:43:41.505425 168 infer_request.cc:132] "[request id: <id_unknown>] Setting state from INITIALIZED to INITIALIZED"
triton  | I0616 05:43:41.505436 168 infer_request.cc:905] "[request id: <id_unknown>] prepared: [0x0x76de04007ee0] request id: , model: ppe_kit_detection_main, requested version: -1, actual version: 1, flags: 0x0, correlation id: 0, batch size: 1, priority: 0, timeout (us): 0\noriginal inputs:\n[0x0x76de04008808] input: person_input, type: UINT8, original shape: [1,480,640,3], batch + shape: [1,480,640,3], shape: [480,640,3]\noverride inputs:\ninputs:\n[0x0x76de04008808] input: person_input, type: UINT8, original shape: [1,480,640,3], batch + shape: [1,480,640,3], shape: [480,640,3]\noriginal requested outputs:\noutput\nrequested outputs:\noutput\n"
triton  | I0616 05:43:41.505456 168 infer_request.cc:132] "[request id: <id_unknown>] Setting state from INITIALIZED to PENDING"
triton  | I0616 05:43:41.510702 168 infer_request.cc:132] "[request id: <id_unknown>] Setting state from PENDING to EXECUTING"
triton  | I0616 05:43:41.510739 168 python_be.cc:1209] "model ppe_kit_detection_main, instance ppe_kit_detection_main_0, executing 1 requests"
triton  | (480, 640, 3)
triton  | DEBUG: Sample image: (1, 3, 736, 736)
triton  |  [[[-0.12156862 -0.12156862 -0.12156862 ... -0.12156862 -0.12156862
triton  |    -0.12156862]
triton  |   [-0.12156862 -0.12156862 -0.12156862 ... -0.12156862 -0.12156862
triton  |    -0.12156862]
triton  |   [-0.12156862 -0.12156862 -0.12156862 ... -0.12156862 -0.12156862
triton  |    -0.12156862]
triton  |   ...
triton  |   [-0.12156862 -0.12156862 -0.12156862 ... -0.12156862 -0.12156862
triton  |    -0.12156862]
triton  |   [-0.12156862 -0.12156862 -0.12156862 ... -0.12156862 -0.12156862
triton  |    -0.12156862]
triton  |   [-0.12156862 -0.12156862 -0.12156862 ... -0.12156862 -0.12156862
triton  |    -0.12156862]]
triton  | 
triton  |  [[-0.12156862 -0.12156862 -0.12156862 ... -0.12156862 -0.12156862
triton  |    -0.12156862]
triton  |   [-0.12156862 -0.12156862 -0.12156862 ... -0.12156862 -0.12156862
triton  |    -0.12156862]
triton  |   [-0.12156862 -0.12156862 -0.12156862 ... -0.12156862 -0.12156862
triton  |    -0.12156862]
triton  |   ...
triton  |   [-0.12156862 -0.12156862 -0.12156862 ... -0.12156862 -0.12156862
triton  |    -0.12156862]
triton  |   [-0.12156862 -0.12156862 -0.12156862 ... -0.12156862 -0.12156862
triton  |    -0.12156862]
triton  |   [-0.12156862 -0.12156862 -0.12156862 ... -0.12156862 -0.12156862
triton  |    -0.12156862]]
triton  | 
triton  |  [[-0.12156862 -0.12156862 -0.12156862 ... -0.12156862 -0.12156862
triton  |    -0.12156862]
triton  |   [-0.12156862 -0.12156862 -0.12156862 ... -0.12156862 -0.12156862
triton  |    -0.12156862]
triton  |   [-0.12156862 -0.12156862 -0.12156862 ... -0.12156862 -0.12156862
triton  |    -0.12156862]
triton  |   ...
triton  |   [-0.12156862 -0.12156862 -0.12156862 ... -0.12156862 -0.12156862
triton  |    -0.12156862]
triton  |   [-0.12156862 -0.12156862 -0.12156862 ... -0.12156862 -0.12156862
triton  |    -0.12156862]
triton  |   [-0.12156862 -0.12156862 -0.12156862 ... -0.12156862 -0.12156862
triton  |    -0.12156862]]]
triton  | I0616 05:43:41.530372 168 model_lifecycle.cc:339] "GetModel() 'ppe_kit_detection' version -1"
triton  | I0616 05:43:41.530404 168 model_lifecycle.cc:339] "GetModel() 'ppe_kit_detection' version -1"
triton  | I0616 05:43:41.530410 168 model_lifecycle.cc:339] "GetModel() 'ppe_kit_detection' version -1"
triton  | INFO: Number of images: 1
triton  | ERROR [0]: Model ppe_kit_detection - Error when running inference: [request id: <id_unknown>] input byte size mismatch for input 'images' for model 'ppe_kit_detection'. Expected 6500352, got 0
triton  | I0616 05:43:41.531502 168 infer_response.cc:174] "add response output: output: output, type: BYTES, shape: [1]"
triton  | I0616 05:43:41.531663 168 infer_request.cc:132] "[request id: <id_unknown>] Setting state from EXECUTING to RELEASED"
triton  | I0616 05:43:41.531693 168 python_be.cc:2043] "TRITONBACKEND_ModelInstanceExecute: model instance name ppe_kit_detection_main_0 released 1 requests"

As you can see, the image is not empty. The tensor is valid and is passed to the inference request under the correct input name of the internal detection model. Still, an error is thrown saying that the input is 0 bytes. Where am I going wrong?

Also, when I print the response from the test script I should get the JSON but all I get is this.

Sending request to model 'ppe_kit_detection_main'...
{'model_name': 'ppe_kit_detection_main', 'model_version': '1', 'outputs': [{'name': 'output', 'datatype': 'BYTES', 'shape': [1], 'parameters': {'binary_data_size': 0}}]}
An error occurred: cannot reshape array of size 0 into shape (1,)

My test script (the same one shown above) is given below.

from typing import List

import numpy as np
import tritonclient.http as httpclient
# from tritonclient.utils import InferenceServerClientException

def test_ppe_model():
    """Send one random image to ``ppe_kit_detection_main`` and print the reply.

    Builds a single 480x640 RGB ``uint8`` image with a leading batch
    dimension, posts it over HTTP to the Triton server at ``localhost:8000``
    and prints every JSON string found in the ``output`` tensor. Any failure
    is reported on stdout instead of being raised.
    """
    # 1. Connection Settings
    url = "localhost:8000"  # Update if Triton is running elsewhere
    model_name = "ppe_kit_detection_main"

    try:
        # 2. Initialize Client
        triton_client = httpclient.InferenceServerClient(url=url)

        # 3. Prepare Dummy Input
        # Dimensions: [Batch, H, W, C] -> batch size 1, random H/W
        height, width = 480, 640
        # randint's upper bound is exclusive; 256 covers the full uint8 range.
        dummy_image = np.random.randint(0, 256, (1, height, width, 3), dtype=np.uint8)

        # 4. Define Inputs and Outputs
        inputs: List[httpclient.InferInput] = []
        inputs.append(httpclient.InferInput("person_input", dummy_image.shape, "UINT8"))
        inputs[0].set_data_from_numpy(dummy_image)

        outputs: List[httpclient.InferRequestedOutput] = []
        outputs.append(httpclient.InferRequestedOutput("output"))

        # 5. Run Inference
        print(f"Sending request to model '{model_name}'...")
        results = triton_client.infer(
            model_name=model_name,
            inputs=inputs,
            outputs=outputs
        )

        # 6. Parse Result
        # Triton returns STRING types as bytes in a numpy array
        print(results.get_response())
        output_data = results.as_numpy("output")
        if output_data is None:
            print("No 'output' tensor found in the response.")
            return

        # Flatten so the loop yields bytes objects whatever the output rank is.
        for i, json_resp in enumerate(output_data.reshape(-1)):
            # Decode bytes to string
            decoded_json = json_resp.decode('utf-8')
            print(f"\nResult for input {i}:")
            print(decoded_json)

    except Exception as e:
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    test_ppe_model()

Why am I not getting the JSON output? What is the problem over here?