Segmentation fault while using ROS Melodic for inference with TensorRT on Jetson Nano #2

jrvis1726 commented Dec 1, 2023

Hello everyone,

I am currently working on a project on a Jetson Nano where I am trying to run TensorRT inference (YOLOv5 object detection) inside a ROS Melodic node. I keep hitting a bus error (core dumped) / segmentation fault (core dumped), which seems to arise from a memory conflict between ROS and TensorRT. I am seeking advice or solutions from anyone who might have faced and resolved a similar issue.
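One direction I have started experimenting with (not yet confirmed to fix the crash) is dropping `pycuda.autoinit` and managing the CUDA context explicitly, so the context is made current in whichever thread actually runs inference. Below is only a minimal sketch of that idea; `infer_once` and its arguments are placeholders, not part of my node code:

```python
import pycuda.driver as cuda

cuda.init()
device = cuda.Device(0)      # the Jetson Nano's single GPU
ctx = device.make_context()  # created once at startup (and made current here)
ctx.pop()                    # release it from the creating thread

def infer_once(trt_context, bindings, stream):
    # Make the context current in the thread doing the TensorRT calls,
    # then always pop it, even if execution raises.
    ctx.push()
    try:
        trt_context.execute_async(batch_size=1, bindings=bindings,
                                  stream_handle=stream.handle)
        stream.synchronize()
    finally:
        ctx.pop()

# at shutdown: ctx.detach()
```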

I have provided the code for my inference node below. I would greatly appreciate any insights, suggestions, or solutions.

Environment Details:

JetPack: 4.6
ROS: Melodic
TensorRT: 8.2.1.8
PyCUDA: 2019.1.2
Torch: 1.10.0
CUDA: 10.2

```python
#!/usr/bin/env python3
import rospy
from sensor_msgs.msg import Image
from std_msgs.msg import String
import json
import cv2
import imutils
from cv_bridge import CvBridge, CvBridgeError
import numpy as np
import tensorrt as trt
import pycuda.autoinit
import random
import ctypes
import pycuda.driver as cuda
import time
import multiprocessing as mp
import threading

EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
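# Host and device buffers are module-level globals: filled once in
# YoloTRT.__init__ and reused by every Inference() call.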
host_inputs = []
cuda_inputs = []
host_outputs = []
cuda_outputs = []
bindings = []

class YoloTRT():
    def __init__(self, library, engine, conf, yolo_ver):
        print("Initiating Engine")
        self.CONF_THRESH = conf
        self.IOU_THRESHOLD = 0.4
        self.LEN_ALL_RESULT = 38001
        self.LEN_ONE_RESULT = 38
        self.yolo_version = yolo_ver
        # Note: this list is immediately overwritten by the COCO list below
        self.categories = ["pedestrian", "people", "bicycle", "car", "van", "truck", "tricycle", "awning-tricycle", "bus", "motor"]

    self.categories = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"]
   
    TRT_LOGGER = trt.Logger(trt.Logger.INFO)

    ctypes.CDLL(library)

    with open(engine, 'rb') as f:
        serialized_engine = f.read()

    runtime = trt.Runtime(TRT_LOGGER)
    self.engine = runtime.deserialize_cuda_engine(serialized_engine)
    self.batch_size = self.engine.max_batch_size

    for binding in self.engine:
        size = trt.volume(self.engine.get_binding_shape(binding)) * self.batch_size
        dtype = trt.nptype(self.engine.get_binding_dtype(binding))
        host_mem = cuda.pagelocked_empty(size, dtype)
        cuda_mem = cuda.mem_alloc(host_mem.nbytes)

        bindings.append(int(cuda_mem))
        if self.engine.binding_is_input(binding):
            self.input_w = self.engine.get_binding_shape(binding)[-1]
            self.input_h = self.engine.get_binding_shape(binding)[-2]
            host_inputs.append(host_mem)
            cuda_inputs.append(cuda_mem)
        else:
            host_outputs.append(host_mem)
            cuda_outputs.append(cuda_mem)
    print("Done Initializing Engine")

    def PreProcessImg(self, img):
        # Letterbox resize to (input_w, input_h): keep aspect ratio, pad with gray
        image_raw = img
        h, w, c = image_raw.shape
        image = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)
        r_w = self.input_w / w
        r_h = self.input_h / h
        if r_h > r_w:
            tw = self.input_w
            th = int(r_w * h)
            tx1 = tx2 = 0
            ty1 = int((self.input_h - th) / 2)
            ty2 = self.input_h - th - ty1
        else:
            tw = int(r_h * w)
            th = self.input_h
            tx1 = int((self.input_w - tw) / 2)
            tx2 = self.input_w - tw - tx1
            ty1 = ty2 = 0
        image = cv2.resize(image, (tw, th))
        image = cv2.copyMakeBorder(image, ty1, ty2, tx1, tx2, cv2.BORDER_CONSTANT, None, (128, 128, 128))
        image = image.astype(np.float32)
        image /= 255.0
        image = np.transpose(image, [2, 0, 1])  # HWC -> CHW
        image = np.expand_dims(image, axis=0)
        image = np.ascontiguousarray(image)
        return image, image_raw, h, w

    def Inference(self, img):
        input_image, image_raw, origin_h, origin_w = self.PreProcessImg(img)
        np.copyto(host_inputs[0], input_image.ravel())
        # A new stream and execution context are created on every call
        stream = cuda.Stream()
        self.context = self.engine.create_execution_context()
        cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream)
        t1 = time.time()
        self.context.execute_async(self.batch_size, bindings, stream_handle=stream.handle)
        cuda.memcpy_dtoh_async(host_outputs[0], cuda_outputs[0], stream)
        stream.synchronize()
        t2 = time.time()
        output = host_outputs[0]

        for i in range(self.batch_size):
            result_boxes, result_scores, result_classid = self.PostProcess(output[i * self.LEN_ALL_RESULT: (i + 1) * self.LEN_ALL_RESULT], origin_h, origin_w)

        det_res = []
        for j in range(len(result_boxes)):
            box = result_boxes[j]
            det = dict()
            det["class"] = self.categories[int(result_classid[j])]
            det["conf"] = result_scores[j]
            det["box"] = box
            det_res.append(det)
            self.PlotBbox(box, img, label="{}:{:.2f}".format(self.categories[int(result_classid[j])], result_scores[j]),)
        return det_res, t2 - t1

    def PostProcess(self, output, origin_h, origin_w):
        num = int(output[0])
        if self.yolo_version == "v5":
            pred = np.reshape(output[1:], (-1, self.LEN_ONE_RESULT))[:num, :]
            pred = pred[:, :6]
        elif self.yolo_version == "v7":
            pred = np.reshape(output[1:], (-1, 6))[:num, :]

        boxes = self.NonMaxSuppression(pred, origin_h, origin_w, conf_thres=self.CONF_THRESH, nms_thres=self.IOU_THRESHOLD)
        result_boxes = boxes[:, :4] if len(boxes) else np.array([])
        result_scores = boxes[:, 4] if len(boxes) else np.array([])
        result_classid = boxes[:, 5] if len(boxes) else np.array([])
        return result_boxes, result_scores, result_classid

    def NonMaxSuppression(self, prediction, origin_h, origin_w, conf_thres=0.5, nms_thres=0.4):
        boxes = prediction[prediction[:, 4] >= conf_thres]
        boxes[:, :4] = self.xywh2xyxy(origin_h, origin_w, boxes[:, :4])
        boxes[:, 0] = np.clip(boxes[:, 0], 0, origin_w - 1)
        boxes[:, 2] = np.clip(boxes[:, 2], 0, origin_w - 1)
        boxes[:, 1] = np.clip(boxes[:, 1], 0, origin_h - 1)
        boxes[:, 3] = np.clip(boxes[:, 3], 0, origin_h - 1)
        confs = boxes[:, 4]
        boxes = boxes[np.argsort(-confs)]
        keep_boxes = []
        while boxes.shape[0]:
            large_overlap = self.bbox_iou(np.expand_dims(boxes[0, :4], 0), boxes[:, :4]) > nms_thres
            label_match = boxes[0, -1] == boxes[:, -1]
            # Indices of boxes with lower confidence scores, large IOUs and matching labels
            invalid = large_overlap & label_match
            keep_boxes += [boxes[0]]
            boxes = boxes[~invalid]
        boxes = np.stack(keep_boxes, 0) if len(keep_boxes) else np.array([])
        return boxes

    def xywh2xyxy(self, origin_h, origin_w, x):
        y = np.zeros_like(x)
        r_w = self.input_w / origin_w
        r_h = self.input_h / origin_h
        if r_h > r_w:
            y[:, 0] = x[:, 0] - x[:, 2] / 2
            y[:, 2] = x[:, 0] + x[:, 2] / 2
            y[:, 1] = x[:, 1] - x[:, 3] / 2 - (self.input_h - r_w * origin_h) / 2
            y[:, 3] = x[:, 1] + x[:, 3] / 2 - (self.input_h - r_w * origin_h) / 2
            y /= r_w
        else:
            y[:, 0] = x[:, 0] - x[:, 2] / 2 - (self.input_w - r_h * origin_w) / 2
            y[:, 2] = x[:, 0] + x[:, 2] / 2 - (self.input_w - r_h * origin_w) / 2
            y[:, 1] = x[:, 1] - x[:, 3] / 2
            y[:, 3] = x[:, 1] + x[:, 3] / 2
            y /= r_h
        return y

    def bbox_iou(self, box1, box2, x1y1x2y2=True):
        if not x1y1x2y2:
            # Transform from center and width to exact coordinates
            b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
            b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
            b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
            b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
        else:
            # Get the coordinates of bounding boxes
            b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
            b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]

        inter_rect_x1 = np.maximum(b1_x1, b2_x1)
        inter_rect_y1 = np.maximum(b1_y1, b2_y1)
        inter_rect_x2 = np.minimum(b1_x2, b2_x2)
        inter_rect_y2 = np.minimum(b1_y2, b2_y2)
        inter_area = np.clip(inter_rect_x2 - inter_rect_x1 + 1, 0, None) * \
                     np.clip(inter_rect_y2 - inter_rect_y1 + 1, 0, None)
        b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
        b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)

        iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)

        return iou

    def PlotBbox(self, x, img, color=None, label=None, line_thickness=None):
        tl = (line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1)  # line/font thickness
        color = color or [random.randint(0, 255) for _ in range(3)]
        c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
        cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
        if label:
            tf = max(tl - 1, 1)  # font thickness
            t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
            c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
            cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
            cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA,)

def process_image(yolo):
    bridge = CvBridge()
    while not rospy.is_shutdown():
        try:
            data = rospy.wait_for_message("/camera/image_raw", Image, timeout=None)
            cv_image = bridge.imgmsg_to_cv2(data, "bgr8")
            cv_image = imutils.resize(cv_image, width=1504)
            detections, _ = yolo.Inference(cv_image)
            relevant_detections = [det for det in detections if det['class'] in ['pedestrian', 'people']]

            if relevant_detections:
                detection_pub.publish(json.dumps(relevant_detections))
        except CvBridgeError as e:
            print(e)

if __name__ == '__main__':
    mp.set_start_method('spawn')
    print("Init ROS Node")
    #rospy.init_node('yolo_image_processor', anonymous=True)
    print("ROS Node init done")

    detection_pub = rospy.Publisher("/detections", String, queue_size=10)
    print("Calling YOLO wrapper")
    yolo = YoloTRT(library="path/to/libmyplugins.so", engine="path/to/yolov5s.engine", conf=0.5, yolo_ver="v5")
    print("Yolo Wrapper initialized")

    try:
        process_image(yolo)
    except KeyboardInterrupt:
        print("Shutting down YoloTRT Node")

```
