TheSpaghettiDetective · e-fominov · May 6, 2023 · May 6, 2023 · May 7, 2023 · May 7, 2023
diff --git a/ml_api/.gitignore b/ml_api/.gitignore
@@ -1 +1,2 @@
 model/model.weights
+model/*.onnx
diff --git a/ml_api/detect.py b/ml_api/detect.py
@@ -0,0 +1,119 @@
+#!python3
+import cv2
+from dataclasses import asdict
+import json
+from lib.geometry import compare_detections, Detection
+import os
+from lib.detection_model import *
+
+KNOWN_IMAGE_EXTENSIONS = ('.jpg', '.png')
+KNOWN_VIDEO_EXTENSIONS = ('.mp4', '.avi')
+
+if __name__ == "__main__":
+    # CLI entry point; argparse/time are imported here so the script does not rely on the star import.
+    import argparse
+    import time
+    parser = argparse.ArgumentParser()
+    parser.add_argument("image", type=str, help="Image file path")
+    parser.add_argument("--weights", type=str, default="model/model.weights", help="Model weights file")
+    parser.add_argument("--det-threshold", type=float, default=0.25, help="Detection threshold")
+    parser.add_argument("--nms-threshold", type=float, default=0.4, help="NMS threshold")
+    parser.add_argument("--preheat", action='store_true', help="Make a dry run of NN for initialization")
+    parser.add_argument("--cpu", action='store_true', help="Force use CPU")
+    parser.add_argument("--save-detections-to", type=str, help="Save detections into this file")
+    parser.add_argument("--compare-detections-with", type=str, help="Load detections from this file and compare with result")
+    parser.add_argument("--render-to", type=str, help="Save detections into this file or directory")
+    parser.add_argument("--print", action='store_true', help="Print detections")
+    opt = parser.parse_args()
+
+    if not os.path.exists(opt.image):  # explicit check: an assert is stripped under `python -O`
+        parser.error(f"Input file does not exist: {opt.image}")
+
+    net_main_1, meta_main_1 = load_net("model/model.cfg", opt.weights, "model/model.meta")
+    # force use CPU, only implemented for ONNX
+    if opt.cpu and onnx_ready and isinstance(net_main_1, OnnxNet):
+        net_main_1.force_cpu()
+
+    filename, extension = os.path.splitext(os.path.basename(opt.image))
+    # Case-insensitive match; anything that is not a known video is read as one still image.
+    is_video = extension.lower() in KNOWN_VIDEO_EXTENSIONS
+    frame_number = 0
+    vwr = None
+    if is_video:
+        cap = cv2.VideoCapture(opt.image)
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        frame_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        frame_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        reading_success, custom_image_bgr = cap.read()
+        if opt.render_to:
+            fourcc = cv2.VideoWriter_fourcc("m", "p", "4", "v")
+            vwr = cv2.VideoWriter(opt.render_to, fourcc, fps, (frame_w, frame_h))
+    else:
+        cap = None
+        custom_image_bgr = cv2.imread(opt.image)
+        if custom_image_bgr is None:  # cv2.imread returns None instead of raising
+            raise SystemExit(f"Could not read image: {opt.image}")
+        reading_success = True
+
+    try:
+        # preheat: the first run initializes all required resources, so the following runs are much faster
+        if opt.preheat and reading_success:
+            detect(net_main_1, meta_main_1, custom_image_bgr, thresh=opt.det_threshold, nms=opt.nms_threshold)
+
+        while reading_success:
+            started_at = time.time()
+            detections = detect(net_main_1, meta_main_1, custom_image_bgr, thresh=opt.det_threshold, nms=opt.nms_threshold)
+            execution_time = time.time() - started_at
+            print(f"Frame #{frame_number} execution time: {execution_time:.3} sec, detection count: {len(detections)}")
+
+            detections = Detection.from_tuple_list(detections)
+            # dump detections into some file
+            if opt.save_detections_to:
+                _, output_extension = os.path.splitext(opt.save_detections_to)
+                if is_video and not output_extension and not os.path.exists(opt.save_detections_to):
+                    os.makedirs(opt.save_detections_to)
+                if os.path.isdir(opt.save_detections_to):
+                    if is_video:
+                        output_file_name = f"{filename}#{frame_number:04}.json"
+                    else:
+                        output_file_name = f"{filename}.json"
+                    output_file_name = os.path.join(opt.save_detections_to, output_file_name)
+                else:
+                    output_file_name = opt.save_detections_to
+
+                with open(output_file_name, "w") as f:
+                    json.dump([asdict(d) for d in detections], f)
+
+            # load detections from some file and compare with detection result
+            if opt.compare_detections_with:
+                if is_video:
+                    read_file_name = os.path.join(opt.compare_detections_with, f"{filename}#{frame_number:04}.json")
+                else:
+                    read_file_name = opt.compare_detections_with
+
+                with open(read_file_name) as f:
+                    loaded = [Detection.from_dict(d) for d in json.load(f)]
+                if not compare_detections(loaded, detections):
+                    print(f"Frame #{frame_number} loaded detections and resulting are different")
+            if opt.render_to:
+                for d in detections:
+                    cv2.rectangle(custom_image_bgr,
+                        (int(d.box.left()), int(d.box.top())), (int(d.box.right()), int(d.box.bottom())),
+                        (0, 255, 0), 2)
+                if vwr is not None:
+                    vwr.write(custom_image_bgr)
+                else:
+                    cv2.imwrite(opt.render_to, custom_image_bgr)
+
+            if opt.print:
+                print(detections)
+
+            if cap is None:
+                break  # still image: exactly one frame (the old loop never ended for unknown extensions)
+            reading_success, custom_image_bgr = cap.read()
+            frame_number += 1
+    finally:  # release OpenCV handles so a rendered video is finalized on disk
+        for handle in (cap, vwr):
+            if handle is not None:
+                handle.release()
+
diff --git a/ml_api/lib/backend_darknet.py b/ml_api/lib/backend_darknet.py
@@ -0,0 +1,239 @@
+# pylint: disable=R, W0401, W0614, W0703
+from ctypes import *
+import random
+import os
+import cv2
+import platform
+from typing import List, Tuple
+
+
+# C-structures from Darknet lib
+
+class BOX(Structure):
+    """ctypes layout of a box: four C floats named x, y, w and h."""
+
+    _fields_ = [("x", c_float), ("y", c_float),
+                ("w", c_float), ("h", c_float)]
+
+
+class DETECTION(Structure):
+    """ctypes layout of one detection record returned by the Darknet library."""
+
+    _fields_ = [
+        ("bbox", BOX), ("classes", c_int),
+        ("prob", POINTER(c_float)), ("mask", POINTER(c_float)),
+        ("objectness", c_float), ("sort_class", c_int)]
+
+
+class IMAGE(Structure):
+    """ctypes layout of an image: w, h, c as C ints and a float data pointer."""
+
+    _fields_ = [("w", c_int), ("h", c_int), ("c", c_int),
+                ("data", POINTER(c_float))]
+
+
+class METADATA(Structure):
+    """ctypes layout: a class count plus a pointer to the C-string class names."""
+    _fields_ = [("classes", c_int), ("names", POINTER(c_char_p))]
+
+class YoloNet:
+    """Darknet-based detector implementation"""
+    net: c_void_p
+    meta: METADATA
+
+    def __init__(self, config_path: str, weight_path: str, meta_path: str):
+        """Load the network and its metadata; raise ValueError when a path does not exist."""
+        for label, path in (("config", config_path), ("weight", weight_path), ("data file", meta_path)):
+            if not os.path.exists(path):
+                raise ValueError("Invalid "+label+" path `"+os.path.abspath(path)+"`")
+        # os.fsencode accepts non-ASCII paths, which .encode("ascii") rejected with UnicodeEncodeError
+        self.net = load_net_custom(os.fsencode(config_path), os.fsencode(weight_path), 0, 1)  # batch size = 1
+        self.meta = load_meta(os.fsencode(meta_path))
+
+    def detect(self, meta, image, alt_names, thresh=.5, hier_thresh=.5, nms=.45, debug=False) -> List[Tuple[str, float, Tuple[float, float, float, float]]]:
+        """Run the network on a BGR image; return (name, prob, (x, y, w, h)) tuples, highest prob first.
+
+        Names come from alt_names when given, otherwise from meta.names (C strings,
+        which ctypes yields as bytes). NMS is skipped when nms is falsy.
+        """
+        custom_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        im, arr = array_to_image(custom_image)  # keep `arr` referenced: im.data points into its buffer
+        if debug:
+            print("Loaded image")
+        num = c_int(0)
+        pnum = pointer(num)
+        if debug:
+            print("Assigned num")
+            print("Assigned pnum")
+        predict_image(self.net, im)
+        if debug:
+            print("did prediction")
+        dets = get_network_boxes(self.net, custom_image.shape[1], custom_image.shape[0], thresh, hier_thresh, None, 0, pnum, 0)  # OpenCV
+        num = pnum[0]
+        # the C-allocated detections must be freed even if post-processing raises
+        try:
+            if debug:
+                print("Got dets")
+                print("got zeroth index of pnum")
+            if nms:
+                do_nms_sort(dets, num, meta.classes, nms)
+            names = meta.names if alt_names is None else alt_names
+            res = []
+            if debug:
+                print("did sort")
+                print("about to range")
+            for j in range(num):
+                if debug:
+                    print("Ranging on "+str(j)+" of "+str(num))
+                    print("Classes: "+str(meta), meta.classes, meta.names)
+                for i in range(meta.classes):
+                    if debug:
+                        print("Class-ranging on "+str(i)+" of "+str(meta.classes)+"= "+str(dets[j].prob[i]))
+                    if dets[j].prob[i] > 0:
+                        b = dets[j].bbox
+                        nameTag = names[i]
+                        if debug:
+                            print("Got bbox", b)
+                            print(nameTag)
+                            print(dets[j].prob[i])
+                            print((b.x, b.y, b.w, b.h))
+                        res.append((nameTag, dets[j].prob[i], (b.x, b.y, b.w, b.h)))
+            if debug:
+                print("did range")
+            res.sort(key=lambda x: -x[1])
+            if debug:
+                print("did sort")
+        finally:
+            free_detections(dets, num)
+        if debug:
+            print("freed detections")
+        return res
+
+DIRNAME = os.path.abspath(  # <directory of this file>/../bin, as an absolute path
+    os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", "bin")
+)
+# Load the Darknet shared library at import time. May fail if some dependencies like OpenCV are not installed
+
+hasGPU = os.environ.get('HAS_GPU', 'False') == 'True'  # true only when env var HAS_GPU is exactly 'True'
+so_path = os.path.join(DIRNAME, "model_{}{}.so".format('gpu_' if hasGPU else '', platform.machine()))  # model_[gpu_]<machine>.so
+
+lib = CDLL(so_path, RTLD_GLOBAL)
+lib.network_width.argtypes = [c_void_p]
+lib.network_width.restype = c_int
+lib.network_height.argtypes = [c_void_p]
+lib.network_height.restype = c_int
+
+predict = lib.network_predict  # the same C symbol is bound again as `network_predict` below
+predict.argtypes = [c_void_p, POINTER(c_float)]
+predict.restype = POINTER(c_float)
+
+if hasGPU:  # cuda_set_device is only looked up when the GPU library was selected
+    set_gpu = lib.cuda_set_device
+    set_gpu.argtypes = [c_int]
+
+make_image = lib.make_image
+make_image.argtypes = [c_int, c_int, c_int]
+make_image.restype = IMAGE
+
+get_network_boxes = lib.get_network_boxes
+get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int), c_int]
+get_network_boxes.restype = POINTER(DETECTION)
+
+make_network_boxes = lib.make_network_boxes
+make_network_boxes.argtypes = [c_void_p]
+make_network_boxes.restype = POINTER(DETECTION)
+
+free_detections = lib.free_detections
+free_detections.argtypes = [POINTER(DETECTION), c_int]
+
+free_ptrs = lib.free_ptrs
+free_ptrs.argtypes = [POINTER(c_void_p), c_int]
+
+network_predict = lib.network_predict  # second name for lib.network_predict (see `predict` above)
+network_predict.argtypes = [c_void_p, POINTER(c_float)]
+
+reset_rnn = lib.reset_rnn
+reset_rnn.argtypes = [c_void_p]
+
+load_net = lib.load_network
+load_net.argtypes = [c_char_p, c_char_p, c_int]
+load_net.restype = c_void_p
+
+load_net_custom = lib.load_network_custom  # used by YoloNet.__init__ to create the network handle
+load_net_custom.argtypes = [c_char_p, c_char_p, c_int, c_int]
+load_net_custom.restype = c_void_p
+
+do_nms_obj = lib.do_nms_obj
+do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]
+
+do_nms_sort = lib.do_nms_sort
+do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]
+
+free_image = lib.free_image
+free_image.argtypes = [IMAGE]
+
+letterbox_image = lib.letterbox_image
+letterbox_image.argtypes = [IMAGE, c_int, c_int]
+letterbox_image.restype = IMAGE
+
+load_meta = lib.get_metadata  # Python-side name differs from the C symbol get_metadata
+lib.get_metadata.argtypes = [c_char_p]
+lib.get_metadata.restype = METADATA
+
+load_image = lib.load_image_color
+load_image.argtypes = [c_char_p, c_int, c_int]
+load_image.restype = IMAGE
+
+rgbgr_image = lib.rgbgr_image
+rgbgr_image.argtypes = [IMAGE]
+
+predict_image = lib.network_predict_image  # takes the IMAGE struct by value
+predict_image.argtypes = [c_void_p, IMAGE]
+predict_image.restype = POINTER(c_float)
+
+def sample(probs):
+    """Pick a random index, weighting each index by its share of sum(probs)."""
+    total = sum(probs)
+    shares = [weight / total for weight in probs]
+    threshold = random.uniform(0, 1)
+    for index, share in enumerate(shares):
+        threshold -= share
+        if threshold <= 0:
+            return index
+    return len(shares) - 1
+
+def c_array(ctype, values):
+    """Return a new ctypes array of ctype initialised from values."""
+    array_type = ctype * len(values)
+    return array_type(*values)
+
+def array_to_image(arr):
+    """Convert an HxWxC array into a Darknet IMAGE of float32 values divided by 255.
+
+    Returns (im, data); keep a reference to data while im is in use, because
+    im.data points into that array's buffer.
+    """
+    import numpy as np
+    planes = arr.transpose(2, 0, 1)
+    c, h, w = planes.shape
+    data = np.ascontiguousarray(planes.flat, dtype=np.float32) / 255.0
+    return IMAGE(w, h, c, data.ctypes.data_as(POINTER(c_float))), data
+
+
+def classify(net, meta, im):
+    """Run the network on im and return (name, score) pairs, highest score first.
+
+    Names come from a module-level alt_names sequence when one has been set,
+    otherwise from meta.names.
+    """
+    # alt_names is never assigned in this module, so `global alt_names` raised
+    # NameError on every call; read it from the module globals with a None default.
+    names = globals().get("alt_names")
+    if names is None:
+        names = meta.names
+    out = predict_image(net, im)
+    return sorted(((names[i], out[i]) for i in range(meta.classes)), key=lambda x: -x[1])
+
+
+
+