ios – How do I return the labels from the CoreML model in this async native Swift code?

0
265


The following code snippet is the ios native component of a frame processor being used by react-native-vision-camera in a react-native app. I recently discovered that if I load the model in an async fashion I can improve performance by up to 5x. The snippet runs just fine. It gets called by the react-native code, the MLModel is loaded, it processes the video frame it’s sent and correctly creates labels. But, I can’t figure out how to return the labels back to the Javascript process. I’ve added a number of print statements to assess what’s happening and it seems that the labels variable is properly handled within the async MlModel.load() section, it’s populated with the labels created by the model, but the section after the MLModel.load() where I try to return those labels, the labels object is somehow empty.

TIA for your help,

Brendan

SWIFT CODE

import Foundation
import Vision

@available(iOS 14.0, *)
@objc(YOLOv5sImageLabellerPlugin)
public class YOLOv5sImageLabellerPlugin: NSObject, FrameProcessorPluginBase {
  @objc
  public static func callback(_ frame: Frame!, withArgs _: [Any]!) -> Any! {
    guard let imageBuffer = CMSampleBufferGetImageBuffer(frame.buffer) else {
      return nil
    }

    struct FrameLabel: Codable {
      var label: String
      var confidence: Float
      var boundingBox: Bbox
    }

    struct Bbox: Codable {
      var minx: CGFloat
      var miny: CGFloat
      var width: CGFloat
      var height: CGFloat
    }

    var labels = [FrameLabel]()
    let encoder = JSONEncoder()
    if let url = Bundle.main.url(forResource: "Yolov5sModel", withExtension: "mlmodelc") {
      let config = MLModelConfiguration()
      MLModel.load(contentsOf: url, configuration: config) { result in
        switch result {
            case .success(let yoloModel):
              print("Model loaded and ready.")
              let methodStart = Date()
              runModel(imageBuffer: imageBuffer, yoloModel: yoloModel)
              let methodFinish = Date()
              let executionTime = methodFinish.timeIntervalSince(methodStart)
              print("MODEL Execution time: \(executionTime)")
            case .failure(let error):
                print("Error loading model: \(error)")
        }
      }
      print("I AM HERE!")
      print("AFTER LABELS: \(labels)")
      let data = try! encoder.encode(labels)
      let json = String(data: data, encoding: .utf8)
      return json
    }

    func runModel(imageBuffer: CVPixelBuffer, yoloModel: MLModel) {
      print("MODEL RUNNING")
      do {
        let model = try VNCoreMLModel(for: yoloModel)
        let handler = VNImageRequestHandler(cvPixelBuffer: imageBuffer)

        let request = VNCoreMLRequest(model: model, completionHandler: { request, _ in
          if let results = request.results as? [VNRecognizedObjectObservation] {
            print("RESULTS \(results)")
            processResults(results: results)
          }
        })

        request.preferBackgroundProcessing = false
        request.imageCropAndScaleOption = VNImageCropAndScaleOption.centerCrop
        try handler.perform([request])
      } catch {
        print("failed to perform")
      }
    }

    func processResults(results: [VNRecognizedObjectObservation]) {
      for result in results {
        let boundingBox = Bbox(minx: result.boundingBox.minX, miny: result.boundingBox.minY, width: result.boundingBox.width, height: result.boundingBox.height)
        let frameLabel = FrameLabel(label: result.labels[0].identifier, confidence: result.confidence, boundingBox: boundingBox)
        print("processResults: \(frameLabel)")
        labels.append(frameLabel)
      }
      print("processResults LABELS \(labels)")
    }

    return labels
  }
}

EXAMPLE LOG

Model loaded and ready.
MODEL RUNNING
RESULTS [<VNRecognizedObjectObservation: 0x280461b00> F8101A4B-AABA-4C05-8076-556714844CF8 VNCoreMLRequestRevision1 confidence=0.960490 boundingBox=[0.331763, 0.539413, 0.0946777, 0.0488342] labels=[label1, label2], <VNRecognizedObjectObservation: 0x280463690> 81CD119A-0290-46C0-9F82-9F5B1D328C7A VNCoreMLRequestRevision1 confidence=0.911648 boundingBox=[0.753906, 0.526202, 0.0765625, 0.0403198] labels=[label1, label2], <VNRecognizedObjectObservation: 0x280461950> 53370905-8957-49EB-B227-5358397A43AF VNCoreMLRequestRevision1 confidence=0.616545 boundingBox=[0.368903, 0.556092, 0.0243042, 0.0139389] labels=[label2, label1], <VNRecognizedObjectObservation: 0x280460ea0> 7FF9172C-9801-4016-8BB0-F8FCF6B24083 VNCoreMLRequestRevision1 confidence=0.503912 boundingBox=[0.776624, 0.539825, 0.0287842, 0.015271] labels=[label2, label1]]
processResults: FrameLabel(label: "label1", confidence: 0.9604895, boundingBox: App.YOLOv5sImageLabellerPlugin.(unknown context at $10398c0e8).(unknown context at $10398c220).Bbox(minx: 0.3317627049982548, miny: 0.5394134416710585, width: 0.09467773884534836, height: 0.04883422935381532))
processResults: FrameLabel(label: "label1", confidence: 0.91164815, boundingBox: App.YOLOv5sImageLabellerPlugin.(unknown context at $10398c0e8).(unknown context at $10398c220).Bbox(minx: 0.7539062611758709, miny: 0.5262023888062686, width: 0.07656250149011612, height: 0.04031982505694032))
processResults: FrameLabel(label: "label2", confidence: 0.6165447, boundingBox: App.YOLOv5sImageLabellerPlugin.(unknown context at $10398c0e8).(unknown context at $10398c220).Bbox(minx: 0.3689025994390249, miny: 0.5560920648276806, width: 0.02430419996380806, height: 0.01393890380859375))
processResults: FrameLabel(label: "label2", confidence: 0.503912, boundingBox: App.YOLOv5sImageLabellerPlugin.(unknown context at $10398c0e8).(unknown context at $10398c220).Bbox(minx: 0.7766235349699855, miny: 0.5398254357860424, width: 0.02878418006002903, height: 0.015270996722392738))
processResults LABELS [App.YOLOv5sImageLabellerPlugin.(unknown context at $10398c0e8).(unknown context at $10398c164).FrameLabel(label: "label1", confidence: 0.9604895, boundingBox: App.YOLOv5sImageLabellerPlugin.(unknown context at $10398c0e8).(unknown context at $10398c220).Bbox(minx: 0.3317627049982548, miny: 0.5394134416710585, width: 0.09467773884534836, height: 0.04883422935381532)), App.YOLOv5sImageLabellerPlugin.(unknown context at $10398c0e8).(unknown context at $10398c164).FrameLabel(label: "label1", confidence: 0.91164815, boundingBox: App.YOLOv5sImageLabellerPlugin.(unknown context at $10398c0e8).(unknown context at $10398c220).Bbox(minx: 0.7539062611758709, miny: 0.5262023888062686, width: 0.07656250149011612, height: 0.04031982505694032)), App.YOLOv5sImageLabellerPlugin.(unknown context at $10398c0e8).(unknown context at $10398c164).FrameLabel(label: "label2", confidence: 0.6165447, boundingBox: App.YOLOv5sImageLabellerPlugin.(unknown context at $10398c0e8).(unknown context at $10398c220).Bbox(minx: 0.3689025994390249, miny: 0.5560920648276806, width: 0.02430419996380806, height: 0.01393890380859375)), App.YOLOv5sImageLabellerPlugin.(unknown context at $10398c0e8).(unknown context at $10398c164).FrameLabel(label: "label2", confidence: 0.503912, boundingBox: App.YOLOv5sImageLabellerPlugin.(unknown context at $10398c0e8).(unknown context at $10398c220).Bbox(minx: 0.7766235349699855, miny: 0.5398254357860424, width: 0.02878418006002903, height: 0.015270996722392738))]
MODEL Execution time: 0.012269020080566406
I AM HERE!
AFTER LABELS: []
2022-10-03 09:51:00.840215+1100 App[592:74927] [javascript] JSON LABELS: