From 6d9a192033544a75c6f5e8c3b23bb043786a89ae Mon Sep 17 00:00:00 2001 From: himanshunaidu Date: Sat, 24 Jan 2026 15:40:59 -0800 Subject: [PATCH 01/11] Add legacy attributes for future comparison with older implementations --- .../AccessibilityFeatureAttribute.swift | 43 +++++++++++++++++++ .../Config/MapillaryCustom11ClassConfig.swift | 5 ++- .../AttributeEstimationPipeline.swift | 9 ++++ .../SubView/AnnotationFeatureDetailView.swift | 26 +++++++++++ 4 files changed, 82 insertions(+), 1 deletion(-) diff --git a/IOSAccessAssessment/AccessibilityFeature/Attributes/AccessibilityFeatureAttribute.swift b/IOSAccessAssessment/AccessibilityFeature/Attributes/AccessibilityFeatureAttribute.swift index 860a384..1723bac 100644 --- a/IOSAccessAssessment/AccessibilityFeature/Attributes/AccessibilityFeatureAttribute.swift +++ b/IOSAccessAssessment/AccessibilityFeature/Attributes/AccessibilityFeatureAttribute.swift @@ -24,6 +24,13 @@ enum AccessibilityFeatureAttribute: String, Identifiable, CaseIterable, Codable, case lidarDepth case latitudeDelta case longitudeDelta + /** + - NOTE: + Legacy attributes for comparison with older data + */ + case widthLegacy + case runningSlopeLegacy + case crossSlopeLegacy enum Value: Sendable, Codable, Equatable { case length(Measurement) @@ -97,6 +104,24 @@ enum AccessibilityFeatureAttribute: String, Identifiable, CaseIterable, Codable, valueType: .length(Measurement(value: 0, unit: .meters)), osmTagKey: APIConstants.TagKeys.longitudeDeltaKey ) + case .widthLegacy: + return Metadata( + id: 10, name: "Width Legacy", unit: UnitLength.meters, + valueType: .length(Measurement(value: 0, unit: .meters)), + osmTagKey: "width_legacy" + ) + case .runningSlopeLegacy: + return Metadata( + id: 20, name: "Running Slope Legacy", unit: UnitAngle.degrees, + valueType: .angle(Measurement(value: 0, unit: .degrees)), + osmTagKey: "incline_legacy" + ) + case .crossSlopeLegacy: + return Metadata( + id: 30, name: "Cross Slope Legacy", unit: UnitAngle.degrees, + valueType: .angle(Measurement(value: 0, unit: .degrees)), + osmTagKey: "cross_slope_legacy" + ) } } @@ -150,6 +175,12 @@ extension AccessibilityFeatureAttribute { return true case (.longitudeDelta, .length): return true + case (.widthLegacy, .length): + return true + case (.runningSlopeLegacy, .angle): + return true + case (.crossSlopeLegacy, .angle): + return true default: return false } @@ -198,6 +229,12 @@ extension AccessibilityFeatureAttribute { return .length(Measurement(value: value, unit: .meters)) case .longitudeDelta: return .length(Measurement(value: value, unit: .meters)) + case .widthLegacy: + return .length(Measurement(value: value, unit: .meters)) + case .runningSlopeLegacy: + return .angle(Measurement(value: value, unit: .degrees)) + case .crossSlopeLegacy: + return .angle(Measurement(value: value, unit: .degrees)) } } @@ -229,6 +266,12 @@ extension AccessibilityFeatureAttribute { return String(format: "%.2f", measurement.converted(to: .meters).value) case (.longitudeDelta, .length(let measurement)): return String(format: "%.2f", measurement.converted(to: .meters).value) + case (.widthLegacy, .length(let measurement)): + return String(format: "%.2f", measurement.converted(to: .meters).value) + case (.runningSlopeLegacy, .angle(let measurement)): + return String(format: "%.2f", measurement.converted(to: .degrees).value) + case (.crossSlopeLegacy, .angle(let measurement)): + return String(format: "%.2f", measurement.converted(to: .degrees).value) default: return nil } diff --git 
a/IOSAccessAssessment/AccessibilityFeature/Config/MapillaryCustom11ClassConfig.swift b/IOSAccessAssessment/AccessibilityFeature/Config/MapillaryCustom11ClassConfig.swift index b581d35..575dd42 100644 --- a/IOSAccessAssessment/AccessibilityFeature/Config/MapillaryCustom11ClassConfig.swift +++ b/IOSAccessAssessment/AccessibilityFeature/Config/MapillaryCustom11ClassConfig.swift @@ -24,7 +24,10 @@ extension AccessibilityFeatureConfig { x: 0.0, y: 0.5, width: 1.0, height: 0.4 ), meshClassification: [.floor], - attributes: [.width, .runningSlope, .crossSlope, .surfaceIntegrity], + attributes: [ + .width, .runningSlope, .crossSlope, .surfaceIntegrity, + .widthLegacy, .runningSlopeLegacy, .crossSlopeLegacy + ], oswPolicy: OSWPolicy(oswElementClass: .Sidewalk) ), diff --git a/IOSAccessAssessment/AttributeEstimation/AttributeEstimationPipeline.swift b/IOSAccessAssessment/AttributeEstimation/AttributeEstimationPipeline.swift index 62ef3dc..eefd171 100644 --- a/IOSAccessAssessment/AttributeEstimation/AttributeEstimationPipeline.swift +++ b/IOSAccessAssessment/AttributeEstimation/AttributeEstimationPipeline.swift @@ -141,6 +141,15 @@ class AttributeEstimationPipeline: ObservableObject { case .crossSlope: let crossSlopeAttributeValue = try self.calculateCrossSlope(accessibilityFeature: accessibilityFeature) try accessibilityFeature.setAttributeValue(crossSlopeAttributeValue, for: .crossSlope, isCalculated: true) + case .widthLegacy: + let widthAttributeValue = try self.calculateWidth(accessibilityFeature: accessibilityFeature) + try accessibilityFeature.setAttributeValue(widthAttributeValue, for: .widthLegacy, isCalculated: true) + case .runningSlopeLegacy: + let runningSlopeAttributeValue = try self.calculateRunningSlope(accessibilityFeature: accessibilityFeature) + try accessibilityFeature.setAttributeValue(runningSlopeAttributeValue, for: .runningSlopeLegacy, isCalculated: true) + case .crossSlopeLegacy: + let crossSlopeAttributeValue = try self.calculateCrossSlope(accessibilityFeature: accessibilityFeature) + try accessibilityFeature.setAttributeValue(crossSlopeAttributeValue, for: .crossSlopeLegacy, isCalculated: true) default: continue } diff --git a/IOSAccessAssessment/View/SubView/AnnotationFeatureDetailView.swift b/IOSAccessAssessment/View/SubView/AnnotationFeatureDetailView.swift index e529499..e54d45d 100644 --- a/IOSAccessAssessment/View/SubView/AnnotationFeatureDetailView.swift +++ b/IOSAccessAssessment/View/SubView/AnnotationFeatureDetailView.swift @@ -168,6 +168,7 @@ struct AnnotationFeatureDetailView: View { } } + /// Experimental Attributes Section if (accessibilityFeature.accessibilityFeatureClass.experimentalAttributes.contains(.lidarDepth)) { Section(header: Text(AccessibilityFeatureAttribute.lidarDepth.displayName)) { numberTextView(attribute: .lidarDepth) @@ -185,6 +186,31 @@ struct AnnotationFeatureDetailView: View { numberTextView(attribute: .longitudeDelta) } } + + /// Legacy Attributes Section + if (accessibilityFeature.accessibilityFeatureClass.attributes.contains(.widthLegacy)) + { + Section(header: Text(AccessibilityFeatureAttribute.widthLegacy.displayName)) { + numberTextFieldView(attribute: .widthLegacy) + .focused($focusedField, equals: .widthLegacy) + } + } + + if (accessibilityFeature.accessibilityFeatureClass.attributes.contains(.runningSlopeLegacy)) + { + Section(header: Text(AccessibilityFeatureAttribute.runningSlopeLegacy.displayName)) { + numberTextFieldView(attribute: .runningSlopeLegacy) + .focused($focusedField, equals: .runningSlopeLegacy) + } + } + + if 
(accessibilityFeature.accessibilityFeatureClass.attributes.contains(.crossSlopeLegacy)) + { + Section(header: Text(AccessibilityFeatureAttribute.crossSlopeLegacy.displayName)) { + numberTextFieldView(attribute: .crossSlopeLegacy) + .focused($focusedField, equals: .crossSlopeLegacy) + } + } } } .onAppear { From 1131ffc783fd164116740edaeed5c95380ec531c Mon Sep 17 00:00:00 2001 From: himanshunaidu Date: Sat, 24 Jan 2026 17:59:46 -0800 Subject: [PATCH 02/11] Start adding logic for plane fitting --- IOSAccessAssessment.xcodeproj/project.pbxproj | 18 +++++++++++ .../AttributeEstimation/Plane/PlaneFit.swift | 7 ++++ .../Plane/PlaneFitting.metal | 32 +++++++++++++++++++ IOSAccessAssessment/Mesh/MeshPipeline.metal | 2 +- 4 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 IOSAccessAssessment/AttributeEstimation/Plane/PlaneFit.swift create mode 100644 IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitting.metal diff --git a/IOSAccessAssessment.xcodeproj/project.pbxproj b/IOSAccessAssessment.xcodeproj/project.pbxproj index 9eba2c7..568bde3 100644 --- a/IOSAccessAssessment.xcodeproj/project.pbxproj +++ b/IOSAccessAssessment.xcodeproj/project.pbxproj @@ -68,6 +68,8 @@ A35E051E2EDFB09A003C26CF /* OSMWay.swift in Sources */ = {isa = PBXBuildFile; fileRef = A35E051D2EDFB099003C26CF /* OSMWay.swift */; }; A364B5332F25576000325E5C /* DepthFilter.swift in Sources */ = {isa = PBXBuildFile; fileRef = A364B5322F25575D00325E5C /* DepthFilter.swift */; }; A364B5352F25589B00325E5C /* DepthFiltering.metal in Sources */ = {isa = PBXBuildFile; fileRef = A364B5342F25589600325E5C /* DepthFiltering.metal */; }; + A364B5D92F259AD700325E5C /* PlaneFit.swift in Sources */ = {isa = PBXBuildFile; fileRef = A364B5D82F259AD600325E5C /* PlaneFit.swift */; }; + A364B5DD2F259AFE00325E5C /* PlaneFitting.metal in Sources */ = {isa = PBXBuildFile; fileRef = A364B5DC2F259AF900325E5C /* PlaneFitting.metal */; }; A36C6E022E134CE600A86004 /* bisenetv2_35_640_640.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = A36C6E012E134CE600A86004 /* bisenetv2_35_640_640.mlpackage */; }; A374FAB72EE0173600055268 /* OSMResponseElement.swift in Sources */ = {isa = PBXBuildFile; fileRef = A374FAB62EE0173200055268 /* OSMResponseElement.swift */; }; A37E3E3C2EED60F300B07B77 /* PngEncoder.mm in Sources */ = {isa = PBXBuildFile; fileRef = A37E3E3B2EED60F300B07B77 /* PngEncoder.mm */; }; @@ -243,6 +245,8 @@ A35E051D2EDFB099003C26CF /* OSMWay.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OSMWay.swift; sourceTree = ""; }; A364B5322F25575D00325E5C /* DepthFilter.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DepthFilter.swift; sourceTree = ""; }; A364B5342F25589600325E5C /* DepthFiltering.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = DepthFiltering.metal; sourceTree = ""; }; + A364B5D82F259AD600325E5C /* PlaneFit.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PlaneFit.swift; sourceTree = ""; }; + A364B5DC2F259AF900325E5C /* PlaneFitting.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PlaneFitting.metal; sourceTree = ""; }; A36C6E012E134CE600A86004 /* bisenetv2_35_640_640.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = bisenetv2_35_640_640.mlpackage; sourceTree = ""; }; A374FAB62EE0173200055268 /* OSMResponseElement.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OSMResponseElement.swift; 
sourceTree = ""; }; A37E3E382EED60F300B07B77 /* lodepng.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = lodepng.h; sourceTree = ""; }; @@ -602,6 +606,7 @@ A35E050B2EDE359C003C26CF /* AttributeEstimation */ = { isa = PBXGroup; children = ( + A364B5D72F259A7800325E5C /* Plane */, A35E050E2EDE35ED003C26CF /* Localization */, A35E05172EDEA470003C26CF /* AttributeEstimationPipeline.swift */, ); @@ -663,6 +668,15 @@ path = Helpers; sourceTree = ""; }; + A364B5D72F259A7800325E5C /* Plane */ = { + isa = PBXGroup; + children = ( + A364B5DC2F259AF900325E5C /* PlaneFitting.metal */, + A364B5D82F259AD600325E5C /* PlaneFit.swift */, + ); + path = Plane; + sourceTree = ""; + }; A37E3E352EED60C100B07B77 /* CHelpers */ = { isa = PBXGroup; children = ( @@ -1185,6 +1199,7 @@ A35E05182EDEA476003C26CF /* AttributeEstimationPipeline.swift in Sources */, CAA947792CDE700A000C6918 /* AuthService.swift in Sources */, A3C22FD32CF194A600533BF7 /* CGImageUtils.swift in Sources */, + A364B5DD2F259AFE00325E5C /* PlaneFitting.metal in Sources */, A3DC22E92DCF0F9A0020CE84 /* ImageProcessing.metal in Sources */, A37E72142ED95C0C00CFE4EF /* MeshHelpers.swift in Sources */, A30C67E62EE27331006E4321 /* EditableAccessibilityFeature.swift in Sources */, @@ -1249,6 +1264,7 @@ A3FFAA832DE5253E002B99BD /* bisenetv2_53_640_640.mlpackage in Sources */, A3FFAA7A2DE01A0F002B99BD /* ARCameraView.swift in Sources */, A37E3E9E2EFBAA8700B07B77 /* AccessibilityFeatureSnapshot.swift in Sources */, + A364B5D92F259AD700325E5C /* PlaneFit.swift in Sources */, A3FFAA782DE01637002B99BD /* ARCameraUtils.swift in Sources */, A3FE166E2E1C2AF200DAE5BE /* SegmentationEncoder.swift in Sources */, A30BED3A2ED162F1004A5B51 /* ConnectedComponents.swift in Sources */, @@ -1442,6 +1458,7 @@ LD_RUNPATH_SEARCH_PATHS = "$(inherited)"; MARKETING_VERSION = 0.3; MTL_HEADER_SEARCH_PATHS = "$(SRCROOT)/IOSAccessAssessment/**"; + OTHER_CFLAGS = "-DACCELERATE_NEW_LAPACK"; PRODUCT_BUNDLE_IDENTIFIER = edu.uw.pointmapper; PRODUCT_NAME = "$(TARGET_NAME)"; SUPPORTED_PLATFORMS = "iphoneos iphonesimulator"; @@ -1485,6 +1502,7 @@ LD_RUNPATH_SEARCH_PATHS = "$(inherited)"; MARKETING_VERSION = 0.3; MTL_HEADER_SEARCH_PATHS = "$(SRCROOT)/IOSAccessAssessment/**"; + OTHER_CFLAGS = "-DACCELERATE_NEW_LAPACK"; PRODUCT_BUNDLE_IDENTIFIER = edu.uw.pointmapper; PRODUCT_NAME = "$(TARGET_NAME)"; SUPPORTED_PLATFORMS = "iphoneos iphonesimulator"; diff --git a/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFit.swift b/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFit.swift new file mode 100644 index 0000000..965d560 --- /dev/null +++ b/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFit.swift @@ -0,0 +1,7 @@ +// +// PlaneFit.swift +// IOSAccessAssessment +// +// Created by Himanshu on 1/24/26. +// + diff --git a/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitting.metal b/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitting.metal new file mode 100644 index 0000000..724ff26 --- /dev/null +++ b/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitting.metal @@ -0,0 +1,32 @@ +// +// PlaneFitting.metal +// IOSAccessAssessment +// +// Created by Himanshu on 1/24/26. 
+// + +#include +#include +using namespace metal; +#import "ShaderTypes.h" + +inline float3 projectPixelToWorld( + float2 pixelCoord, + float depthValue, + constant float4x4& cameraTransform, + constant float3x3& invIntrinsics, + uint2 imageSize +) { + float3 imagePoint = float3(pixelCoord, 1.0); + float3 ray = invIntrinsics * imagePoint; + float3 rayDirection = normalize(ray); + + float3 cameraPoint = rayDirection * depthValue; + cameraPoint = float3(cameraPoint.x, -cameraPoint.y, -cameraPoint.z); + float4 cameraPoint4 = float4(cameraPoint, 1.0); + + float4 worldPoint4 = cameraTransform * cameraPoint4; + float3 worldPoint = worldPoint4.xyz / worldPoint4.w; + + return worldPoint; +} diff --git a/IOSAccessAssessment/Mesh/MeshPipeline.metal b/IOSAccessAssessment/Mesh/MeshPipeline.metal index 09e82cb..cfaeb11 100644 --- a/IOSAccessAssessment/Mesh/MeshPipeline.metal +++ b/IOSAccessAssessment/Mesh/MeshPipeline.metal @@ -9,7 +9,7 @@ using namespace metal; #import "ShaderTypes.h" -// For debugging +/// For debugging enum DebugSlot : uint { zBelowZero = 0, outsideImage = 1, From 97aea75cf0dbe8e25fff2b6bf142077570034bfa Mon Sep 17 00:00:00 2001 From: himanshunaidu Date: Sat, 24 Jan 2026 18:05:46 -0800 Subject: [PATCH 03/11] Move the delta flip logic in LocalizationProcessor outside the delta function --- .../Localization/LocalizationProcessor.swift | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/IOSAccessAssessment/AttributeEstimation/Localization/LocalizationProcessor.swift b/IOSAccessAssessment/AttributeEstimation/Localization/LocalizationProcessor.swift index e046ef7..4d5ef30 100644 --- a/IOSAccessAssessment/AttributeEstimation/Localization/LocalizationProcessor.swift +++ b/IOSAccessAssessment/AttributeEstimation/Localization/LocalizationProcessor.swift @@ -60,8 +60,10 @@ struct LocalizationProcessor { cameraTransform: cameraTransform, cameraIntrinsics: cameraIntrinsics ) + let latitudeDelta = -delta.z + let longitudeDelta = delta.x return self.calculateLocation( - latitudeDelta: delta.z, longitudeDelta: delta.x, + latitudeDelta: latitudeDelta, longitudeDelta: longitudeDelta, deviceLocation: deviceLocation ) } @@ -81,7 +83,7 @@ struct LocalizationProcessor { cameraTransform: cameraTransform, cameraIntrinsics: cameraIntrinsics ) - return SIMD2(delta.z, delta.x) + return SIMD2( -delta.z, delta.x ) } func calculateLocation( @@ -149,8 +151,6 @@ struct LocalizationProcessor { cameraTransform.columns.3.y, cameraTransform.columns.3.z) var delta = worldPoint - cameraOriginPoint - /// Fix the z-axis back so that it points north - delta.z = -delta.z return delta } @@ -194,10 +194,13 @@ extension LocalizationProcessor { throw LocalizationProcessorError.invalidBounds } let trapezoidDeltas = trapezoidBoundsWithDepth.map { pointWithDepth in - return getDeltaFromPoint( + let delta = getDeltaFromPoint( point: pointWithDepth.point, depth: pointWithDepth.depth, imageSize: imageSize, cameraTransform: cameraTransform, cameraIntrinsics: cameraIntrinsics ) + var deltaNorth = delta + deltaNorth.z = -deltaNorth.z + return deltaNorth } let bottomLeft = trapezoidDeltas[0] let topLeft = trapezoidDeltas[1] @@ -240,10 +243,13 @@ extension LocalizationProcessor { throw LocalizationProcessorError.invalidBounds } let trapezoidDeltas = trapezoidBoundsWithDepth.map { pointWithDepth in - return getDeltaFromPoint( + let delta = getDeltaFromPoint( point: pointWithDepth.point, depth: pointWithDepth.depth, imageSize: imageSize, cameraTransform: cameraTransform, cameraIntrinsics: cameraIntrinsics 
) + var deltaNorth = delta + deltaNorth.z = -deltaNorth.z + return deltaNorth } let bottomLeft = trapezoidDeltas[0] let topLeft = trapezoidDeltas[1] @@ -275,10 +281,13 @@ extension LocalizationProcessor { throw LocalizationProcessorError.invalidBounds } let trapezoidDeltas = trapezoidBoundsWithDepth.map { pointWithDepth in - return getDeltaFromPoint( + let delta = getDeltaFromPoint( point: pointWithDepth.point, depth: pointWithDepth.depth, imageSize: imageSize, cameraTransform: cameraTransform, cameraIntrinsics: cameraIntrinsics ) + var deltaNorth = delta + deltaNorth.z = -deltaNorth.z + return deltaNorth } let bottomLeft = trapezoidDeltas[0] let topLeft = trapezoidDeltas[1] From 5494cf81ef2a94c9c0650e5d99d6b009024f2335 Mon Sep 17 00:00:00 2001 From: himanshunaidu Date: Sat, 24 Jan 2026 20:50:09 -0800 Subject: [PATCH 04/11] Add kernel for computing plane 3d world points --- IOSAccessAssessment.xcodeproj/project.pbxproj | 4 +- .../Plane/PlaneFitting.metal | 41 ++++++++++++++++++- .../Image/Depth/DepthMapProcessor.swift | 1 + IOSAccessAssessment/Mesh/MeshPipeline.metal | 2 +- IOSAccessAssessment/ShaderTypes.h | 16 ++++++++ 5 files changed, 59 insertions(+), 5 deletions(-) diff --git a/IOSAccessAssessment.xcodeproj/project.pbxproj b/IOSAccessAssessment.xcodeproj/project.pbxproj index 568bde3..afa87bf 100644 --- a/IOSAccessAssessment.xcodeproj/project.pbxproj +++ b/IOSAccessAssessment.xcodeproj/project.pbxproj @@ -1458,7 +1458,7 @@ LD_RUNPATH_SEARCH_PATHS = "$(inherited)"; MARKETING_VERSION = 0.3; MTL_HEADER_SEARCH_PATHS = "$(SRCROOT)/IOSAccessAssessment/**"; - OTHER_CFLAGS = "-DACCELERATE_NEW_LAPACK"; + OTHER_CFLAGS = ""; PRODUCT_BUNDLE_IDENTIFIER = edu.uw.pointmapper; PRODUCT_NAME = "$(TARGET_NAME)"; SUPPORTED_PLATFORMS = "iphoneos iphonesimulator"; @@ -1502,7 +1502,7 @@ LD_RUNPATH_SEARCH_PATHS = "$(inherited)"; MARKETING_VERSION = 0.3; MTL_HEADER_SEARCH_PATHS = "$(SRCROOT)/IOSAccessAssessment/**"; - OTHER_CFLAGS = "-DACCELERATE_NEW_LAPACK"; + OTHER_CFLAGS = ""; PRODUCT_BUNDLE_IDENTIFIER = edu.uw.pointmapper; PRODUCT_NAME = "$(TARGET_NAME)"; SUPPORTED_PLATFORMS = "iphoneos iphonesimulator"; diff --git a/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitting.metal b/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitting.metal index 724ff26..159442e 100644 --- a/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitting.metal +++ b/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitting.metal @@ -14,8 +14,7 @@ inline float3 projectPixelToWorld( float2 pixelCoord, float depthValue, constant float4x4& cameraTransform, - constant float3x3& invIntrinsics, - uint2 imageSize + constant float3x3& invIntrinsics ) { float3 imagePoint = float3(pixelCoord, 1.0); float3 ray = invIntrinsics * imagePoint; @@ -30,3 +29,41 @@ inline float3 projectPixelToWorld( return worldPoint; } + +// Plane Fitting Point Extraction Kernel +// Assumes the depth texture is the same size as the segmentation texture +kernel void computePlanePoints( + texture2d segmentationTexture [[texture(0)]], + texture2d depthTexture [[texture(1)]], + constant uint8_t& targetValue [[buffer(0)]], + constant PlanePointsParams& params [[buffer(1)]], + device PlanePoint* points [[buffer(2)]], + device atomic_uint* pointCount [[buffer(3)]], + uint2 gid [[thread_position_in_grid]] +) { + if (gid.x >= segmentationTexture.get_width() || gid.y >= segmentationTexture.get_height()) + return; + + float4 pixelColor = segmentationTexture.read(gid); + float grayscale = pixelColor.r; + + // Normalize grayscale to the range of the LUT + uint index 
= min(uint(round(grayscale * 255.0)), 255u); + if (index != targetValue) { + return; + } + float depthValue = depthTexture.read(gid).r; + if (depthValue <= params.minDepthThreshold || depthValue >= params.maxDepthThreshold) { + return; + } + + float3 worldPoint = projectPixelToWorld( + float2(gid), + depthValue, + params.cameraTransform, + params.invIntrinsics + ); + + uint idx = atomic_fetch_add_explicit(pointCount, 1u, memory_order_relaxed); + outPoints[idx].p = worldPoint; +} diff --git a/IOSAccessAssessment/Image/Depth/DepthMapProcessor.swift b/IOSAccessAssessment/Image/Depth/DepthMapProcessor.swift index ba3e1ef..8a0490b 100644 --- a/IOSAccessAssessment/Image/Depth/DepthMapProcessor.swift +++ b/IOSAccessAssessment/Image/Depth/DepthMapProcessor.swift @@ -41,6 +41,7 @@ struct DepthMapProcessor { pixelFormatType: kCVPixelFormatType_DepthFloat32, colorSpace: nil ) + print("DepthMapProcessor initialized with depth image of size: \(depthWidth)x\(depthHeight)") } private func getDepthAtPoint(point: CGPoint) throws -> Float { diff --git a/IOSAccessAssessment/Mesh/MeshPipeline.metal b/IOSAccessAssessment/Mesh/MeshPipeline.metal index cfaeb11..09e82cb 100644 --- a/IOSAccessAssessment/Mesh/MeshPipeline.metal +++ b/IOSAccessAssessment/Mesh/MeshPipeline.metal @@ -9,7 +9,7 @@ using namespace metal; #import "ShaderTypes.h" -/// For debugging +// For debugging enum DebugSlot : uint { zBelowZero = 0, outsideImage = 1, diff --git a/IOSAccessAssessment/ShaderTypes.h b/IOSAccessAssessment/ShaderTypes.h index c41d7da..e227009 100644 --- a/IOSAccessAssessment/ShaderTypes.h +++ b/IOSAccessAssessment/ShaderTypes.h @@ -14,6 +14,7 @@ typedef uint8_t MTL_UINT8; // 8-bit typedef uint MTL_UINT; // 32-bit typedef uint MTL_BOOL; // use 0/1 + typedef float3 MTL_FLOAT3; typedef float4x4 MTL_FLOAT4X4; typedef float3x3 MTL_FLOAT3X3; // 48 bytes (3 cols, 16B aligned) typedef uint2 MTL_UINT2; @@ -24,6 +25,7 @@ typedef uint8_t MTL_UINT8; // 8-bit typedef uint32_t MTL_UINT; typedef uint32_t MTL_BOOL; // 0/1 + typedef simd_float3 MTL_FLOAT3; typedef simd_float4x4 MTL_FLOAT4X4; typedef simd_float3x3 MTL_FLOAT3X3; // 48 bytes typedef simd_uint2 MTL_UINT2; @@ -59,3 +61,17 @@ typedef struct BoundsParams { float maxX; float maxY; } BoundsParams; + +// For PCA Plane Fitting +typedef struct PlanePoint { + MTL_FLOAT3 p; +} PlanePoint; + +typedef struct PlanePointsParams { + MTL_UINT2 imageSize; + float minDepthThreshold; + float maxDepthThreshold; + MTL_FLOAT4X4 cameraTransform; + MTL_FLOAT3X3 invIntrinsics; +} PlanePointsParams; + From 2ab0aeb995f0a59446367ca28ab0a8d1f087ba12 Mon Sep 17 00:00:00 2001 From: himanshunaidu Date: Sun, 25 Jan 2026 16:15:02 -0800 Subject: [PATCH 05/11] Add Swift struct WorldPoints to utilize world points kernel --- IOSAccessAssessment.xcodeproj/project.pbxproj | 28 +++- .../Plane/PlaneFitting.metal | 6 +- .../Plane/WorldPoints.swift | 156 ++++++++++++++++++ .../Image/Depth/DepthFilter.swift | 12 +- .../Mesh/MeshGPUSnapshot.swift | 26 +-- .../Mesh/SegmentationMeshRecord.swift | 8 +- .../Utils/MetalBufferUtils.swift} | 12 +- .../Segmentation/SegmentationARPipeline.swift | 2 +- 8 files changed, 213 insertions(+), 37 deletions(-) create mode 100644 IOSAccessAssessment/AttributeEstimation/Plane/WorldPoints.swift rename IOSAccessAssessment/{Mesh/Utils/MeshBufferUtils.swift => Metal/Utils/MetalBufferUtils.swift} (86%) diff --git a/IOSAccessAssessment.xcodeproj/project.pbxproj b/IOSAccessAssessment.xcodeproj/project.pbxproj index afa87bf..d896a16 100644 --- a/IOSAccessAssessment.xcodeproj/project.pbxproj +++ 
b/IOSAccessAssessment.xcodeproj/project.pbxproj @@ -70,6 +70,7 @@ A364B5352F25589B00325E5C /* DepthFiltering.metal in Sources */ = {isa = PBXBuildFile; fileRef = A364B5342F25589600325E5C /* DepthFiltering.metal */; }; A364B5D92F259AD700325E5C /* PlaneFit.swift in Sources */ = {isa = PBXBuildFile; fileRef = A364B5D82F259AD600325E5C /* PlaneFit.swift */; }; A364B5DD2F259AFE00325E5C /* PlaneFitting.metal in Sources */ = {isa = PBXBuildFile; fileRef = A364B5DC2F259AF900325E5C /* PlaneFitting.metal */; }; + A364B5DF2F26DB5700325E5C /* WorldPoints.swift in Sources */ = {isa = PBXBuildFile; fileRef = A364B5DE2F26DB5300325E5C /* WorldPoints.swift */; }; A36C6E022E134CE600A86004 /* bisenetv2_35_640_640.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = A36C6E012E134CE600A86004 /* bisenetv2_35_640_640.mlpackage */; }; A374FAB72EE0173600055268 /* OSMResponseElement.swift in Sources */ = {isa = PBXBuildFile; fileRef = A374FAB62EE0173200055268 /* OSMResponseElement.swift */; }; A37E3E3C2EED60F300B07B77 /* PngEncoder.mm in Sources */ = {isa = PBXBuildFile; fileRef = A37E3E3B2EED60F300B07B77 /* PngEncoder.mm */; }; @@ -109,7 +110,7 @@ A3D78D762E654F18003BFE78 /* ProfileView.swift in Sources */ = {isa = PBXBuildFile; fileRef = A3D78D752E654F14003BFE78 /* ProfileView.swift */; }; A3DA4DA82EB94D84005BB812 /* MeshGPUSnapshot.swift in Sources */ = {isa = PBXBuildFile; fileRef = A3DA4DA72EB94D81005BB812 /* MeshGPUSnapshot.swift */; }; A3DA4DAE2EB98D70005BB812 /* MeshPipeline.metal in Sources */ = {isa = PBXBuildFile; fileRef = A3DA4DAD2EB98D70005BB812 /* MeshPipeline.metal */; }; - A3DA4DB12EB99A5C005BB812 /* MeshBufferUtils.swift in Sources */ = {isa = PBXBuildFile; fileRef = A3DA4DB02EB99A5A005BB812 /* MeshBufferUtils.swift */; }; + A3DA4DB12EB99A5C005BB812 /* MetalBufferUtils.swift in Sources */ = {isa = PBXBuildFile; fileRef = A3DA4DB02EB99A5A005BB812 /* MetalBufferUtils.swift */; }; A3DA4DB62EBAE101005BB812 /* Stub.m in Sources */ = {isa = PBXBuildFile; fileRef = A3DA4DB52EBAE101005BB812 /* Stub.m */; }; A3DA4DBC2EBCB881005BB812 /* SegmentationMeshRecord.swift in Sources */ = {isa = PBXBuildFile; fileRef = A3DA4DBB2EBCB87E005BB812 /* SegmentationMeshRecord.swift */; }; A3DA4DBE2EBCB9F9005BB812 /* MetalContext.swift in Sources */ = {isa = PBXBuildFile; fileRef = A3DA4DBD2EBCB9F9005BB812 /* MetalContext.swift */; }; @@ -247,6 +248,7 @@ A364B5342F25589600325E5C /* DepthFiltering.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = DepthFiltering.metal; sourceTree = ""; }; A364B5D82F259AD600325E5C /* PlaneFit.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PlaneFit.swift; sourceTree = ""; }; A364B5DC2F259AF900325E5C /* PlaneFitting.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PlaneFitting.metal; sourceTree = ""; }; + A364B5DE2F26DB5300325E5C /* WorldPoints.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WorldPoints.swift; sourceTree = ""; }; A36C6E012E134CE600A86004 /* bisenetv2_35_640_640.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = bisenetv2_35_640_640.mlpackage; sourceTree = ""; }; A374FAB62EE0173200055268 /* OSMResponseElement.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OSMResponseElement.swift; sourceTree = ""; }; A37E3E382EED60F300B07B77 /* lodepng.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = lodepng.h; sourceTree = ""; }; @@ -287,7 +289,7 @@ 
A3D78D752E654F14003BFE78 /* ProfileView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ProfileView.swift; sourceTree = ""; }; A3DA4DA72EB94D81005BB812 /* MeshGPUSnapshot.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MeshGPUSnapshot.swift; sourceTree = ""; }; A3DA4DAD2EB98D70005BB812 /* MeshPipeline.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = MeshPipeline.metal; sourceTree = ""; }; - A3DA4DB02EB99A5A005BB812 /* MeshBufferUtils.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MeshBufferUtils.swift; sourceTree = ""; }; + A3DA4DB02EB99A5A005BB812 /* MetalBufferUtils.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MetalBufferUtils.swift; sourceTree = ""; }; A3DA4DB32EBAE05C005BB812 /* ShaderTypes.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = ShaderTypes.h; sourceTree = ""; }; A3DA4DB42EBAE101005BB812 /* IOSAccessAssessment-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "IOSAccessAssessment-Bridging-Header.h"; sourceTree = ""; }; A3DA4DB52EBAE101005BB812 /* Stub.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = Stub.m; sourceTree = ""; }; @@ -390,6 +392,7 @@ A3E84ECE2DDAC7980096A645 /* Annotation */, DAA7F8C62CA76514003666D8 /* Image */, A3DA4DA62EB9320E005BB812 /* Mesh */, + A364B5E02F26E0F400325E5C /* Metal */, DAA7F8BE2CA683DC003666D8 /* Segmentation */, 55659C092BB785EA0094DF01 /* MachineLearning */, 55659C0E2BB786240094DF01 /* View */, @@ -671,12 +674,29 @@ A364B5D72F259A7800325E5C /* Plane */ = { isa = PBXGroup; children = ( + A364B5DE2F26DB5300325E5C /* WorldPoints.swift */, A364B5DC2F259AF900325E5C /* PlaneFitting.metal */, A364B5D82F259AD600325E5C /* PlaneFit.swift */, ); path = Plane; sourceTree = ""; }; + A364B5E02F26E0F400325E5C /* Metal */ = { + isa = PBXGroup; + children = ( + A364B5E12F26E12300325E5C /* Utils */, + ); + path = Metal; + sourceTree = ""; + }; + A364B5E12F26E12300325E5C /* Utils */ = { + isa = PBXGroup; + children = ( + A3DA4DB02EB99A5A005BB812 /* MetalBufferUtils.swift */, + ); + path = Utils; + sourceTree = ""; + }; A37E3E352EED60C100B07B77 /* CHelpers */ = { isa = PBXGroup; children = ( @@ -891,7 +911,6 @@ isa = PBXGroup; children = ( A3A413A12EC9C3F60039298C /* MeshRasterizer.swift */, - A3DA4DB02EB99A5A005BB812 /* MeshBufferUtils.swift */, ); path = Utils; sourceTree = ""; @@ -1229,7 +1248,7 @@ A32943532EE814A700C4C1BC /* OSWElement.swift in Sources */, A30801502EC0926800B1BA3A /* ContourUtils.swift in Sources */, A32943482EE7C0DD00C4C1BC /* OSWElementClass.swift in Sources */, - A3DA4DB12EB99A5C005BB812 /* MeshBufferUtils.swift in Sources */, + A3DA4DB12EB99A5C005BB812 /* MetalBufferUtils.swift in Sources */, A37E72102ED66A6400CFE4EF /* SegmentationAnnotationPipeline.swift in Sources */, A329434C2EE7CFE800C4C1BC /* OSWField.swift in Sources */, A3A413A42ECD3C7E0039298C /* RasterizeConfig.swift in Sources */, @@ -1260,6 +1279,7 @@ A3420F1C2E8D82E700CD617E /* APIEnvironment.swift in Sources */, A3DC22ED2DCF10050020CE84 /* Homography.metal in Sources */, A3C55A472EAF513B00F6CFDC /* FrameRasterizer.swift in Sources */, + A364B5DF2F26DB5700325E5C /* WorldPoints.swift in Sources */, A3FE16672E18C81800DAE5BE /* LocationEncoder.swift in Sources */, A3FFAA832DE5253E002B99BD /* bisenetv2_53_640_640.mlpackage in Sources */, A3FFAA7A2DE01A0F002B99BD /* ARCameraView.swift in Sources */, diff --git 
a/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitting.metal b/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitting.metal index 159442e..f537e9a 100644 --- a/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitting.metal +++ b/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitting.metal @@ -32,7 +32,7 @@ inline float3 projectPixelToWorld( // Plane Fitting Point Extraction Kernel // Assumes the depth texture is the same size as the segmentation texture -kernel void computePlanePoints( +kernel void computeWorldPoints( texture2d segmentationTexture [[texture(0)]], texture2d depthTexture [[texture(1)]], constant uint8_t& targetValue [[buffer(0)]], @@ -60,10 +60,10 @@ kernel void computePlanePoints( float3 worldPoint = projectPixelToWorld( float2(gid), depthValue, - params.cameraTransform, + params.cameraTransform, params.invIntrinsics ); uint idx = atomic_fetch_add_explicit(pointCount, 1u, memory_order_relaxed); - outPoints[idx].p = worldPoint; + points[idx].p = worldPoint; } diff --git a/IOSAccessAssessment/AttributeEstimation/Plane/WorldPoints.swift b/IOSAccessAssessment/AttributeEstimation/Plane/WorldPoints.swift new file mode 100644 index 0000000..5cf4b88 --- /dev/null +++ b/IOSAccessAssessment/AttributeEstimation/Plane/WorldPoints.swift @@ -0,0 +1,156 @@ +// +// WorldPoints.swift +// IOSAccessAssessment +// +// Created by Himanshu on 1/25/26. +// + +import ARKit +import RealityKit +import MetalKit +import simd + +enum WorldPointsError: Error, LocalizedError { + case metalInitializationFailed + case invalidInputImage + case textureCreationFailed + case metalPipelineCreationError + case meshPipelineBlitEncoderError + case outputImageCreationFailed + + var errorDescription: String? { + switch self { + case .metalInitializationFailed: + return "Failed to initialize Metal resources." + case .invalidInputImage: + return "The input image is invalid." + case .textureCreationFailed: + return "Failed to create Metal textures." + case .metalPipelineCreationError: + return "Failed to create Metal compute pipeline." + case .meshPipelineBlitEncoderError: + return "Failed to create Blit Command Encoder for the Segmentation Mesh Creation." + case .outputImageCreationFailed: + return "Failed to create output CIImage from Metal texture." + } + } +} + +/** + Extacting 3D world points. + */ +struct WorldPoints { + private let device: MTLDevice + private let commandQueue: MTLCommandQueue + private let pipeline: MTLComputePipelineState + private let textureLoader: MTKTextureLoader + + private let ciContext: CIContext + + init() throws { + guard let device = MTLCreateSystemDefaultDevice(), + let commandQueue = device.makeCommandQueue() else { + throw WorldPointsError.metalInitializationFailed + } + self.device = device + self.commandQueue = commandQueue + self.textureLoader = MTKTextureLoader(device: device) + + self.ciContext = CIContext(mtlDevice: device, options: [.workingColorSpace: NSNull(), .outputColorSpace: NSNull()]) + + guard let kernelFunction = device.makeDefaultLibrary()?.makeFunction(name: "computeWorldPoints"), + let pipeline = try? 
device.makeComputePipelineState(function: kernelFunction) else { + throw WorldPointsError.metalInitializationFailed + } + self.pipeline = pipeline + } + + func getWorldPoints( + segmentationLabelImage: CIImage, + depthImage: CIImage, + targetValue: UInt8, + cameraTransform: simd_float4x4, + cameraIntrinsics: simd_float3x3 + ) throws -> [PlanePoint] { + guard let commandBuffer = self.commandQueue.makeCommandBuffer() else { + throw WorldPointsError.metalPipelineCreationError + } + + print("PlanePoint Alignment and stride: \(MemoryLayout.alignment), \(MemoryLayout.alignment)") + + let imageSize = simd_uint2(UInt32(segmentationLabelImage.extent.width), UInt32(segmentationLabelImage.extent.height)) + let invIntrinsics = simd_inverse(cameraIntrinsics) + + let segmentationLabelTexture = try segmentationLabelImage.toMTLTexture( + device: self.device, commandBuffer: commandBuffer, pixelFormat: .r8Unorm, + context: self.ciContext, + colorSpace: CGColorSpaceCreateDeviceRGB() /// Dummy color space + ) + let depthTexture = try depthImage.toMTLTexture( + device: self.device, commandBuffer: commandBuffer, pixelFormat: .r32Float, + context: self.ciContext, + colorSpace: CGColorSpaceCreateDeviceRGB() /// Dummy color space + ) + var targetValueVar = targetValue + var params = PlanePointsParams( + imageSize: imageSize, + minDepthThreshold: Constants.DepthConstants.depthMinThreshold, + maxDepthThreshold: Constants.DepthConstants.depthMaxThreshold, + cameraTransform: cameraTransform, + invIntrinsics: invIntrinsics + ) + let pointCount: MTLBuffer = try MetalBufferUtils.makeBuffer( + device: self.device, length: MemoryLayout.stride, options: .storageModeShared + ) + let maxPoints = imageSize.x * imageSize.y + let pointsBuffer: MTLBuffer = try MetalBufferUtils.makeBuffer( + device: self.device, length: MemoryLayout.stride * Int(maxPoints), options: .storageModeShared + ) + + /** + Initialize point count to zero. + */ + guard let blit = commandBuffer.makeBlitCommandEncoder() else { + throw WorldPointsError.meshPipelineBlitEncoderError + } + blit.fill(buffer: pointCount, range: 0...stride, value: 0) + blit.endEncoding() + + /** + Encode compute command. 
+ */ + guard let commandEncoder = commandBuffer.makeComputeCommandEncoder() else { + throw WorldPointsError.metalPipelineCreationError + } + + commandEncoder.setComputePipelineState(self.pipeline) + commandEncoder.setTexture(segmentationLabelTexture, index: 0) + commandEncoder.setTexture(depthTexture, index: 1) + commandEncoder.setBytes(&targetValueVar, length: MemoryLayout.size, index: 0) + commandEncoder.setBytes(¶ms, length: MemoryLayout.stride, index: 1) + commandEncoder.setBuffer(pointsBuffer, offset: 0, index: 2) + commandEncoder.setBuffer(pointCount, offset: 0, index: 3) + + let threadgroupSize = MTLSize(width: pipeline.threadExecutionWidth, height: pipeline.maxTotalThreadsPerThreadgroup / pipeline.threadExecutionWidth, depth: 1) + let threadgroups = MTLSize(width: (Int(imageSize.x) + threadgroupSize.width - 1) / threadgroupSize.width, + height: (Int(imageSize.y) + threadgroupSize.height - 1) / threadgroupSize.height, + depth: 1) + commandEncoder.dispatchThreadgroups(threadgroups, threadsPerThreadgroup: threadgroupSize) + commandEncoder.endEncoding() + + commandBuffer.commit() + commandBuffer.waitUntilCompleted() + + let pointsCountPointer = pointCount.contents().bindMemory(to: UInt32.self, capacity: 1).pointee + let actualPointCount = Int(pointsCountPointer) + var worldPoints: [PlanePoint] = [] + if actualPointCount > 0 { + let pointsPointer = pointsBuffer.contents().bindMemory(to: PlanePoint.self, capacity: actualPointCount) + for i in 0.. CIImage { let descriptor = MTLTextureDescriptor.texture2DDescriptor(pixelFormat: .r8Unorm, width: Int(inputImage.extent.width), height: Int(inputImage.extent.height), mipmapped: false) @@ -78,19 +78,19 @@ struct DepthFilter { context: self.ciContext, colorSpace: CGColorSpaceCreateDeviceRGB() /// Dummy color space ) - let depthTexture = try depthMap.toMTLTexture( + let depthTexture = try depthImage.toMTLTexture( device: self.device, commandBuffer: commandBuffer, pixelFormat: .r32Float, context: self.ciContext, colorSpace: CGColorSpaceCreateDeviceRGB() /// Dummy color space ) guard let outputTexture = self.device.makeTexture(descriptor: descriptor) else { - throw BinaryMaskFilterError.textureCreationFailed + throw DepthFilterError.textureCreationFailed } var depthMinThresholdVar = depthMinThreshold var depthMaxThresholdVar = depthMaxThreshold guard let commandEncoder = commandBuffer.makeComputeCommandEncoder() else { - throw BinaryMaskFilterError.metalPipelineCreationError + throw DepthFilterError.metalPipelineCreationError } commandEncoder.setComputePipelineState(self.pipeline) diff --git a/IOSAccessAssessment/Mesh/MeshGPUSnapshot.swift b/IOSAccessAssessment/Mesh/MeshGPUSnapshot.swift index 9aa2866..87ab8ae 100644 --- a/IOSAccessAssessment/Mesh/MeshGPUSnapshot.swift +++ b/IOSAccessAssessment/Mesh/MeshGPUSnapshot.swift @@ -105,8 +105,8 @@ final class MeshGPUSnapshotGenerator: NSObject { let anchorTransform = meshAnchor.transform var meshGPUAnchor: MeshGPUAnchor = try currentSnapshot?.anchors[meshAnchor.identifier] ?? 
{ - let vertexBuffer = try MeshBufferUtils.makeBuffer(device: device, length: defaultBufferSize, options: .storageModeShared) - let indexBuffer = try MeshBufferUtils.makeBuffer(device: device, length: defaultBufferSize, options: .storageModeShared) + let vertexBuffer = try MetalBufferUtils.makeBuffer(device: device, length: defaultBufferSize, options: .storageModeShared) + let indexBuffer = try MetalBufferUtils.makeBuffer(device: device, length: defaultBufferSize, options: .storageModeShared) return MeshGPUAnchor( vertexBuffer: vertexBuffer, indexBuffer: indexBuffer, classificationBuffer: nil, anchorTransform: anchorTransform ) @@ -116,13 +116,13 @@ final class MeshGPUSnapshotGenerator: NSObject { // MARK: This code assumes the vertex format will always be only Float3 let vertexElemSize = MemoryLayout.stride * 3 let vertexByteCount = vertices.count * vertexElemSize - try MeshBufferUtils.ensureCapacity(device: device, buf: &meshGPUAnchor.vertexBuffer, requiredBytes: vertexByteCount) + try MetalBufferUtils.ensureCapacity(device: device, buf: &meshGPUAnchor.vertexBuffer, requiredBytes: vertexByteCount) let vertexSrcPtr = vertices.buffer.contents().advanced(by: vertices.offset) if (vertices.stride == vertexElemSize) { - try MeshBufferUtils.copyContiguous(srcPtr: vertexSrcPtr, dst: meshGPUAnchor.vertexBuffer, byteCount: vertexByteCount) + try MetalBufferUtils.copyContiguous(srcPtr: vertexSrcPtr, dst: meshGPUAnchor.vertexBuffer, byteCount: vertexByteCount) } else { - try MeshBufferUtils.copyStrided(count: vertices.count, srcPtr: vertexSrcPtr, srcStride: vertices.stride, + try MetalBufferUtils.copyStrided(count: vertices.count, srcPtr: vertexSrcPtr, srcStride: vertices.stride, dst: meshGPUAnchor.vertexBuffer, elemSize: vertexElemSize) } @@ -130,13 +130,13 @@ final class MeshGPUSnapshotGenerator: NSObject { // MARK: This code assumes the index type will always be only UInt32 let indexTypeSize = MemoryLayout.stride let indexByteCount = faces.count * faces.bytesPerIndex * faces.indexCountPerPrimitive - try MeshBufferUtils.ensureCapacity(device: device, buf: &meshGPUAnchor.indexBuffer, requiredBytes: indexByteCount) + try MetalBufferUtils.ensureCapacity(device: device, buf: &meshGPUAnchor.indexBuffer, requiredBytes: indexByteCount) let indexSrcPtr = faces.buffer.contents() if (faces.bytesPerIndex == indexTypeSize) { - try MeshBufferUtils.copyContiguous(srcPtr: indexSrcPtr, dst: meshGPUAnchor.indexBuffer, byteCount: indexByteCount) + try MetalBufferUtils.copyContiguous(srcPtr: indexSrcPtr, dst: meshGPUAnchor.indexBuffer, byteCount: indexByteCount) } else { - try MeshBufferUtils.copyStrided(count: faces.count * faces.indexCountPerPrimitive, srcPtr: indexSrcPtr, srcStride: faces.bytesPerIndex, + try MetalBufferUtils.copyStrided(count: faces.count * faces.indexCountPerPrimitive, srcPtr: indexSrcPtr, srcStride: faces.bytesPerIndex, dst: meshGPUAnchor.indexBuffer, elemSize: indexTypeSize) } @@ -146,18 +146,18 @@ final class MeshGPUSnapshotGenerator: NSObject { let classificationElemSize = MemoryLayout.stride let classificationByteCount = classifications.count * classificationElemSize if meshGPUAnchor.classificationBuffer == nil { - let newCapacity = MeshBufferUtils.nextCap(classificationByteCount) - meshGPUAnchor.classificationBuffer = try MeshBufferUtils.makeBuffer(device: device, length: newCapacity, options: .storageModeShared) + let newCapacity = MetalBufferUtils.nextCap(classificationByteCount) + meshGPUAnchor.classificationBuffer = try MetalBufferUtils.makeBuffer(device: device, length: newCapacity, 
options: .storageModeShared) } else { - try MeshBufferUtils.ensureCapacity(device: device, buf: &meshGPUAnchor.classificationBuffer!, requiredBytes: classificationByteCount) + try MetalBufferUtils.ensureCapacity(device: device, buf: &meshGPUAnchor.classificationBuffer!, requiredBytes: classificationByteCount) } let classificationSrcPtr = classifications.buffer.contents().advanced(by: classifications.offset) if (classifications.stride == classificationElemSize) { - try MeshBufferUtils.copyContiguous( + try MetalBufferUtils.copyContiguous( srcPtr: classificationSrcPtr, dst: meshGPUAnchor.classificationBuffer!, byteCount: classificationByteCount ) } else { - try MeshBufferUtils.copyStrided( + try MetalBufferUtils.copyStrided( count: classifications.count, srcPtr: classificationSrcPtr, srcStride: classifications.stride, dst: meshGPUAnchor.classificationBuffer!, elemSize: classificationElemSize) } diff --git a/IOSAccessAssessment/Mesh/SegmentationMeshRecord.swift b/IOSAccessAssessment/Mesh/SegmentationMeshRecord.swift index a8c6c7f..7021893 100644 --- a/IOSAccessAssessment/Mesh/SegmentationMeshRecord.swift +++ b/IOSAccessAssessment/Mesh/SegmentationMeshRecord.swift @@ -132,20 +132,20 @@ final class SegmentationMeshRecord { self.entity.model?.mesh = resource } - let outTriCount: MTLBuffer = try MeshBufferUtils.makeBuffer( + let outTriCount: MTLBuffer = try MetalBufferUtils.makeBuffer( device: self.context.device, length: MemoryLayout.stride, options: .storageModeShared ) // For debugging let debugSlots = Int(3) // MARK: Hard-coded let debugBytes = debugSlots * MemoryLayout.stride - let debugCounter: MTLBuffer = try MeshBufferUtils.makeBuffer( + let debugCounter: MTLBuffer = try MetalBufferUtils.makeBuffer( device: self.context.device, length: debugBytes, options: .storageModeShared ) - let aabbMinU = try MeshBufferUtils.makeBuffer( + let aabbMinU = try MetalBufferUtils.makeBuffer( device: self.context.device, length: 3 * MemoryLayout.stride, options: .storageModeShared ) - let aabbMaxU = try MeshBufferUtils.makeBuffer( + let aabbMaxU = try MetalBufferUtils.makeBuffer( device: self.context.device, length: 3 * MemoryLayout.stride, options: .storageModeShared ) do { diff --git a/IOSAccessAssessment/Mesh/Utils/MeshBufferUtils.swift b/IOSAccessAssessment/Metal/Utils/MetalBufferUtils.swift similarity index 86% rename from IOSAccessAssessment/Mesh/Utils/MeshBufferUtils.swift rename to IOSAccessAssessment/Metal/Utils/MetalBufferUtils.swift index 5fa3572..7a53e17 100644 --- a/IOSAccessAssessment/Mesh/Utils/MeshBufferUtils.swift +++ b/IOSAccessAssessment/Metal/Utils/MetalBufferUtils.swift @@ -1,5 +1,5 @@ // -// MeshBufferUtils.swift +// MetalBufferUtils.swift // IOSAccessAssessment // // Created by Himanshu on 11/3/25. 
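For the projectPixelToWorld helper and computeWorldPoints kernel introduced in the plane-fitting patches above, a small CPU-side mirror can be handy when spot-checking a few pixels against the GPU output. The sketch below is not part of the patch series; the function name is a placeholder, and it assumes the intrinsics have already been scaled to the depth/segmentation resolution.

import simd

// Hypothetical CPU-side mirror of projectPixelToWorld in PlaneFitting.metal,
// intended only for spot-checking a handful of pixels against computeWorldPoints.
// Assumes cameraIntrinsics is scaled to the depth/segmentation image size.
func unprojectToWorld(pixel: SIMD2<Float>, depth: Float,
                      cameraTransform: simd_float4x4,
                      cameraIntrinsics: simd_float3x3) -> SIMD3<Float> {
    // Back-project the pixel onto a unit ray in camera space.
    let ray = simd_normalize(cameraIntrinsics.inverse * SIMD3<Float>(pixel.x, pixel.y, 1))
    // Scale by depth, then flip Y and Z to ARKit's camera convention, as the kernel does.
    let c = ray * depth
    let cameraPoint = SIMD4<Float>(c.x, -c.y, -c.z, 1)
    // Lift into world space with the camera transform.
    let w = cameraTransform * cameraPoint
    return SIMD3<Float>(w.x, w.y, w.z) / w.w
}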
@@ -9,7 +9,7 @@ import RealityKit import Metal import simd -enum MeshBufferUtilsError: Error, LocalizedError { +enum MetalBufferUtilsError: Error, LocalizedError { case bufferTooSmall(expected: Int, actual: Int) case bufferCreationFailed @@ -24,13 +24,13 @@ enum MeshBufferUtilsError: Error, LocalizedError { } -struct MeshBufferUtils { +struct MetalBufferUtils { static let defaultBufferSize: Int = 1024 @inline(__always) static func copyContiguous(srcPtr: UnsafeRawPointer, dst: MTLBuffer, byteCount: Int) throws { guard byteCount <= dst.length else { - throw MeshBufferUtilsError.bufferTooSmall(expected: byteCount, actual: dst.length) + throw MetalBufferUtilsError.bufferTooSmall(expected: byteCount, actual: dst.length) } let dstPtr = dst.contents() dstPtr.copyMemory(from: srcPtr, byteCount: byteCount) @@ -40,7 +40,7 @@ struct MeshBufferUtils { static func copyStrided(count: Int, srcPtr: UnsafeRawPointer, srcStride: Int, dst: MTLBuffer, elemSize: Int) throws { guard count * elemSize <= dst.length else { - throw MeshBufferUtilsError.bufferTooSmall(expected: count * elemSize, actual: dst.length) + throw MetalBufferUtilsError.bufferTooSmall(expected: count * elemSize, actual: dst.length) } let dstPtr = dst.contents() for i in 0.. MTLBuffer { guard let buffer = device.makeBuffer(length: length, options: options) else { - throw MeshBufferUtilsError.bufferCreationFailed + throw MetalBufferUtilsError.bufferCreationFailed } return buffer } diff --git a/IOSAccessAssessment/Segmentation/SegmentationARPipeline.swift b/IOSAccessAssessment/Segmentation/SegmentationARPipeline.swift index 683ccf2..6dafe9b 100644 --- a/IOSAccessAssessment/Segmentation/SegmentationARPipeline.swift +++ b/IOSAccessAssessment/Segmentation/SegmentationARPipeline.swift @@ -193,7 +193,7 @@ final class SegmentationARPipeline: ObservableObject { let depthMinThresholdValue = Constants.DepthConstants.depthMinThreshold let depthMaxThresholdValue = Constants.DepthConstants.depthMaxThreshold depthFilteredSegmentationImage = try depthFilter.apply( - to: segmentationImage, depthMap: depthImage, + to: segmentationImage, depthImage: depthImage, depthMinThreshold: depthMinThresholdValue, depthMaxThreshold: depthMaxThresholdValue ) } From 954299ae735dcb4c426ddb3d71401defcb37c0cb Mon Sep 17 00:00:00 2001 From: himanshunaidu Date: Sun, 25 Jan 2026 17:18:11 -0800 Subject: [PATCH 06/11] Start incorporating world points calculation in planefit --- IOSAccessAssessment.xcodeproj/project.pbxproj | 8 ++--- .../AttributeEstimation/Plane/PlaneFit.swift | 32 +++++++++++++++++++ ...oints.swift => WorldPointsProcessor.swift} | 16 +++++----- 3 files changed, 44 insertions(+), 12 deletions(-) rename IOSAccessAssessment/AttributeEstimation/Plane/{WorldPoints.swift => WorldPointsProcessor.swift} (92%) diff --git a/IOSAccessAssessment.xcodeproj/project.pbxproj b/IOSAccessAssessment.xcodeproj/project.pbxproj index d896a16..c36b00c 100644 --- a/IOSAccessAssessment.xcodeproj/project.pbxproj +++ b/IOSAccessAssessment.xcodeproj/project.pbxproj @@ -70,7 +70,7 @@ A364B5352F25589B00325E5C /* DepthFiltering.metal in Sources */ = {isa = PBXBuildFile; fileRef = A364B5342F25589600325E5C /* DepthFiltering.metal */; }; A364B5D92F259AD700325E5C /* PlaneFit.swift in Sources */ = {isa = PBXBuildFile; fileRef = A364B5D82F259AD600325E5C /* PlaneFit.swift */; }; A364B5DD2F259AFE00325E5C /* PlaneFitting.metal in Sources */ = {isa = PBXBuildFile; fileRef = A364B5DC2F259AF900325E5C /* PlaneFitting.metal */; }; - A364B5DF2F26DB5700325E5C /* WorldPoints.swift in Sources */ = {isa = 
PBXBuildFile; fileRef = A364B5DE2F26DB5300325E5C /* WorldPoints.swift */; }; + A364B5DF2F26DB5700325E5C /* WorldPointsProcessor.swift in Sources */ = {isa = PBXBuildFile; fileRef = A364B5DE2F26DB5300325E5C /* WorldPointsProcessor.swift */; }; A36C6E022E134CE600A86004 /* bisenetv2_35_640_640.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = A36C6E012E134CE600A86004 /* bisenetv2_35_640_640.mlpackage */; }; A374FAB72EE0173600055268 /* OSMResponseElement.swift in Sources */ = {isa = PBXBuildFile; fileRef = A374FAB62EE0173200055268 /* OSMResponseElement.swift */; }; A37E3E3C2EED60F300B07B77 /* PngEncoder.mm in Sources */ = {isa = PBXBuildFile; fileRef = A37E3E3B2EED60F300B07B77 /* PngEncoder.mm */; }; @@ -248,7 +248,7 @@ A364B5342F25589600325E5C /* DepthFiltering.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = DepthFiltering.metal; sourceTree = ""; }; A364B5D82F259AD600325E5C /* PlaneFit.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PlaneFit.swift; sourceTree = ""; }; A364B5DC2F259AF900325E5C /* PlaneFitting.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PlaneFitting.metal; sourceTree = ""; }; - A364B5DE2F26DB5300325E5C /* WorldPoints.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WorldPoints.swift; sourceTree = ""; }; + A364B5DE2F26DB5300325E5C /* WorldPointsProcessor.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WorldPointsProcessor.swift; sourceTree = ""; }; A36C6E012E134CE600A86004 /* bisenetv2_35_640_640.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = bisenetv2_35_640_640.mlpackage; sourceTree = ""; }; A374FAB62EE0173200055268 /* OSMResponseElement.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OSMResponseElement.swift; sourceTree = ""; }; A37E3E382EED60F300B07B77 /* lodepng.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = lodepng.h; sourceTree = ""; }; @@ -674,7 +674,7 @@ A364B5D72F259A7800325E5C /* Plane */ = { isa = PBXGroup; children = ( - A364B5DE2F26DB5300325E5C /* WorldPoints.swift */, + A364B5DE2F26DB5300325E5C /* WorldPointsProcessor.swift */, A364B5DC2F259AF900325E5C /* PlaneFitting.metal */, A364B5D82F259AD600325E5C /* PlaneFit.swift */, ); @@ -1279,7 +1279,7 @@ A3420F1C2E8D82E700CD617E /* APIEnvironment.swift in Sources */, A3DC22ED2DCF10050020CE84 /* Homography.metal in Sources */, A3C55A472EAF513B00F6CFDC /* FrameRasterizer.swift in Sources */, - A364B5DF2F26DB5700325E5C /* WorldPoints.swift in Sources */, + A364B5DF2F26DB5700325E5C /* WorldPointsProcessor.swift in Sources */, A3FE16672E18C81800DAE5BE /* LocationEncoder.swift in Sources */, A3FFAA832DE5253E002B99BD /* bisenetv2_53_640_640.mlpackage in Sources */, A3FFAA7A2DE01A0F002B99BD /* ARCameraView.swift in Sources */, diff --git a/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFit.swift b/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFit.swift index 965d560..980ac11 100644 --- a/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFit.swift +++ b/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFit.swift @@ -5,3 +5,35 @@ // Created by Himanshu on 1/24/26. // +import simd +import Accelerate +import CoreImage + +enum PlaneFitError: Error, LocalizedError { + case initializationError(message: String) + + var errorDescription: String? 
{ + switch self { + case .initializationError(let message): + return "PlaneFit Initialization Error: \(message)" + } + } +} + +struct PlaneFit { + private let worldPointsProcessor: WorldPointsProcessor + + init() throws { + self.worldPointsProcessor = try WorldPointsProcessor() + } + + func fitPlanePCAWithImage( + segmentationLabelImage: CIImage, + depthImage: CIImage, + targetValue: UInt8, + cameraTransform: simd_float4x4, + cameraIntrinsics: simd_float3x3 + ) { + + } +} diff --git a/IOSAccessAssessment/AttributeEstimation/Plane/WorldPoints.swift b/IOSAccessAssessment/AttributeEstimation/Plane/WorldPointsProcessor.swift similarity index 92% rename from IOSAccessAssessment/AttributeEstimation/Plane/WorldPoints.swift rename to IOSAccessAssessment/AttributeEstimation/Plane/WorldPointsProcessor.swift index 5cf4b88..3eb36ff 100644 --- a/IOSAccessAssessment/AttributeEstimation/Plane/WorldPoints.swift +++ b/IOSAccessAssessment/AttributeEstimation/Plane/WorldPointsProcessor.swift @@ -1,5 +1,5 @@ // -// WorldPoints.swift +// WorldPointsProcessor.swift // IOSAccessAssessment // // Created by Himanshu on 1/25/26. @@ -10,7 +10,7 @@ import RealityKit import MetalKit import simd -enum WorldPointsError: Error, LocalizedError { +enum WorldPointsProcessorError: Error, LocalizedError { case metalInitializationFailed case invalidInputImage case textureCreationFailed @@ -39,7 +39,7 @@ enum WorldPointsError: Error, LocalizedError { /** Extacting 3D world points. */ -struct WorldPoints { +struct WorldPointsProcessor { private let device: MTLDevice private let commandQueue: MTLCommandQueue private let pipeline: MTLComputePipelineState @@ -50,7 +50,7 @@ struct WorldPoints { init() throws { guard let device = MTLCreateSystemDefaultDevice(), let commandQueue = device.makeCommandQueue() else { - throw WorldPointsError.metalInitializationFailed + throw WorldPointsProcessorError.metalInitializationFailed } self.device = device self.commandQueue = commandQueue @@ -60,7 +60,7 @@ struct WorldPoints { guard let kernelFunction = device.makeDefaultLibrary()?.makeFunction(name: "computeWorldPoints"), let pipeline = try? device.makeComputePipelineState(function: kernelFunction) else { - throw WorldPointsError.metalInitializationFailed + throw WorldPointsProcessorError.metalInitializationFailed } self.pipeline = pipeline } @@ -73,7 +73,7 @@ struct WorldPoints { cameraIntrinsics: simd_float3x3 ) throws -> [PlanePoint] { guard let commandBuffer = self.commandQueue.makeCommandBuffer() else { - throw WorldPointsError.metalPipelineCreationError + throw WorldPointsProcessorError.metalPipelineCreationError } print("PlanePoint Alignment and stride: \(MemoryLayout.alignment), \(MemoryLayout.alignment)") @@ -111,7 +111,7 @@ struct WorldPoints { Initialize point count to zero. */ guard let blit = commandBuffer.makeBlitCommandEncoder() else { - throw WorldPointsError.meshPipelineBlitEncoderError + throw WorldPointsProcessorError.meshPipelineBlitEncoderError } blit.fill(buffer: pointCount, range: 0...stride, value: 0) blit.endEncoding() @@ -120,7 +120,7 @@ struct WorldPoints { Encode compute command. 
*/ guard let commandEncoder = commandBuffer.makeComputeCommandEncoder() else { - throw WorldPointsError.metalPipelineCreationError + throw WorldPointsProcessorError.metalPipelineCreationError } commandEncoder.setComputePipelineState(self.pipeline) From 3178dc8d87ad58d923aa4c5e809ede1eaee8bb7e Mon Sep 17 00:00:00 2001 From: himanshunaidu Date: Sun, 25 Jan 2026 17:29:43 -0800 Subject: [PATCH 07/11] Separate out attribute estimation pipeline and create distinct legacy and new attribute calculation implementations --- IOSAccessAssessment.xcodeproj/project.pbxproj | 36 +- .../AttributeEstimationPipeline.swift | 351 +----------------- .../Extensions/LocationExtension.swift | 230 ++++++++++++ .../Extensions/OtherAttributeExtension.swift | 129 +++++++ .../OtherAttributeExtensionLegacy.swift | 131 +++++++ .../{PlaneFitting.metal => PlaneFit.metal} | 4 +- ...PlaneFit.swift => PlaneFitProcessor.swift} | 13 +- .../Plane/WorldPointsProcessor.swift | 14 +- IOSAccessAssessment/ShaderTypes.h | 8 +- 9 files changed, 544 insertions(+), 372 deletions(-) create mode 100644 IOSAccessAssessment/AttributeEstimation/Extensions/LocationExtension.swift create mode 100644 IOSAccessAssessment/AttributeEstimation/Extensions/OtherAttributeExtension.swift create mode 100644 IOSAccessAssessment/AttributeEstimation/Extensions/OtherAttributeExtensionLegacy.swift rename IOSAccessAssessment/AttributeEstimation/Plane/{PlaneFitting.metal => PlaneFit.metal} (95%) rename IOSAccessAssessment/AttributeEstimation/Plane/{PlaneFit.swift => PlaneFitProcessor.swift} (64%) diff --git a/IOSAccessAssessment.xcodeproj/project.pbxproj b/IOSAccessAssessment.xcodeproj/project.pbxproj index c36b00c..fd1faa3 100644 --- a/IOSAccessAssessment.xcodeproj/project.pbxproj +++ b/IOSAccessAssessment.xcodeproj/project.pbxproj @@ -46,6 +46,9 @@ A32943592EE8204400C4C1BC /* OSWPolygon.swift in Sources */ = {isa = PBXBuildFile; fileRef = A32943582EE8204400C4C1BC /* OSWPolygon.swift */; }; A337CF692EDFC9F20032EE51 /* ChangesetViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = A337CF682EDFC9EF0032EE51 /* ChangesetViewModel.swift */; }; A3420F1C2E8D82E700CD617E /* APIEnvironment.swift in Sources */ = {isa = PBXBuildFile; fileRef = A3420F1B2E8D82E400CD617E /* APIEnvironment.swift */; }; + A3431E022F26FA2C00B96610 /* LocationExtension.swift in Sources */ = {isa = PBXBuildFile; fileRef = A3431E012F26FA2700B96610 /* LocationExtension.swift */; }; + A3431E042F26FA7200B96610 /* OtherAttributeExtensionLegacy.swift in Sources */ = {isa = PBXBuildFile; fileRef = A3431E032F26FA6B00B96610 /* OtherAttributeExtensionLegacy.swift */; }; + A3431E062F26FA9C00B96610 /* OtherAttributeExtension.swift in Sources */ = {isa = PBXBuildFile; fileRef = A3431E052F26FA9C00B96610 /* OtherAttributeExtension.swift */; }; A3471BA02DAF7ABF00FEB845 /* bisenetv2.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = A3471B9F2DAF7ABF00FEB845 /* bisenetv2.mlpackage */; }; A35547152EC198A600F43AFD /* ContourRequestProcessor.swift in Sources */ = {isa = PBXBuildFile; fileRef = A35547142EC198A600F43AFD /* ContourRequestProcessor.swift */; }; A355471E2EC1A47400F43AFD /* SharedAppData.swift in Sources */ = {isa = PBXBuildFile; fileRef = A355471D2EC1A47200F43AFD /* SharedAppData.swift */; }; @@ -68,8 +71,8 @@ A35E051E2EDFB09A003C26CF /* OSMWay.swift in Sources */ = {isa = PBXBuildFile; fileRef = A35E051D2EDFB099003C26CF /* OSMWay.swift */; }; A364B5332F25576000325E5C /* DepthFilter.swift in Sources */ = {isa = PBXBuildFile; fileRef = A364B5322F25575D00325E5C /* DepthFilter.swift */; }; 
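ShaderTypes.h labels the new buffers "For PCA Plane Fitting" and PATCH 06 leaves fitPlanePCAWithImage empty, so the CPU-side fit itself is still to come. Purely as a rough illustration of that step over the PlanePoint positions returned by getWorldPoints, here is a minimal PCA sketch; the function name is a placeholder, and the power-iteration eigensolver is a stand-in for whatever the project actually adopts (the toggled ACCELERATE_NEW_LAPACK flag hints at LAPACK), not the patch's implementation.

import simd

// Minimal sketch, assuming the PlanePoint positions have been copied into [SIMD3<Float>]
// (e.g. worldPoints.map { $0.p }). Power iteration on (trace*I - C) stands in for a real
// eigensolver: it converges to the covariance's smallest eigenvector, i.e. the plane normal.
func fitPlanePCA(_ points: [SIMD3<Float>]) -> (centroid: SIMD3<Float>, normal: SIMD3<Float>)? {
    guard points.count >= 3 else { return nil }
    let centroid = points.reduce(SIMD3<Float>.zero, +) / Float(points.count)
    // Accumulate the (unnormalized) 3x3 covariance of the centered points.
    var xx: Float = 0, xy: Float = 0, xz: Float = 0
    var yy: Float = 0, yz: Float = 0, zz: Float = 0
    for p in points {
        let d = p - centroid
        xx += d.x * d.x; xy += d.x * d.y; xz += d.x * d.z
        yy += d.y * d.y; yz += d.y * d.z; zz += d.z * d.z
    }
    let cov = simd_float3x3(columns: (SIMD3<Float>(xx, xy, xz),
                                      SIMD3<Float>(xy, yy, yz),
                                      SIMD3<Float>(xz, yz, zz)))
    // Shift so the weakest direction of cov becomes the dominant one, then power-iterate.
    let shifted = simd_float3x3(diagonal: SIMD3<Float>(repeating: xx + yy + zz)) - cov
    var normal = SIMD3<Float>(0, 1, 0)   // start near "up": floor normals are near-vertical
    for _ in 0..<32 {
        let next = shifted * normal
        let length = simd_length(next)
        if length < 1e-9 { break }       // degenerate point cloud; keep the previous estimate
        normal = next / length
    }
    return (centroid, normal)
}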
A364B5352F25589B00325E5C /* DepthFiltering.metal in Sources */ = {isa = PBXBuildFile; fileRef = A364B5342F25589600325E5C /* DepthFiltering.metal */; }; - A364B5D92F259AD700325E5C /* PlaneFit.swift in Sources */ = {isa = PBXBuildFile; fileRef = A364B5D82F259AD600325E5C /* PlaneFit.swift */; }; - A364B5DD2F259AFE00325E5C /* PlaneFitting.metal in Sources */ = {isa = PBXBuildFile; fileRef = A364B5DC2F259AF900325E5C /* PlaneFitting.metal */; }; + A364B5D92F259AD700325E5C /* PlaneFitProcessor.swift in Sources */ = {isa = PBXBuildFile; fileRef = A364B5D82F259AD600325E5C /* PlaneFitProcessor.swift */; }; + A364B5DD2F259AFE00325E5C /* PlaneFit.metal in Sources */ = {isa = PBXBuildFile; fileRef = A364B5DC2F259AF900325E5C /* PlaneFit.metal */; }; A364B5DF2F26DB5700325E5C /* WorldPointsProcessor.swift in Sources */ = {isa = PBXBuildFile; fileRef = A364B5DE2F26DB5300325E5C /* WorldPointsProcessor.swift */; }; A36C6E022E134CE600A86004 /* bisenetv2_35_640_640.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = A36C6E012E134CE600A86004 /* bisenetv2_35_640_640.mlpackage */; }; A374FAB72EE0173600055268 /* OSMResponseElement.swift in Sources */ = {isa = PBXBuildFile; fileRef = A374FAB62EE0173200055268 /* OSMResponseElement.swift */; }; @@ -224,6 +227,9 @@ A32943582EE8204400C4C1BC /* OSWPolygon.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OSWPolygon.swift; sourceTree = ""; }; A337CF682EDFC9EF0032EE51 /* ChangesetViewModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChangesetViewModel.swift; sourceTree = ""; }; A3420F1B2E8D82E400CD617E /* APIEnvironment.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = APIEnvironment.swift; sourceTree = ""; }; + A3431E012F26FA2700B96610 /* LocationExtension.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LocationExtension.swift; sourceTree = ""; }; + A3431E032F26FA6B00B96610 /* OtherAttributeExtensionLegacy.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OtherAttributeExtensionLegacy.swift; sourceTree = ""; }; + A3431E052F26FA9C00B96610 /* OtherAttributeExtension.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OtherAttributeExtension.swift; sourceTree = ""; }; A3471B9F2DAF7ABF00FEB845 /* bisenetv2.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = bisenetv2.mlpackage; sourceTree = ""; }; A35547142EC198A600F43AFD /* ContourRequestProcessor.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContourRequestProcessor.swift; sourceTree = ""; }; A355471D2EC1A47200F43AFD /* SharedAppData.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SharedAppData.swift; sourceTree = ""; }; @@ -246,8 +252,8 @@ A35E051D2EDFB099003C26CF /* OSMWay.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OSMWay.swift; sourceTree = ""; }; A364B5322F25575D00325E5C /* DepthFilter.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DepthFilter.swift; sourceTree = ""; }; A364B5342F25589600325E5C /* DepthFiltering.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = DepthFiltering.metal; sourceTree = ""; }; - A364B5D82F259AD600325E5C /* PlaneFit.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PlaneFit.swift; sourceTree = ""; }; - A364B5DC2F259AF900325E5C /* PlaneFitting.metal */ = {isa = 
PBXFileReference; lastKnownFileType = sourcecode.metal; path = PlaneFitting.metal; sourceTree = ""; }; + A364B5D82F259AD600325E5C /* PlaneFitProcessor.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PlaneFitProcessor.swift; sourceTree = ""; }; + A364B5DC2F259AF900325E5C /* PlaneFit.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PlaneFit.metal; sourceTree = ""; }; A364B5DE2F26DB5300325E5C /* WorldPointsProcessor.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WorldPointsProcessor.swift; sourceTree = ""; }; A36C6E012E134CE600A86004 /* bisenetv2_35_640_640.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = bisenetv2_35_640_640.mlpackage; sourceTree = ""; }; A374FAB62EE0173200055268 /* OSMResponseElement.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OSMResponseElement.swift; sourceTree = ""; }; @@ -587,6 +593,16 @@ path = Others; sourceTree = ""; }; + A3431E002F26FA0C00B96610 /* Extensions */ = { + isa = PBXGroup; + children = ( + A3431E032F26FA6B00B96610 /* OtherAttributeExtensionLegacy.swift */, + A3431E052F26FA9C00B96610 /* OtherAttributeExtension.swift */, + A3431E012F26FA2700B96610 /* LocationExtension.swift */, + ); + path = Extensions; + sourceTree = ""; + }; A34B70CC2DDFE638007B191F /* ARCamera */ = { isa = PBXGroup; children = ( @@ -609,6 +625,7 @@ A35E050B2EDE359C003C26CF /* AttributeEstimation */ = { isa = PBXGroup; children = ( + A3431E002F26FA0C00B96610 /* Extensions */, A364B5D72F259A7800325E5C /* Plane */, A35E050E2EDE35ED003C26CF /* Localization */, A35E05172EDEA470003C26CF /* AttributeEstimationPipeline.swift */, @@ -675,8 +692,8 @@ isa = PBXGroup; children = ( A364B5DE2F26DB5300325E5C /* WorldPointsProcessor.swift */, - A364B5DC2F259AF900325E5C /* PlaneFitting.metal */, - A364B5D82F259AD600325E5C /* PlaneFit.swift */, + A364B5DC2F259AF900325E5C /* PlaneFit.metal */, + A364B5D82F259AD600325E5C /* PlaneFitProcessor.swift */, ); path = Plane; sourceTree = ""; @@ -1182,6 +1199,7 @@ A35547C42EC1AF5700F43AFD /* CaptureData.swift in Sources */, A308015E2EC09BB700B1BA3A /* CocoCustom35ClassConfig.swift in Sources */, A308015F2EC09BB700B1BA3A /* MapillaryCustom11ClassConfig.swift in Sources */, + A3431E022F26FA2C00B96610 /* LocationExtension.swift in Sources */, A30801602EC09BB700B1BA3A /* VOCClassConfig.swift in Sources */, A35E051A2EDFB017003C26CF /* OSMPayload.swift in Sources */, A30801612EC09BB700B1BA3A /* CocoCustom53ClassConfig.swift in Sources */, @@ -1207,6 +1225,7 @@ CA924A932CEB9AB000FCA928 /* ChangesetService.swift in Sources */, A364B5332F25576000325E5C /* DepthFilter.swift in Sources */, A3DA4DBE2EBCB9F9005BB812 /* MetalContext.swift in Sources */, + A3431E062F26FA9C00B96610 /* OtherAttributeExtension.swift in Sources */, A3A739452DD4BA3F0073C7D2 /* CustomXMLParser.swift in Sources */, A35E051C2EDFB094003C26CF /* OSMNode.swift in Sources */, A35547CE2EC3048700F43AFD /* AnnotationImageViewController.swift in Sources */, @@ -1218,7 +1237,7 @@ A35E05182EDEA476003C26CF /* AttributeEstimationPipeline.swift in Sources */, CAA947792CDE700A000C6918 /* AuthService.swift in Sources */, A3C22FD32CF194A600533BF7 /* CGImageUtils.swift in Sources */, - A364B5DD2F259AFE00325E5C /* PlaneFitting.metal in Sources */, + A364B5DD2F259AFE00325E5C /* PlaneFit.metal in Sources */, A3DC22E92DCF0F9A0020CE84 /* ImageProcessing.metal in Sources */, A37E72142ED95C0C00CFE4EF /* MeshHelpers.swift in Sources */, 
A30C67E62EE27331006E4321 /* EditableAccessibilityFeature.swift in Sources */, @@ -1256,6 +1275,7 @@ A39C9F3D2DD9BE2800455E45 /* APIConstants.swift in Sources */, A38338C22EDA9E6F00F1A402 /* AnnotationFeatureDetailView.swift in Sources */, A38338C62EDAF3E900F1A402 /* DepthMapProcessor.swift in Sources */, + A3431E042F26FA7200B96610 /* OtherAttributeExtensionLegacy.swift in Sources */, A3A413A62ECD862B0039298C /* AccessibilityFeature.swift in Sources */, A3D78D742E65108E003BFE78 /* WorkspaceViewModel.swift in Sources */, A30C67E82EE2733B006E4321 /* MappedAccessibilityFeature.swift in Sources */, @@ -1284,7 +1304,7 @@ A3FFAA832DE5253E002B99BD /* bisenetv2_53_640_640.mlpackage in Sources */, A3FFAA7A2DE01A0F002B99BD /* ARCameraView.swift in Sources */, A37E3E9E2EFBAA8700B07B77 /* AccessibilityFeatureSnapshot.swift in Sources */, - A364B5D92F259AD700325E5C /* PlaneFit.swift in Sources */, + A364B5D92F259AD700325E5C /* PlaneFitProcessor.swift in Sources */, A3FFAA782DE01637002B99BD /* ARCameraUtils.swift in Sources */, A3FE166E2E1C2AF200DAE5BE /* SegmentationEncoder.swift in Sources */, A30BED3A2ED162F1004A5B51 /* ConnectedComponents.swift in Sources */, diff --git a/IOSAccessAssessment/AttributeEstimation/AttributeEstimationPipeline.swift b/IOSAccessAssessment/AttributeEstimation/AttributeEstimationPipeline.swift index eefd171..1bc0e8e 100644 --- a/IOSAccessAssessment/AttributeEstimation/AttributeEstimationPipeline.swift +++ b/IOSAccessAssessment/AttributeEstimation/AttributeEstimationPipeline.swift @@ -50,6 +50,7 @@ class AttributeEstimationPipeline: ObservableObject { var depthMapProcessor: DepthMapProcessor? var localizationProcessor: LocalizationProcessor? + var planeFitProcesor: PlaneFitProcessor? var captureImageData: (any CaptureImageDataProtocol)? var captureMeshData: (any CaptureMeshDataProtocol)? 
@@ -64,6 +65,7 @@ class AttributeEstimationPipeline: ObservableObject { } self.depthMapProcessor = try DepthMapProcessor(depthImage: depthImage) self.localizationProcessor = LocalizationProcessor() + self.planeFitProcesor = try PlaneFitProcessor() self.captureImageData = captureImageData self.captureMeshData = captureMeshData } @@ -142,13 +144,13 @@ class AttributeEstimationPipeline: ObservableObject { let crossSlopeAttributeValue = try self.calculateCrossSlope(accessibilityFeature: accessibilityFeature) try accessibilityFeature.setAttributeValue(crossSlopeAttributeValue, for: .crossSlope, isCalculated: true) case .widthLegacy: - let widthAttributeValue = try self.calculateWidth(accessibilityFeature: accessibilityFeature) + let widthAttributeValue = try self.calculateWidthLegacy(accessibilityFeature: accessibilityFeature) try accessibilityFeature.setAttributeValue(widthAttributeValue, for: .widthLegacy, isCalculated: true) case .runningSlopeLegacy: - let runningSlopeAttributeValue = try self.calculateRunningSlope(accessibilityFeature: accessibilityFeature) + let runningSlopeAttributeValue = try self.calculateRunningSlopeLegacy(accessibilityFeature: accessibilityFeature) try accessibilityFeature.setAttributeValue(runningSlopeAttributeValue, for: .runningSlopeLegacy, isCalculated: true) case .crossSlopeLegacy: - let crossSlopeAttributeValue = try self.calculateCrossSlope(accessibilityFeature: accessibilityFeature) + let crossSlopeAttributeValue = try self.calculateCrossSlopeLegacy(accessibilityFeature: accessibilityFeature) try accessibilityFeature.setAttributeValue(crossSlopeAttributeValue, for: .crossSlopeLegacy, isCalculated: true) default: continue @@ -163,346 +165,3 @@ class AttributeEstimationPipeline: ObservableObject { } } } - -/** - Extension for additional location processing methods. 
- */ -extension AttributeEstimationPipeline { - private func calculateLocationForPoint( - deviceLocation: CLLocationCoordinate2D, - accessibilityFeature: EditableAccessibilityFeature - ) throws -> LocationRequestResult { - guard let depthMapProcessor = self.depthMapProcessor else { - throw AttributeEstimationPipelineError.configurationError(Constants.Texts.depthMapProcessorKey) - } - guard let localizationProcessor = self.localizationProcessor else { - throw AttributeEstimationPipelineError.configurationError(Constants.Texts.localizationProcessorKey) - } - guard let captureImageData = self.captureImageData else { - throw AttributeEstimationPipelineError.missingCaptureData - } - let captureImageDataConcrete = CaptureImageData(captureImageData) - return try getLocationFromCentroid( - depthMapProcessor: depthMapProcessor, - localizationProcessor: localizationProcessor, - captureImageData: captureImageDataConcrete, - deviceLocation: deviceLocation, - accessibilityFeature: accessibilityFeature - ) - } - - private func calculateLocationForLineString( - deviceLocation: CLLocationCoordinate2D, - accessibilityFeature: EditableAccessibilityFeature - ) throws -> LocationRequestResult { - guard let depthMapProcessor = self.depthMapProcessor else { - throw AttributeEstimationPipelineError.configurationError(Constants.Texts.depthMapProcessorKey) - } - guard let localizationProcessor = self.localizationProcessor else { - throw AttributeEstimationPipelineError.configurationError(Constants.Texts.localizationProcessorKey) - } - guard let captureImageData = self.captureImageData else { - throw AttributeEstimationPipelineError.missingCaptureData - } - let captureImageDataConcrete = CaptureImageData(captureImageData) - do { - return try getLocationFromTrapezoid( - depthMapProcessor: depthMapProcessor, - localizationProcessor: localizationProcessor, - captureImageData: captureImageDataConcrete, - deviceLocation: deviceLocation, - accessibilityFeature: accessibilityFeature - ) - } catch { - return try getLocationFromCentroid( - depthMapProcessor: depthMapProcessor, - localizationProcessor: localizationProcessor, - captureImageData: captureImageDataConcrete, - deviceLocation: deviceLocation, - accessibilityFeature: accessibilityFeature - ) - } - } - - private func calculateLocationForPolygon( - deviceLocation: CLLocationCoordinate2D, - accessibilityFeature: EditableAccessibilityFeature - ) throws -> LocationRequestResult { - guard let depthMapProcessor = self.depthMapProcessor else { - throw AttributeEstimationPipelineError.configurationError(Constants.Texts.depthMapProcessorKey) - } - guard let localizationProcessor = self.localizationProcessor else { - throw AttributeEstimationPipelineError.configurationError(Constants.Texts.localizationProcessorKey) - } - guard let captureImageData = self.captureImageData else { - throw AttributeEstimationPipelineError.missingCaptureData - } - let captureImageDataConcrete = CaptureImageData(captureImageData) - do { - return try getLocationFromPolygon( - depthMapProcessor: depthMapProcessor, - localizationProcessor: localizationProcessor, - captureImageData: captureImageDataConcrete, - deviceLocation: deviceLocation, - accessibilityFeature: accessibilityFeature - ) - } catch { - return try getLocationFromCentroid( - depthMapProcessor: depthMapProcessor, - localizationProcessor: localizationProcessor, - captureImageData: captureImageDataConcrete, - deviceLocation: deviceLocation, - accessibilityFeature: accessibilityFeature - ) - } - } - - private func getLocationFromCentroid( - 
depthMapProcessor: DepthMapProcessor, - localizationProcessor: LocalizationProcessor, - captureImageData: CaptureImageData, - deviceLocation: CLLocationCoordinate2D, - accessibilityFeature: EditableAccessibilityFeature - ) throws -> LocationRequestResult { - let featureDepthValue = try depthMapProcessor.getFeatureDepthAtCentroidInRadius( - detectedFeature: accessibilityFeature, radius: 3 - ) - let featureCentroid = accessibilityFeature.contourDetails.centroid - let locationDelta = localizationProcessor.calculateDelta( - point: featureCentroid, depth: featureDepthValue, - imageSize: captureImageData.originalSize, - cameraTransform: captureImageData.cameraTransform, - cameraIntrinsics: captureImageData.cameraIntrinsics - ) - let locationCoordinate = localizationProcessor.calculateLocation( - point: featureCentroid, depth: featureDepthValue, - imageSize: captureImageData.originalSize, - cameraTransform: captureImageData.cameraTransform, - cameraIntrinsics: captureImageData.cameraIntrinsics, - deviceLocation: deviceLocation - ) - let coordinates: [[CLLocationCoordinate2D]] = [[locationCoordinate]] - return LocationRequestResult( - coordinates: coordinates, locationDelta: locationDelta, lidarDepth: featureDepthValue - ) - } - - private func getLocationFromTrapezoid( - depthMapProcessor: DepthMapProcessor, - localizationProcessor: LocalizationProcessor, - captureImageData: CaptureImageData, - deviceLocation: CLLocationCoordinate2D, - accessibilityFeature: EditableAccessibilityFeature - ) throws -> LocationRequestResult { - let trapezoidBoundPoints = accessibilityFeature.contourDetails.normalizedPoints - guard trapezoidBoundPoints.count == 4 else { - throw AttributeEstimationPipelineError.invalidAttributeData - } - let bottomCenter = simd_float2( - x: (trapezoidBoundPoints[0].x + trapezoidBoundPoints[3].x) / 2, - y: (trapezoidBoundPoints[0].y + trapezoidBoundPoints[3].y) / 2 - ) - let topCenter = simd_float2( - x: (trapezoidBoundPoints[1].x + trapezoidBoundPoints[2].x) / 2, - y: (trapezoidBoundPoints[1].y + trapezoidBoundPoints[2].y) / 2 - ) - let points = [topCenter, bottomCenter] /// Flipped because of image coordinate system - let pointDepthValues = try depthMapProcessor.getFeatureDepthsAtNormalizedPoints(points) - let pointsWithDepth: [PointWithDepth] = zip(points, pointDepthValues).map { - return PointWithDepth(point: CGPoint(x: CGFloat($0.0.x), y: CGFloat($0.0.y)), depth: $0.1) - } - /// For debugging - let locationDeltas: [SIMD2] = pointsWithDepth.map { pointWithDepth in - return localizationProcessor.calculateDelta( - point: pointWithDepth.point, depth: pointWithDepth.depth, - imageSize: captureImageData.originalSize, - cameraTransform: captureImageData.cameraTransform, - cameraIntrinsics: captureImageData.cameraIntrinsics - ) - } - let locationCoordinates: [CLLocationCoordinate2D] = pointsWithDepth.map { pointWithDepth in - return localizationProcessor.calculateLocation( - point: pointWithDepth.point, depth: pointWithDepth.depth, - imageSize: captureImageData.originalSize, - cameraTransform: captureImageData.cameraTransform, - cameraIntrinsics: captureImageData.cameraIntrinsics, - deviceLocation: deviceLocation - ) - } - let coordinates: [[CLLocationCoordinate2D]] = [locationCoordinates] - let locationDelta = locationDeltas.reduce(SIMD2(0, 0), +) / Float(locationDeltas.count) - let lidarDepth = pointDepthValues.reduce(0, +) / Float(pointDepthValues.count) - return LocationRequestResult( - coordinates: coordinates, locationDelta: locationDelta, lidarDepth: lidarDepth - ) - } - - private 
func getLocationFromPolygon( - depthMapProcessor: DepthMapProcessor, - localizationProcessor: LocalizationProcessor, - captureImageData: CaptureImageData, - deviceLocation: CLLocationCoordinate2D, - accessibilityFeature: EditableAccessibilityFeature - ) throws -> LocationRequestResult { - let polygonPoints = accessibilityFeature.contourDetails.normalizedPoints - let leftMostPoint = polygonPoints.min { $0.x < $1.x } - let rightMostPoint = polygonPoints.max { $0.x < $1.x } - guard let leftMostPoint, let rightMostPoint else { - throw AttributeEstimationPipelineError.invalidAttributeData - } - let centerPoint = simd_float2( - x: (leftMostPoint.x + rightMostPoint.x) / 2, - y: (leftMostPoint.y + rightMostPoint.y) / 2 - ) - let points = [leftMostPoint, centerPoint, rightMostPoint] /// Closing the polygon - let pointDepthValues = try depthMapProcessor.getFeatureDepthsAtNormalizedPoints(points) - let pointsWithDepth: [PointWithDepth] = zip(points, pointDepthValues).map { - return PointWithDepth(point: CGPoint(x: CGFloat($0.0.x), y: CGFloat($0.0.y)), depth: $0.1) - } - /// For debugging - let locationDeltas: [SIMD2] = pointsWithDepth.map { pointWithDepth in - return localizationProcessor.calculateDelta( - point: pointWithDepth.point, depth: pointWithDepth.depth, - imageSize: captureImageData.originalSize, - cameraTransform: captureImageData.cameraTransform, - cameraIntrinsics: captureImageData.cameraIntrinsics - ) - } - let locationCoordinates: [CLLocationCoordinate2D] = pointsWithDepth.map { pointWithDepth in - return localizationProcessor.calculateLocation( - point: pointWithDepth.point, depth: pointWithDepth.depth, - imageSize: captureImageData.originalSize, - cameraTransform: captureImageData.cameraTransform, - cameraIntrinsics: captureImageData.cameraIntrinsics, - deviceLocation: deviceLocation - ) - } - let coordinates: [[CLLocationCoordinate2D]] = [locationCoordinates] - let locationDelta = locationDeltas.reduce(SIMD2(0, 0), +) / Float(locationDeltas.count) - let lidarDepth = pointDepthValues.reduce(0, +) / Float(pointDepthValues.count) - return LocationRequestResult( - coordinates: coordinates, locationDelta: locationDelta, lidarDepth: lidarDepth - ) - } -} - -/** - Extension for attribute calculation with rudimentary methods. - TODO: Improve upon these methods with more robust implementations. 
- */ -extension AttributeEstimationPipeline { - private func calculateWidth( - accessibilityFeature: EditableAccessibilityFeature - ) throws -> AccessibilityFeatureAttribute.Value { - guard let depthMapProcessor = self.depthMapProcessor else { - throw AttributeEstimationPipelineError.configurationError(Constants.Texts.depthMapProcessorKey) - } - guard let localizationProcessor = self.localizationProcessor else { - throw AttributeEstimationPipelineError.configurationError(Constants.Texts.localizationProcessorKey) - } - guard let captureImageData = self.captureImageData else { - throw AttributeEstimationPipelineError.missingCaptureData - } - let trapezoidBoundPoints = accessibilityFeature.contourDetails.normalizedPoints - guard trapezoidBoundPoints.count == 4 else { - throw AttributeEstimationPipelineError.invalidAttributeData - } - let trapezoidBoundDepthValues = try depthMapProcessor.getFeatureDepthsAtBounds( - detectedFeature: accessibilityFeature - ) - let trapezoidBoundPointsWithDepth: [PointWithDepth] = zip(trapezoidBoundPoints, trapezoidBoundDepthValues).map { - PointWithDepth( - point: CGPoint(x: CGFloat($0.0.x), y: CGFloat($0.0.y)), - depth: $0.1 - ) - } - let widthValue = try localizationProcessor.calculateWidth( - trapezoidBoundsWithDepth: trapezoidBoundPointsWithDepth, - imageSize: captureImageData.originalSize, - cameraTransform: captureImageData.cameraTransform, - cameraIntrinsics: captureImageData.cameraIntrinsics - ) - guard let widthAttributeValue = AccessibilityFeatureAttribute.width.valueFromDouble(Double(widthValue)) else { - throw AttributeEstimationPipelineError.attributeAssignmentError - } - return widthAttributeValue - } - - private func calculateRunningSlope( - accessibilityFeature: EditableAccessibilityFeature - ) throws -> AccessibilityFeatureAttribute.Value { - guard let depthMapProcessor = self.depthMapProcessor else { - throw AttributeEstimationPipelineError.configurationError(Constants.Texts.depthMapProcessorKey) - } - guard let localizationProcessor = self.localizationProcessor else { - throw AttributeEstimationPipelineError.configurationError(Constants.Texts.localizationProcessorKey) - } - guard let captureImageData = self.captureImageData else { - throw AttributeEstimationPipelineError.missingCaptureData - } - let trapezoidBoundPoints = accessibilityFeature.contourDetails.normalizedPoints - guard trapezoidBoundPoints.count == 4 else { - throw AttributeEstimationPipelineError.invalidAttributeData - } - let trapezoidBoundDepthValues = try depthMapProcessor.getFeatureDepthsAtBounds( - detectedFeature: accessibilityFeature - ) - let trapezoidBoundPointsWithDepth: [PointWithDepth] = zip(trapezoidBoundPoints, trapezoidBoundDepthValues).map { - PointWithDepth( - point: CGPoint(x: CGFloat($0.0.x), y: CGFloat($0.0.y)), - depth: $0.1 - ) - } - let runningSlopeValue: Float = try localizationProcessor.calculateRunningSlope( - trapezoidBoundsWithDepth: trapezoidBoundPointsWithDepth, - imageSize: captureImageData.originalSize, - cameraTransform: captureImageData.cameraTransform, - cameraIntrinsics: captureImageData.cameraIntrinsics - ) - guard let runningSlopeAttributeValue = AccessibilityFeatureAttribute.runningSlope.valueFromDouble( - Double(runningSlopeValue) - ) else { - throw AttributeEstimationPipelineError.attributeAssignmentError - } - return runningSlopeAttributeValue - } - - private func calculateCrossSlope( - accessibilityFeature: EditableAccessibilityFeature - ) throws -> AccessibilityFeatureAttribute.Value { - guard let depthMapProcessor = 
self.depthMapProcessor else { - throw AttributeEstimationPipelineError.configurationError(Constants.Texts.depthMapProcessorKey) - } - guard let localizationProcessor = self.localizationProcessor else { - throw AttributeEstimationPipelineError.configurationError(Constants.Texts.localizationProcessorKey) - } - guard let captureImageData = self.captureImageData else { - throw AttributeEstimationPipelineError.missingCaptureData - } - let trapezoidBoundPoints = accessibilityFeature.contourDetails.normalizedPoints - guard trapezoidBoundPoints.count == 4 else { - throw AttributeEstimationPipelineError.invalidAttributeData - } - let trapezoidBoundDepthValues = try depthMapProcessor.getFeatureDepthsAtBounds( - detectedFeature: accessibilityFeature - ) - let trapezoidBoundPointsWithDepth: [PointWithDepth] = zip(trapezoidBoundPoints, trapezoidBoundDepthValues).map { - PointWithDepth( - point: CGPoint(x: CGFloat($0.0.x), y: CGFloat($0.0.y)), - depth: $0.1 - ) - } - let crossSlopeValue: Float = try localizationProcessor.calculateCrossSlope( - trapezoidBoundsWithDepth: trapezoidBoundPointsWithDepth, - imageSize: captureImageData.originalSize, - cameraTransform: captureImageData.cameraTransform, - cameraIntrinsics: captureImageData.cameraIntrinsics - ) - guard let crossSlopeAttributeValue = AccessibilityFeatureAttribute.crossSlope.valueFromDouble( - Double(crossSlopeValue) - ) else { - throw AttributeEstimationPipelineError.attributeAssignmentError - } - return crossSlopeAttributeValue - } -} diff --git a/IOSAccessAssessment/AttributeEstimation/Extensions/LocationExtension.swift b/IOSAccessAssessment/AttributeEstimation/Extensions/LocationExtension.swift new file mode 100644 index 0000000..97f154d --- /dev/null +++ b/IOSAccessAssessment/AttributeEstimation/Extensions/LocationExtension.swift @@ -0,0 +1,230 @@ +// +// LocationExtension.swift +// IOSAccessAssessment +// +// Created by Himanshu on 1/25/26. +// +import SwiftUI +import CoreLocation + +/** + Extension for additional location processing methods. 
+ */ +extension AttributeEstimationPipeline { + func calculateLocationForPoint( + deviceLocation: CLLocationCoordinate2D, + accessibilityFeature: EditableAccessibilityFeature + ) throws -> LocationRequestResult { + guard let depthMapProcessor = self.depthMapProcessor else { + throw AttributeEstimationPipelineError.configurationError(Constants.Texts.depthMapProcessorKey) + } + guard let localizationProcessor = self.localizationProcessor else { + throw AttributeEstimationPipelineError.configurationError(Constants.Texts.localizationProcessorKey) + } + guard let captureImageData = self.captureImageData else { + throw AttributeEstimationPipelineError.missingCaptureData + } + let captureImageDataConcrete = CaptureImageData(captureImageData) + return try getLocationFromCentroid( + depthMapProcessor: depthMapProcessor, + localizationProcessor: localizationProcessor, + captureImageData: captureImageDataConcrete, + deviceLocation: deviceLocation, + accessibilityFeature: accessibilityFeature + ) + } + + func calculateLocationForLineString( + deviceLocation: CLLocationCoordinate2D, + accessibilityFeature: EditableAccessibilityFeature + ) throws -> LocationRequestResult { + guard let depthMapProcessor = self.depthMapProcessor else { + throw AttributeEstimationPipelineError.configurationError(Constants.Texts.depthMapProcessorKey) + } + guard let localizationProcessor = self.localizationProcessor else { + throw AttributeEstimationPipelineError.configurationError(Constants.Texts.localizationProcessorKey) + } + guard let captureImageData = self.captureImageData else { + throw AttributeEstimationPipelineError.missingCaptureData + } + let captureImageDataConcrete = CaptureImageData(captureImageData) + do { + return try getLocationFromTrapezoid( + depthMapProcessor: depthMapProcessor, + localizationProcessor: localizationProcessor, + captureImageData: captureImageDataConcrete, + deviceLocation: deviceLocation, + accessibilityFeature: accessibilityFeature + ) + } catch { + return try getLocationFromCentroid( + depthMapProcessor: depthMapProcessor, + localizationProcessor: localizationProcessor, + captureImageData: captureImageDataConcrete, + deviceLocation: deviceLocation, + accessibilityFeature: accessibilityFeature + ) + } + } + + func calculateLocationForPolygon( + deviceLocation: CLLocationCoordinate2D, + accessibilityFeature: EditableAccessibilityFeature + ) throws -> LocationRequestResult { + guard let depthMapProcessor = self.depthMapProcessor else { + throw AttributeEstimationPipelineError.configurationError(Constants.Texts.depthMapProcessorKey) + } + guard let localizationProcessor = self.localizationProcessor else { + throw AttributeEstimationPipelineError.configurationError(Constants.Texts.localizationProcessorKey) + } + guard let captureImageData = self.captureImageData else { + throw AttributeEstimationPipelineError.missingCaptureData + } + let captureImageDataConcrete = CaptureImageData(captureImageData) + do { + return try getLocationFromPolygon( + depthMapProcessor: depthMapProcessor, + localizationProcessor: localizationProcessor, + captureImageData: captureImageDataConcrete, + deviceLocation: deviceLocation, + accessibilityFeature: accessibilityFeature + ) + } catch { + return try getLocationFromCentroid( + depthMapProcessor: depthMapProcessor, + localizationProcessor: localizationProcessor, + captureImageData: captureImageDataConcrete, + deviceLocation: deviceLocation, + accessibilityFeature: accessibilityFeature + ) + } + } + + func getLocationFromCentroid( + depthMapProcessor: 
DepthMapProcessor, + localizationProcessor: LocalizationProcessor, + captureImageData: CaptureImageData, + deviceLocation: CLLocationCoordinate2D, + accessibilityFeature: EditableAccessibilityFeature + ) throws -> LocationRequestResult { + let featureDepthValue = try depthMapProcessor.getFeatureDepthAtCentroidInRadius( + detectedFeature: accessibilityFeature, radius: 3 + ) + let featureCentroid = accessibilityFeature.contourDetails.centroid + let locationDelta = localizationProcessor.calculateDelta( + point: featureCentroid, depth: featureDepthValue, + imageSize: captureImageData.originalSize, + cameraTransform: captureImageData.cameraTransform, + cameraIntrinsics: captureImageData.cameraIntrinsics + ) + let locationCoordinate = localizationProcessor.calculateLocation( + point: featureCentroid, depth: featureDepthValue, + imageSize: captureImageData.originalSize, + cameraTransform: captureImageData.cameraTransform, + cameraIntrinsics: captureImageData.cameraIntrinsics, + deviceLocation: deviceLocation + ) + let coordinates: [[CLLocationCoordinate2D]] = [[locationCoordinate]] + return LocationRequestResult( + coordinates: coordinates, locationDelta: locationDelta, lidarDepth: featureDepthValue + ) + } + + func getLocationFromTrapezoid( + depthMapProcessor: DepthMapProcessor, + localizationProcessor: LocalizationProcessor, + captureImageData: CaptureImageData, + deviceLocation: CLLocationCoordinate2D, + accessibilityFeature: EditableAccessibilityFeature + ) throws -> LocationRequestResult { + let trapezoidBoundPoints = accessibilityFeature.contourDetails.normalizedPoints + guard trapezoidBoundPoints.count == 4 else { + throw AttributeEstimationPipelineError.invalidAttributeData + } + let bottomCenter = simd_float2( + x: (trapezoidBoundPoints[0].x + trapezoidBoundPoints[3].x) / 2, + y: (trapezoidBoundPoints[0].y + trapezoidBoundPoints[3].y) / 2 + ) + let topCenter = simd_float2( + x: (trapezoidBoundPoints[1].x + trapezoidBoundPoints[2].x) / 2, + y: (trapezoidBoundPoints[1].y + trapezoidBoundPoints[2].y) / 2 + ) + let points = [topCenter, bottomCenter] /// Flipped because of image coordinate system + let pointDepthValues = try depthMapProcessor.getFeatureDepthsAtNormalizedPoints(points) + let pointsWithDepth: [PointWithDepth] = zip(points, pointDepthValues).map { + return PointWithDepth(point: CGPoint(x: CGFloat($0.0.x), y: CGFloat($0.0.y)), depth: $0.1) + } + /// For debugging + let locationDeltas: [SIMD2] = pointsWithDepth.map { pointWithDepth in + return localizationProcessor.calculateDelta( + point: pointWithDepth.point, depth: pointWithDepth.depth, + imageSize: captureImageData.originalSize, + cameraTransform: captureImageData.cameraTransform, + cameraIntrinsics: captureImageData.cameraIntrinsics + ) + } + let locationCoordinates: [CLLocationCoordinate2D] = pointsWithDepth.map { pointWithDepth in + return localizationProcessor.calculateLocation( + point: pointWithDepth.point, depth: pointWithDepth.depth, + imageSize: captureImageData.originalSize, + cameraTransform: captureImageData.cameraTransform, + cameraIntrinsics: captureImageData.cameraIntrinsics, + deviceLocation: deviceLocation + ) + } + let coordinates: [[CLLocationCoordinate2D]] = [locationCoordinates] + let locationDelta = locationDeltas.reduce(SIMD2(0, 0), +) / Float(locationDeltas.count) + let lidarDepth = pointDepthValues.reduce(0, +) / Float(pointDepthValues.count) + return LocationRequestResult( + coordinates: coordinates, locationDelta: locationDelta, lidarDepth: lidarDepth + ) + } + + func getLocationFromPolygon( + 
depthMapProcessor: DepthMapProcessor, + localizationProcessor: LocalizationProcessor, + captureImageData: CaptureImageData, + deviceLocation: CLLocationCoordinate2D, + accessibilityFeature: EditableAccessibilityFeature + ) throws -> LocationRequestResult { + let polygonPoints = accessibilityFeature.contourDetails.normalizedPoints + let leftMostPoint = polygonPoints.min { $0.x < $1.x } + let rightMostPoint = polygonPoints.max { $0.x < $1.x } + guard let leftMostPoint, let rightMostPoint else { + throw AttributeEstimationPipelineError.invalidAttributeData + } + let centerPoint = simd_float2( + x: (leftMostPoint.x + rightMostPoint.x) / 2, + y: (leftMostPoint.y + rightMostPoint.y) / 2 + ) + let points = [leftMostPoint, centerPoint, rightMostPoint] /// Closing the polygon + let pointDepthValues = try depthMapProcessor.getFeatureDepthsAtNormalizedPoints(points) + let pointsWithDepth: [PointWithDepth] = zip(points, pointDepthValues).map { + return PointWithDepth(point: CGPoint(x: CGFloat($0.0.x), y: CGFloat($0.0.y)), depth: $0.1) + } + /// For debugging + let locationDeltas: [SIMD2] = pointsWithDepth.map { pointWithDepth in + return localizationProcessor.calculateDelta( + point: pointWithDepth.point, depth: pointWithDepth.depth, + imageSize: captureImageData.originalSize, + cameraTransform: captureImageData.cameraTransform, + cameraIntrinsics: captureImageData.cameraIntrinsics + ) + } + let locationCoordinates: [CLLocationCoordinate2D] = pointsWithDepth.map { pointWithDepth in + return localizationProcessor.calculateLocation( + point: pointWithDepth.point, depth: pointWithDepth.depth, + imageSize: captureImageData.originalSize, + cameraTransform: captureImageData.cameraTransform, + cameraIntrinsics: captureImageData.cameraIntrinsics, + deviceLocation: deviceLocation + ) + } + let coordinates: [[CLLocationCoordinate2D]] = [locationCoordinates] + let locationDelta = locationDeltas.reduce(SIMD2(0, 0), +) / Float(locationDeltas.count) + let lidarDepth = pointDepthValues.reduce(0, +) / Float(pointDepthValues.count) + return LocationRequestResult( + coordinates: coordinates, locationDelta: locationDelta, lidarDepth: lidarDepth + ) + } +} diff --git a/IOSAccessAssessment/AttributeEstimation/Extensions/OtherAttributeExtension.swift b/IOSAccessAssessment/AttributeEstimation/Extensions/OtherAttributeExtension.swift new file mode 100644 index 0000000..c75aa02 --- /dev/null +++ b/IOSAccessAssessment/AttributeEstimation/Extensions/OtherAttributeExtension.swift @@ -0,0 +1,129 @@ +// +// OtherAttributeExtensionLegacy.swift +// IOSAccessAssessment +// +// Created by Himanshu on 1/25/26. +// +import SwiftUI +import CoreLocation + +/** + Extension for attribute calculation with rudimentary methods. + TODO: Improve upon these methods with more robust implementations. 
+ */ +extension AttributeEstimationPipeline { + func calculateWidth( + accessibilityFeature: EditableAccessibilityFeature + ) throws -> AccessibilityFeatureAttribute.Value { + guard let depthMapProcessor = self.depthMapProcessor else { + throw AttributeEstimationPipelineError.configurationError(Constants.Texts.depthMapProcessorKey) + } + guard let localizationProcessor = self.localizationProcessor else { + throw AttributeEstimationPipelineError.configurationError(Constants.Texts.localizationProcessorKey) + } + guard let captureImageData = self.captureImageData else { + throw AttributeEstimationPipelineError.missingCaptureData + } + let trapezoidBoundPoints = accessibilityFeature.contourDetails.normalizedPoints + guard trapezoidBoundPoints.count == 4 else { + throw AttributeEstimationPipelineError.invalidAttributeData + } + let trapezoidBoundDepthValues = try depthMapProcessor.getFeatureDepthsAtBounds( + detectedFeature: accessibilityFeature + ) + let trapezoidBoundPointsWithDepth: [PointWithDepth] = zip(trapezoidBoundPoints, trapezoidBoundDepthValues).map { + PointWithDepth( + point: CGPoint(x: CGFloat($0.0.x), y: CGFloat($0.0.y)), + depth: $0.1 + ) + } + let widthValue = try localizationProcessor.calculateWidth( + trapezoidBoundsWithDepth: trapezoidBoundPointsWithDepth, + imageSize: captureImageData.originalSize, + cameraTransform: captureImageData.cameraTransform, + cameraIntrinsics: captureImageData.cameraIntrinsics + ) + guard let widthAttributeValue = AccessibilityFeatureAttribute.width.valueFromDouble(Double(widthValue)) else { + throw AttributeEstimationPipelineError.attributeAssignmentError + } + return widthAttributeValue + } + + func calculateRunningSlope( + accessibilityFeature: EditableAccessibilityFeature + ) throws -> AccessibilityFeatureAttribute.Value { + guard let depthMapProcessor = self.depthMapProcessor else { + throw AttributeEstimationPipelineError.configurationError(Constants.Texts.depthMapProcessorKey) + } + guard let localizationProcessor = self.localizationProcessor else { + throw AttributeEstimationPipelineError.configurationError(Constants.Texts.localizationProcessorKey) + } + guard let captureImageData = self.captureImageData else { + throw AttributeEstimationPipelineError.missingCaptureData + } + let trapezoidBoundPoints = accessibilityFeature.contourDetails.normalizedPoints + guard trapezoidBoundPoints.count == 4 else { + throw AttributeEstimationPipelineError.invalidAttributeData + } + let trapezoidBoundDepthValues = try depthMapProcessor.getFeatureDepthsAtBounds( + detectedFeature: accessibilityFeature + ) + let trapezoidBoundPointsWithDepth: [PointWithDepth] = zip(trapezoidBoundPoints, trapezoidBoundDepthValues).map { + PointWithDepth( + point: CGPoint(x: CGFloat($0.0.x), y: CGFloat($0.0.y)), + depth: $0.1 + ) + } + let runningSlopeValue: Float = try localizationProcessor.calculateRunningSlope( + trapezoidBoundsWithDepth: trapezoidBoundPointsWithDepth, + imageSize: captureImageData.originalSize, + cameraTransform: captureImageData.cameraTransform, + cameraIntrinsics: captureImageData.cameraIntrinsics + ) + guard let runningSlopeAttributeValue = AccessibilityFeatureAttribute.runningSlope.valueFromDouble( + Double(runningSlopeValue) + ) else { + throw AttributeEstimationPipelineError.attributeAssignmentError + } + return runningSlopeAttributeValue + } + + func calculateCrossSlope( + accessibilityFeature: EditableAccessibilityFeature + ) throws -> AccessibilityFeatureAttribute.Value { + guard let depthMapProcessor = self.depthMapProcessor else { + throw 
AttributeEstimationPipelineError.configurationError(Constants.Texts.depthMapProcessorKey) + } + guard let localizationProcessor = self.localizationProcessor else { + throw AttributeEstimationPipelineError.configurationError(Constants.Texts.localizationProcessorKey) + } + guard let captureImageData = self.captureImageData else { + throw AttributeEstimationPipelineError.missingCaptureData + } + let trapezoidBoundPoints = accessibilityFeature.contourDetails.normalizedPoints + guard trapezoidBoundPoints.count == 4 else { + throw AttributeEstimationPipelineError.invalidAttributeData + } + let trapezoidBoundDepthValues = try depthMapProcessor.getFeatureDepthsAtBounds( + detectedFeature: accessibilityFeature + ) + let trapezoidBoundPointsWithDepth: [PointWithDepth] = zip(trapezoidBoundPoints, trapezoidBoundDepthValues).map { + PointWithDepth( + point: CGPoint(x: CGFloat($0.0.x), y: CGFloat($0.0.y)), + depth: $0.1 + ) + } + let crossSlopeValue: Float = try localizationProcessor.calculateCrossSlope( + trapezoidBoundsWithDepth: trapezoidBoundPointsWithDepth, + imageSize: captureImageData.originalSize, + cameraTransform: captureImageData.cameraTransform, + cameraIntrinsics: captureImageData.cameraIntrinsics + ) + guard let crossSlopeAttributeValue = AccessibilityFeatureAttribute.crossSlope.valueFromDouble( + Double(crossSlopeValue) + ) else { + throw AttributeEstimationPipelineError.attributeAssignmentError + } + return crossSlopeAttributeValue + } +} diff --git a/IOSAccessAssessment/AttributeEstimation/Extensions/OtherAttributeExtensionLegacy.swift b/IOSAccessAssessment/AttributeEstimation/Extensions/OtherAttributeExtensionLegacy.swift new file mode 100644 index 0000000..eefead4 --- /dev/null +++ b/IOSAccessAssessment/AttributeEstimation/Extensions/OtherAttributeExtensionLegacy.swift @@ -0,0 +1,131 @@ +// +// OtherAttributeExtensionLegacy.swift +// IOSAccessAssessment +// +// Created by Himanshu on 1/25/26. +// +import SwiftUI +import CoreLocation + +/** + Extension for attribute calculation with rudimentary methods. + TODO: Improve upon these methods with more robust implementations. + + NOTE: Legacy Implementation. Needs to be improved. 
+ */ +extension AttributeEstimationPipeline { + func calculateWidthLegacy( + accessibilityFeature: EditableAccessibilityFeature + ) throws -> AccessibilityFeatureAttribute.Value { + guard let depthMapProcessor = self.depthMapProcessor else { + throw AttributeEstimationPipelineError.configurationError(Constants.Texts.depthMapProcessorKey) + } + guard let localizationProcessor = self.localizationProcessor else { + throw AttributeEstimationPipelineError.configurationError(Constants.Texts.localizationProcessorKey) + } + guard let captureImageData = self.captureImageData else { + throw AttributeEstimationPipelineError.missingCaptureData + } + let trapezoidBoundPoints = accessibilityFeature.contourDetails.normalizedPoints + guard trapezoidBoundPoints.count == 4 else { + throw AttributeEstimationPipelineError.invalidAttributeData + } + let trapezoidBoundDepthValues = try depthMapProcessor.getFeatureDepthsAtBounds( + detectedFeature: accessibilityFeature + ) + let trapezoidBoundPointsWithDepth: [PointWithDepth] = zip(trapezoidBoundPoints, trapezoidBoundDepthValues).map { + PointWithDepth( + point: CGPoint(x: CGFloat($0.0.x), y: CGFloat($0.0.y)), + depth: $0.1 + ) + } + let widthValue = try localizationProcessor.calculateWidth( + trapezoidBoundsWithDepth: trapezoidBoundPointsWithDepth, + imageSize: captureImageData.originalSize, + cameraTransform: captureImageData.cameraTransform, + cameraIntrinsics: captureImageData.cameraIntrinsics + ) + guard let widthAttributeValue = AccessibilityFeatureAttribute.width.valueFromDouble(Double(widthValue)) else { + throw AttributeEstimationPipelineError.attributeAssignmentError + } + return widthAttributeValue + } + + func calculateRunningSlopeLegacy( + accessibilityFeature: EditableAccessibilityFeature + ) throws -> AccessibilityFeatureAttribute.Value { + guard let depthMapProcessor = self.depthMapProcessor else { + throw AttributeEstimationPipelineError.configurationError(Constants.Texts.depthMapProcessorKey) + } + guard let localizationProcessor = self.localizationProcessor else { + throw AttributeEstimationPipelineError.configurationError(Constants.Texts.localizationProcessorKey) + } + guard let captureImageData = self.captureImageData else { + throw AttributeEstimationPipelineError.missingCaptureData + } + let trapezoidBoundPoints = accessibilityFeature.contourDetails.normalizedPoints + guard trapezoidBoundPoints.count == 4 else { + throw AttributeEstimationPipelineError.invalidAttributeData + } + let trapezoidBoundDepthValues = try depthMapProcessor.getFeatureDepthsAtBounds( + detectedFeature: accessibilityFeature + ) + let trapezoidBoundPointsWithDepth: [PointWithDepth] = zip(trapezoidBoundPoints, trapezoidBoundDepthValues).map { + PointWithDepth( + point: CGPoint(x: CGFloat($0.0.x), y: CGFloat($0.0.y)), + depth: $0.1 + ) + } + let runningSlopeValue: Float = try localizationProcessor.calculateRunningSlope( + trapezoidBoundsWithDepth: trapezoidBoundPointsWithDepth, + imageSize: captureImageData.originalSize, + cameraTransform: captureImageData.cameraTransform, + cameraIntrinsics: captureImageData.cameraIntrinsics + ) + guard let runningSlopeAttributeValue = AccessibilityFeatureAttribute.runningSlope.valueFromDouble( + Double(runningSlopeValue) + ) else { + throw AttributeEstimationPipelineError.attributeAssignmentError + } + return runningSlopeAttributeValue + } + + func calculateCrossSlopeLegacy( + accessibilityFeature: EditableAccessibilityFeature + ) throws -> AccessibilityFeatureAttribute.Value { + guard let depthMapProcessor = self.depthMapProcessor 
else { + throw AttributeEstimationPipelineError.configurationError(Constants.Texts.depthMapProcessorKey) + } + guard let localizationProcessor = self.localizationProcessor else { + throw AttributeEstimationPipelineError.configurationError(Constants.Texts.localizationProcessorKey) + } + guard let captureImageData = self.captureImageData else { + throw AttributeEstimationPipelineError.missingCaptureData + } + let trapezoidBoundPoints = accessibilityFeature.contourDetails.normalizedPoints + guard trapezoidBoundPoints.count == 4 else { + throw AttributeEstimationPipelineError.invalidAttributeData + } + let trapezoidBoundDepthValues = try depthMapProcessor.getFeatureDepthsAtBounds( + detectedFeature: accessibilityFeature + ) + let trapezoidBoundPointsWithDepth: [PointWithDepth] = zip(trapezoidBoundPoints, trapezoidBoundDepthValues).map { + PointWithDepth( + point: CGPoint(x: CGFloat($0.0.x), y: CGFloat($0.0.y)), + depth: $0.1 + ) + } + let crossSlopeValue: Float = try localizationProcessor.calculateCrossSlope( + trapezoidBoundsWithDepth: trapezoidBoundPointsWithDepth, + imageSize: captureImageData.originalSize, + cameraTransform: captureImageData.cameraTransform, + cameraIntrinsics: captureImageData.cameraIntrinsics + ) + guard let crossSlopeAttributeValue = AccessibilityFeatureAttribute.crossSlope.valueFromDouble( + Double(crossSlopeValue) + ) else { + throw AttributeEstimationPipelineError.attributeAssignmentError + } + return crossSlopeAttributeValue + } +} diff --git a/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitting.metal b/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFit.metal similarity index 95% rename from IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitting.metal rename to IOSAccessAssessment/AttributeEstimation/Plane/PlaneFit.metal index f537e9a..fe972a6 100644 --- a/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitting.metal +++ b/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFit.metal @@ -36,8 +36,8 @@ kernel void computeWorldPoints( texture2d segmentationTexture [[texture(0)]], texture2d depthTexture [[texture(1)]], constant uint8_t& targetValue [[buffer(0)]], - constant PlanePointsParams& params [[buffer(1)]], - device PlanePoint* points [[buffer(2)]], + constant WorldPointsParams& params [[buffer(1)]], + device WorldPoint* points [[buffer(2)]], device atomic_uint* pointCount [[buffer(3)]], uint2 gid [[thread_position_in_grid]] ) { diff --git a/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFit.swift b/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitProcessor.swift similarity index 64% rename from IOSAccessAssessment/AttributeEstimation/Plane/PlaneFit.swift rename to IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitProcessor.swift index 980ac11..cd741f1 100644 --- a/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFit.swift +++ b/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitProcessor.swift @@ -1,5 +1,5 @@ // -// PlaneFit.swift +// PlaneFitProcessor.swift // IOSAccessAssessment // // Created by Himanshu on 1/24/26. @@ -9,7 +9,7 @@ import simd import Accelerate import CoreImage -enum PlaneFitError: Error, LocalizedError { +enum PlaneFitProcessorError: Error, LocalizedError { case initializationError(message: String) var errorDescription: String? 
{ @@ -20,7 +20,7 @@ enum PlaneFitError: Error, LocalizedError { } } -struct PlaneFit { +struct PlaneFitProcessor { private let worldPointsProcessor: WorldPointsProcessor init() throws { @@ -33,7 +33,10 @@ struct PlaneFit { targetValue: UInt8, cameraTransform: simd_float4x4, cameraIntrinsics: simd_float3x3 - ) { - + ) throws { + let worldPoints = try self.worldPointsProcessor.getWorldPoints( + segmentationLabelImage: segmentationLabelImage, depthImage: depthImage, + targetValue: targetValue, cameraTransform: cameraTransform, cameraIntrinsics: cameraIntrinsics + ) } } diff --git a/IOSAccessAssessment/AttributeEstimation/Plane/WorldPointsProcessor.swift b/IOSAccessAssessment/AttributeEstimation/Plane/WorldPointsProcessor.swift index 3eb36ff..00c0e1e 100644 --- a/IOSAccessAssessment/AttributeEstimation/Plane/WorldPointsProcessor.swift +++ b/IOSAccessAssessment/AttributeEstimation/Plane/WorldPointsProcessor.swift @@ -71,12 +71,12 @@ struct WorldPointsProcessor { targetValue: UInt8, cameraTransform: simd_float4x4, cameraIntrinsics: simd_float3x3 - ) throws -> [PlanePoint] { + ) throws -> [WorldPoint] { guard let commandBuffer = self.commandQueue.makeCommandBuffer() else { throw WorldPointsProcessorError.metalPipelineCreationError } - print("PlanePoint Alignment and stride: \(MemoryLayout.alignment), \(MemoryLayout.alignment)") + print("WorldPoint Alignment and stride: \(MemoryLayout.alignment), \(MemoryLayout.alignment)") let imageSize = simd_uint2(UInt32(segmentationLabelImage.extent.width), UInt32(segmentationLabelImage.extent.height)) let invIntrinsics = simd_inverse(cameraIntrinsics) @@ -92,7 +92,7 @@ struct WorldPointsProcessor { colorSpace: CGColorSpaceCreateDeviceRGB() /// Dummy color space ) var targetValueVar = targetValue - var params = PlanePointsParams( + var params = WorldPointsParams( imageSize: imageSize, minDepthThreshold: Constants.DepthConstants.depthMinThreshold, maxDepthThreshold: Constants.DepthConstants.depthMaxThreshold, @@ -104,7 +104,7 @@ struct WorldPointsProcessor { ) let maxPoints = imageSize.x * imageSize.y let pointsBuffer: MTLBuffer = try MetalBufferUtils.makeBuffer( - device: self.device, length: MemoryLayout.stride * Int(maxPoints), options: .storageModeShared + device: self.device, length: MemoryLayout.stride * Int(maxPoints), options: .storageModeShared ) /** @@ -127,7 +127,7 @@ struct WorldPointsProcessor { commandEncoder.setTexture(segmentationLabelTexture, index: 0) commandEncoder.setTexture(depthTexture, index: 1) commandEncoder.setBytes(&targetValueVar, length: MemoryLayout.size, index: 0) - commandEncoder.setBytes(¶ms, length: MemoryLayout.stride, index: 1) + commandEncoder.setBytes(¶ms, length: MemoryLayout.stride, index: 1) commandEncoder.setBuffer(pointsBuffer, offset: 0, index: 2) commandEncoder.setBuffer(pointCount, offset: 0, index: 3) @@ -143,9 +143,9 @@ struct WorldPointsProcessor { let pointsCountPointer = pointCount.contents().bindMemory(to: UInt32.self, capacity: 1).pointee let actualPointCount = Int(pointsCountPointer) - var worldPoints: [PlanePoint] = [] + var worldPoints: [WorldPoint] = [] if actualPointCount > 0 { - let pointsPointer = pointsBuffer.contents().bindMemory(to: PlanePoint.self, capacity: actualPointCount) + let pointsPointer = pointsBuffer.contents().bindMemory(to: WorldPoint.self, capacity: actualPointCount) for i in 0.. 
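For orientation before the next patch extends it: in this patch `fitPlanePCAWithImage` only gathers the masked world points, so the PCA step it is named for is still to come. The sketch below is illustrative only, not the project's implementation; the type and function names are hypothetical, it assumes the caller has already pulled positions out of `WorldPoint` (the kernel writes them to the `p` member), it approximates the smallest-eigenvalue eigenvector with power iteration rather than a full eigensolver, and it does not handle degenerate point sets.

import simd

/// Illustrative only: a plane described by a point on it and a unit normal.
struct FittedPlaneSketch {
    let centroid: SIMD3<Float>
    let normal: SIMD3<Float>   // unit normal; the plane satisfies dot(x - centroid, normal) == 0
}

/// PCA plane fit: the plane passes through the centroid of the cloud, and its
/// normal is the eigenvector of the covariance matrix with the smallest
/// eigenvalue. That eigenvector is approximated here by power iteration on the
/// shifted matrix (trace(C) * I - C), which makes the smallest eigenvector of C
/// the dominant one; a production version would use a proper eigensolver.
func fitPlanePCASketch(points: [SIMD3<Float>]) -> FittedPlaneSketch? {
    guard points.count >= 3 else { return nil }

    // Centroid of the point cloud.
    let centroid = points.reduce(SIMD3<Float>(repeating: 0), +) / Float(points.count)

    // Unnormalized 3x3 covariance; scaling does not change the eigenvectors.
    var covariance = simd_float3x3(0)
    for point in points {
        let d = point - centroid
        covariance += simd_float3x3(columns: (d * d.x, d * d.y, d * d.z))
    }

    // Power iteration on (trace * I - covariance).
    let trace = covariance.columns.0.x + covariance.columns.1.y + covariance.columns.2.z
    let shifted = simd_float3x3(diagonal: SIMD3<Float>(repeating: trace)) - covariance
    var normal = SIMD3<Float>(0, 1, 0)   // rough initial guess: a ground plane's normal is near "up"
    for _ in 0..<32 {
        normal = simd_normalize(shifted * normal)
    }
    return FittedPlaneSketch(centroid: centroid, normal: normal)
}

Given the output of `getWorldPoints(...)`, and assuming `p` imports to Swift as `SIMD3<Float>`, the sketch would be driven as `fitPlanePCASketch(points: worldPoints.map { $0.p })`.
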
Date: Sun, 25 Jan 2026 22:24:31 -0800 Subject: [PATCH 08/11] Extend usage of plane fitting and add debugging to Metal-based world point calculation --- .../AttributeEstimationPipeline.swift | 5 ++-- .../Extensions/OtherAttributeExtension.swift | 30 +++++++------------ .../AttributeEstimation/Plane/PlaneFit.metal | 27 +++++++++++++++-- .../Plane/WorldPointsProcessor.swift | 20 +++++++++++-- 4 files changed, 56 insertions(+), 26 deletions(-) diff --git a/IOSAccessAssessment/AttributeEstimation/AttributeEstimationPipeline.swift b/IOSAccessAssessment/AttributeEstimation/AttributeEstimationPipeline.swift index 1bc0e8e..5fce8f9 100644 --- a/IOSAccessAssessment/AttributeEstimation/AttributeEstimationPipeline.swift +++ b/IOSAccessAssessment/AttributeEstimation/AttributeEstimationPipeline.swift @@ -45,12 +45,13 @@ class AttributeEstimationPipeline: ObservableObject { enum Texts { static let depthMapProcessorKey = "Depth Map Processor" static let localizationProcessorKey = "Localization Processor" + static let planeFitProcessorKey = "Plane Fit Processor" } } var depthMapProcessor: DepthMapProcessor? var localizationProcessor: LocalizationProcessor? - var planeFitProcesor: PlaneFitProcessor? + var planeFitProcessor: PlaneFitProcessor? var captureImageData: (any CaptureImageDataProtocol)? var captureMeshData: (any CaptureMeshDataProtocol)? @@ -65,7 +66,7 @@ class AttributeEstimationPipeline: ObservableObject { } self.depthMapProcessor = try DepthMapProcessor(depthImage: depthImage) self.localizationProcessor = LocalizationProcessor() - self.planeFitProcesor = try PlaneFitProcessor() + self.planeFitProcessor = try PlaneFitProcessor() self.captureImageData = captureImageData self.captureMeshData = captureMeshData } diff --git a/IOSAccessAssessment/AttributeEstimation/Extensions/OtherAttributeExtension.swift b/IOSAccessAssessment/AttributeEstimation/Extensions/OtherAttributeExtension.swift index c75aa02..5354dfe 100644 --- a/IOSAccessAssessment/AttributeEstimation/Extensions/OtherAttributeExtension.swift +++ b/IOSAccessAssessment/AttributeEstimation/Extensions/OtherAttributeExtension.swift @@ -1,5 +1,5 @@ // -// OtherAttributeExtensionLegacy.swift +// OtherAttributeExtension.swift // IOSAccessAssessment // // Created by Himanshu on 1/25/26. 
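The next hunk routes `calculateWidth` through the plane-fit path but still returns a placeholder value of 0, so the fitted plane is not yet turned into attributes. For orientation only, one way slope-style attributes could later be read off a plane normal is sketched below; this is not part of the patch, the function name and the `travelDirection` input are hypothetical, and it assumes a gravity-aligned world frame with +Y up (ARKit's default world alignment).

import simd
import Foundation

/// Illustrative only: slope angles, in degrees, read directly off a fitted
/// plane normal. Assumes +Y is "up" and that `travelDirection` is a
/// non-vertical vector giving the feature's direction of travel.
func slopeAnglesFromPlaneSketch(
    planeNormal: SIMD3<Float>,
    travelDirection: SIMD3<Float>
) -> (runningSlopeDegrees: Float, crossSlopeDegrees: Float) {
    let up = SIMD3<Float>(0, 1, 0)
    let n = simd_normalize(planeNormal)

    // Travel direction constrained to lie in the plane, and the in-plane
    // direction perpendicular to it.
    let along = simd_normalize(travelDirection - simd_dot(travelDirection, n) * n)
    let across = simd_normalize(simd_cross(n, along))

    // A unit in-plane vector's vertical component is the sine of its angle
    // above the horizontal; clamp to guard against floating-point drift.
    let runningSlope = asin(min(abs(simd_dot(along, up)), 1)) * 180 / .pi
    let crossSlope = asin(min(abs(simd_dot(across, up)), 1)) * 180 / .pi
    return (runningSlope, crossSlope)
}
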
@@ -18,32 +18,22 @@ extension AttributeEstimationPipeline { guard let depthMapProcessor = self.depthMapProcessor else { throw AttributeEstimationPipelineError.configurationError(Constants.Texts.depthMapProcessorKey) } - guard let localizationProcessor = self.localizationProcessor else { - throw AttributeEstimationPipelineError.configurationError(Constants.Texts.localizationProcessorKey) + guard let planeFitProcesor = self.planeFitProcessor else { + throw AttributeEstimationPipelineError.configurationError(Constants.Texts.planeFitProcessorKey) } guard let captureImageData = self.captureImageData else { throw AttributeEstimationPipelineError.missingCaptureData } - let trapezoidBoundPoints = accessibilityFeature.contourDetails.normalizedPoints - guard trapezoidBoundPoints.count == 4 else { - throw AttributeEstimationPipelineError.invalidAttributeData - } - let trapezoidBoundDepthValues = try depthMapProcessor.getFeatureDepthsAtBounds( - detectedFeature: accessibilityFeature - ) - let trapezoidBoundPointsWithDepth: [PointWithDepth] = zip(trapezoidBoundPoints, trapezoidBoundDepthValues).map { - PointWithDepth( - point: CGPoint(x: CGFloat($0.0.x), y: CGFloat($0.0.y)), - depth: $0.1 - ) - } - let widthValue = try localizationProcessor.calculateWidth( - trapezoidBoundsWithDepth: trapezoidBoundPointsWithDepth, - imageSize: captureImageData.originalSize, + + try planeFitProcesor.fitPlanePCAWithImage( + segmentationLabelImage: captureImageData.captureImageDataResults.segmentationLabelImage, + depthImage: depthMapProcessor.depthImage, + targetValue: accessibilityFeature.accessibilityFeatureClass.labelValue, cameraTransform: captureImageData.cameraTransform, cameraIntrinsics: captureImageData.cameraIntrinsics ) - guard let widthAttributeValue = AccessibilityFeatureAttribute.width.valueFromDouble(Double(widthValue)) else { + + guard let widthAttributeValue = AccessibilityFeatureAttribute.width.valueFromDouble(Double(0)) else { throw AttributeEstimationPipelineError.attributeAssignmentError } return widthAttributeValue diff --git a/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFit.metal b/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFit.metal index fe972a6..595692b 100644 --- a/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFit.metal +++ b/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFit.metal @@ -10,6 +10,16 @@ using namespace metal; #import "ShaderTypes.h" +enum PlaneDebugSlot : uint { + outsideImage = 0, + unmatchedSegmentation = 1, + belowDepthRange = 2, + aboveDepthRange = 3, + wrotePoint = 4, + depthIsZero = 5, + // add more if needed +}; + inline float3 projectPixelToWorld( float2 pixelCoord, float depthValue, @@ -39,10 +49,13 @@ kernel void computeWorldPoints( constant WorldPointsParams& params [[buffer(1)]], device WorldPoint* points [[buffer(2)]], device atomic_uint* pointCount [[buffer(3)]], + device atomic_uint* debugCounts [[buffer(4)]], uint2 gid [[thread_position_in_grid]] ) { - if (gid.x >= segmentationTexture.get_width() || gid.y >= segmentationTexture.get_height()) + if (gid.x >= segmentationTexture.get_width() || gid.y >= segmentationTexture.get_height()) { + atomic_fetch_add_explicit(&debugCounts[outsideImage], 1, memory_order_relaxed); return; + } float4 pixelColor = segmentationTexture.read(gid); float grayscale = pixelColor.r; @@ -50,12 +63,21 @@ kernel void computeWorldPoints( // Normalize grayscale to the range of the LUT uint index = min(uint(round(grayscale * 255.0)), 255u); if (index != targetValue) { + 
atomic_fetch_add_explicit(&debugCounts[unmatchedSegmentation], 1u, memory_order_relaxed); return; } float depthValue = depthTexture.read(gid).r; - if (depthValue <= params.minDepthThreshold || depthValue >= params.maxDepthThreshold) { + if (depthValue < params.minDepthThreshold) { + atomic_fetch_add_explicit(&debugCounts[belowDepthRange], 1u, memory_order_relaxed); return; } + if (depthValue > params.maxDepthThreshold) { + atomic_fetch_add_explicit(&debugCounts[aboveDepthRange], 1u, memory_order_relaxed); + return; + } + if (depthValue == 0.0f) { + atomic_fetch_add_explicit(&debugCounts[depthIsZero], 1u, memory_order_relaxed); + } float3 worldPoint = projectPixelToWorld( float2(gid), @@ -63,6 +85,7 @@ kernel void computeWorldPoints( params.cameraTransform, params.invIntrinsics ); + atomic_fetch_add_explicit(&debugCounts[wrotePoint], 1u, memory_order_relaxed); uint idx = atomic_fetch_add_explicit(pointCount, 1u, memory_order_relaxed); points[idx].p = worldPoint; diff --git a/IOSAccessAssessment/AttributeEstimation/Plane/WorldPointsProcessor.swift b/IOSAccessAssessment/AttributeEstimation/Plane/WorldPointsProcessor.swift index 00c0e1e..268c4e0 100644 --- a/IOSAccessAssessment/AttributeEstimation/Plane/WorldPointsProcessor.swift +++ b/IOSAccessAssessment/AttributeEstimation/Plane/WorldPointsProcessor.swift @@ -76,8 +76,6 @@ struct WorldPointsProcessor { throw WorldPointsProcessorError.metalPipelineCreationError } - print("WorldPoint Alignment and stride: \(MemoryLayout.alignment), \(MemoryLayout.alignment)") - let imageSize = simd_uint2(UInt32(segmentationLabelImage.extent.width), UInt32(segmentationLabelImage.extent.height)) let invIntrinsics = simd_inverse(cameraIntrinsics) @@ -106,6 +104,12 @@ struct WorldPointsProcessor { let pointsBuffer: MTLBuffer = try MetalBufferUtils.makeBuffer( device: self.device, length: MemoryLayout.stride * Int(maxPoints), options: .storageModeShared ) + let debugCountSlots = 6 + let debugBuffer = try MetalBufferUtils.makeBuffer( + device: self.device, + length: MemoryLayout.stride * debugCountSlots, + options: .storageModeShared + ) /** Initialize point count to zero. 
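(Sketch, not part of the patch.) The kernel addresses debugCounts through its Metal-side PlaneDebugSlot enum, while the CPU readback a few hunks below uses raw indices dbg[0] through dbg[5]. A hypothetical Swift mirror of the slot layout would keep the two sides in sync and make the readback self-describing:

enum PlaneDebugSlotHost: Int, CaseIterable {
    // Hypothetical host-side mirror of the shader's PlaneDebugSlot; raw values must match the Metal enum.
    case outsideImage = 0
    case unmatchedSegmentation
    case belowDepthRange
    case aboveDepthRange
    case wrotePoint
    case depthIsZero
}

// Possible readback, assuming debugBuffer is the shared buffer created above:
// let dbg = debugBuffer.contents().bindMemory(to: UInt32.self, capacity: PlaneDebugSlotHost.allCases.count)
// for slot in PlaneDebugSlotHost.allCases { print("\(slot):", dbg[slot.rawValue]) }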
@@ -114,6 +118,7 @@ struct WorldPointsProcessor { throw WorldPointsProcessorError.meshPipelineBlitEncoderError } blit.fill(buffer: pointCount, range: 0...stride, value: 0) + blit.fill(buffer: debugBuffer, range: 0..<(MemoryLayout.stride * debugCountSlots), value: 0) blit.endEncoding() /** @@ -130,6 +135,7 @@ struct WorldPointsProcessor { commandEncoder.setBytes(¶ms, length: MemoryLayout.stride, index: 1) commandEncoder.setBuffer(pointsBuffer, offset: 0, index: 2) commandEncoder.setBuffer(pointCount, offset: 0, index: 3) + commandEncoder.setBuffer(debugBuffer, offset: 0, index: 4) let threadgroupSize = MTLSize(width: pipeline.threadExecutionWidth, height: pipeline.maxTotalThreadsPerThreadgroup / pipeline.threadExecutionWidth, depth: 1) let threadgroups = MTLSize(width: (Int(imageSize.x) + threadgroupSize.width - 1) / threadgroupSize.width, @@ -151,6 +157,16 @@ struct WorldPointsProcessor { worldPoints.append(point) } } + + let dbg = debugBuffer.contents().bindMemory(to: UInt32.self, capacity: debugCountSlots) + print("outsideImage:", dbg[0]) + print("unmatchedSegmentation:", dbg[1]) + print("belowDepthRange:", dbg[2]) + print("aboveDepthRange:", dbg[3]) + print("wrotePoint:", dbg[4]) + print("depthIsZero:", dbg[5]) + print("pointCount:", pointsCountPointer) + return worldPoints } } From c9ed7cf133ac2513fa64e71a90245397843eb22f Mon Sep 17 00:00:00 2001 From: himanshunaidu Date: Mon, 26 Jan 2026 12:10:05 -0800 Subject: [PATCH 09/11] Add CPU based plane detection method for testing --- IOSAccessAssessment.xcodeproj/project.pbxproj | 4 +- .../Extensions/OtherAttributeExtension.swift | 2 +- .../Localization/LocalizationProcessor.swift | 2 +- .../AttributeEstimation/Plane/PlaneFit.metal | 2 +- .../Plane/PlaneFitProcessor.swift | 104 +++++++++++++++- .../Plane/WorldPointsProcessor.swift | 115 ++++++++++++++++-- 6 files changed, 213 insertions(+), 16 deletions(-) diff --git a/IOSAccessAssessment.xcodeproj/project.pbxproj b/IOSAccessAssessment.xcodeproj/project.pbxproj index fd1faa3..dc6d027 100644 --- a/IOSAccessAssessment.xcodeproj/project.pbxproj +++ b/IOSAccessAssessment.xcodeproj/project.pbxproj @@ -1498,7 +1498,7 @@ LD_RUNPATH_SEARCH_PATHS = "$(inherited)"; MARKETING_VERSION = 0.3; MTL_HEADER_SEARCH_PATHS = "$(SRCROOT)/IOSAccessAssessment/**"; - OTHER_CFLAGS = ""; + OTHER_CFLAGS = "-DACCELERATE_NEW_LAPACK"; PRODUCT_BUNDLE_IDENTIFIER = edu.uw.pointmapper; PRODUCT_NAME = "$(TARGET_NAME)"; SUPPORTED_PLATFORMS = "iphoneos iphonesimulator"; @@ -1542,7 +1542,7 @@ LD_RUNPATH_SEARCH_PATHS = "$(inherited)"; MARKETING_VERSION = 0.3; MTL_HEADER_SEARCH_PATHS = "$(SRCROOT)/IOSAccessAssessment/**"; - OTHER_CFLAGS = ""; + OTHER_CFLAGS = "-DACCELERATE_NEW_LAPACK"; PRODUCT_BUNDLE_IDENTIFIER = edu.uw.pointmapper; PRODUCT_NAME = "$(TARGET_NAME)"; SUPPORTED_PLATFORMS = "iphoneos iphonesimulator"; diff --git a/IOSAccessAssessment/AttributeEstimation/Extensions/OtherAttributeExtension.swift b/IOSAccessAssessment/AttributeEstimation/Extensions/OtherAttributeExtension.swift index 5354dfe..325c447 100644 --- a/IOSAccessAssessment/AttributeEstimation/Extensions/OtherAttributeExtension.swift +++ b/IOSAccessAssessment/AttributeEstimation/Extensions/OtherAttributeExtension.swift @@ -25,7 +25,7 @@ extension AttributeEstimationPipeline { throw AttributeEstimationPipelineError.missingCaptureData } - try planeFitProcesor.fitPlanePCAWithImage( + let plane = try planeFitProcesor.fitPlanePCAWithImage( segmentationLabelImage: captureImageData.captureImageDataResults.segmentationLabelImage, depthImage: 
depthMapProcessor.depthImage, targetValue: accessibilityFeature.accessibilityFeatureClass.labelValue, diff --git a/IOSAccessAssessment/AttributeEstimation/Localization/LocalizationProcessor.swift b/IOSAccessAssessment/AttributeEstimation/Localization/LocalizationProcessor.swift index 4d5ef30..d9bdd8a 100644 --- a/IOSAccessAssessment/AttributeEstimation/Localization/LocalizationProcessor.swift +++ b/IOSAccessAssessment/AttributeEstimation/Localization/LocalizationProcessor.swift @@ -144,7 +144,7 @@ struct LocalizationProcessor { /// Transform the point from camera space to world space let worldPoint4 = cameraTransform * cameraPoint4 - let worldPoint = SIMD3(worldPoint4.x, worldPoint4.y, worldPoint4.z) + let worldPoint = SIMD3(worldPoint4.x, worldPoint4.y, worldPoint4.z) / worldPoint4.w // Get camera world coordinates let cameraOriginPoint = simd_make_float3(cameraTransform.columns.3.x, diff --git a/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFit.metal b/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFit.metal index 595692b..d86bb9c 100644 --- a/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFit.metal +++ b/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFit.metal @@ -53,7 +53,7 @@ kernel void computeWorldPoints( uint2 gid [[thread_position_in_grid]] ) { if (gid.x >= segmentationTexture.get_width() || gid.y >= segmentationTexture.get_height()) { - atomic_fetch_add_explicit(&debugCounts[outsideImage], 1, memory_order_relaxed); + atomic_fetch_add_explicit(&debugCounts[outsideImage], 1u, memory_order_relaxed); return; } diff --git a/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitProcessor.swift b/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitProcessor.swift index cd741f1..7f8a230 100644 --- a/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitProcessor.swift +++ b/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitProcessor.swift @@ -5,21 +5,37 @@ // Created by Himanshu on 1/24/26. // -import simd import Accelerate import CoreImage enum PlaneFitProcessorError: Error, LocalizedError { case initializationError(message: String) + case invalidPointData + case invalidPlaneData var errorDescription: String? { switch self { case .initializationError(let message): return "PlaneFit Initialization Error: \(message)" + case .invalidPointData: + return "The calculated point data is invalid." + case .invalidPlaneData: + return "The calculated plane data is invalid." 
} } } +struct Plane: Sendable, CustomStringConvertible { + var firstEigenVector: simd_float3 + var secondEigenVector: simd_float3 + var n: simd_float3 // Normal vector + var d: Float // Offset from origin + + var description: String { + return "Plane(n: \(n), d: \(d), firstEigenVector: \(firstEigenVector), secondEigenVector: \(secondEigenVector))" + } +} + struct PlaneFitProcessor { private let worldPointsProcessor: WorldPointsProcessor @@ -27,16 +43,100 @@ struct PlaneFitProcessor { self.worldPointsProcessor = try WorldPointsProcessor() } + private func fitPlanePCA(worldPoints: [WorldPoint]) throws -> Plane { + guard worldPoints.count>=3 else { + throw PlaneFitProcessorError.invalidPointData + } + let worldPointMean = worldPoints.reduce(simd_float3(0,0,0), { $0 + $1.p }) / Float(worldPoints.count) + let centeredWorldPoints = worldPoints.map { $0.p - worldPointMean } + + var covarianceMatrix = simd_float3x3(0) + /// Compute covariance matrix + for point in centeredWorldPoints { + let outerProduct = simd_float3x3(rows: [ + simd_float3(point.x * point.x, point.x * point.y, point.x * point.z), + simd_float3(point.y * point.x, point.y * point.y, point.y * point.z), + simd_float3(point.z * point.x, point.z * point.y, point.z * point.z) + ]) + covarianceMatrix += outerProduct + } + covarianceMatrix = simd_float3x3(rows: [ + covarianceMatrix[0] / Float(worldPoints.count), + covarianceMatrix[1] / Float(worldPoints.count), + covarianceMatrix[2] / Float(worldPoints.count), + ]) + var a = [ + covarianceMatrix[0][0], covarianceMatrix[0][1], covarianceMatrix[0][2], + covarianceMatrix[1][0], covarianceMatrix[1][1], covarianceMatrix[1][2], + covarianceMatrix[2][0], covarianceMatrix[2][1], covarianceMatrix[2][2] + ] + var eigenvalues = [Float](repeating: 0, count: 3) + var jobz: Character = "V" /* 'V' */, uplo: Character = "U" /* 'L' */ + var n = Int32(3), lda = Int32(3), info = Int32(0) + var lwork: Int32 = 8 + var work = [Float](repeating: 0, count: Int(lwork)) + /// TODO: Deprecated. Replace with newer Accelerate APIs. 
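/// Sketch, not part of this patch: if only the plane normal were needed, the LAPACK call
/// flagged in the TODO above could in principle be replaced by a dependency-free shifted
/// power iteration. For a positive semi-definite covariance C, trace(C) >= lambda_max, so
/// the dominant eigenvector of (trace(C) * I - C) is the eigenvector of C's smallest
/// eigenvalue, i.e. the plane normal. This illustration does not recover the first and
/// second in-plane eigenvectors that Plane also stores; the function name is hypothetical.
func smallestEigenvectorSketch(of covariance: simd_float3x3, iterations: Int = 64) -> simd_float3 {
    let trace = covariance[0][0] + covariance[1][1] + covariance[2][2]
    let shifted = simd_float3x3(diagonal: simd_float3(repeating: trace)) - covariance
    var v = simd_normalize(simd_float3(1, 1, 1)) // arbitrary non-degenerate start vector
    for _ in 0..<iterations {
        let w = shifted * v
        if simd_length(w) < 1e-12 { break } // degenerate covariance; keep the last estimate
        v = simd_normalize(w)
    }
    return v
}
/// The plane offset would then follow as d = -simd_dot(normal, worldPointMean), as in the code below.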
+ ssyev_(&jobz, &uplo, &n, &a, &lda, &eigenvalues, &work, &lwork, &info) + + guard info == 0 else { + throw PlaneFitProcessorError.invalidPlaneData + } + + /// Eigen values in ascending order + let firstK = 2 + let firstEigenVector = simd_normalize(simd_float3(a[firstK * 3 + 0], a[firstK * 3 + 1], a[firstK * 3 + 2])) + let secondK = 1 + let secondEigenVector = simd_normalize(simd_float3(a[secondK * 3 + 0], a[secondK * 3 + 1], a[secondK * 3 + 2])) + let normalK = 0 + let normalVector = simd_normalize(simd_float3(a[normalK * 3 + 0], a[normalK * 3 + 1], a[normalK * 3 + 2])) + let d = -simd_dot(normalVector, worldPointMean) + + let plane = Plane( + firstEigenVector: firstEigenVector, + secondEigenVector: secondEigenVector, + n: normalVector, + d: d + ) + return plane + } + func fitPlanePCAWithImage( segmentationLabelImage: CIImage, depthImage: CIImage, targetValue: UInt8, cameraTransform: simd_float4x4, cameraIntrinsics: simd_float3x3 - ) throws { + ) throws -> Plane { let worldPoints = try self.worldPointsProcessor.getWorldPoints( segmentationLabelImage: segmentationLabelImage, depthImage: depthImage, targetValue: targetValue, cameraTransform: cameraTransform, cameraIntrinsics: cameraIntrinsics ) + + let worldPointsCPU = try self.worldPointsProcessor.getWorldPointsCPU( + segmentationLabelImage: segmentationLabelImage, depthImage: depthImage, + targetValue: targetValue, cameraTransform: cameraTransform, cameraIntrinsics: cameraIntrinsics + ) + /** + Find distributional differences between GPU and CPU world points + */ + let gpuCount = worldPoints.count + let cpuCount = worldPointsCPU.count + print("PlaneFitProcessor: GPU World Points Count: \(gpuCount), CPU World Points Count: \(cpuCount)") + /// Sort by magnitude and do a chi-squared test + let worldPointsGPUSorted = worldPoints.map { simd_length($0.p) }.sorted() + let worldPointsCPUSorted = worldPointsCPU.map { simd_length($0.p) }.sorted() + let minCount = min(gpuCount, cpuCount) + var chiSum: Float = 0 + for i in 0.., + depthValue: Float, + cameraTransform: simd_float4x4, + invIntrinsics: simd_float3x3 + ) -> WorldPoint { + let imagePoint = simd_float3(Float(pixelCoord.x), Float(pixelCoord.y), 1.0) + let ray = invIntrinsics * imagePoint + let rayDirection = simd_normalize(ray) + + var cameraPoint = rayDirection * depthValue + cameraPoint.y = -cameraPoint.y + cameraPoint.z = -cameraPoint.z + let cameraPoint4 = simd_float4(cameraPoint, 1.0) + + let worldPoint4 = cameraTransform * cameraPoint4 + let worldPoint = SIMD3(worldPoint4.x, worldPoint4.y, worldPoint4.z) / worldPoint4.w + + return WorldPoint(p: worldPoint) + } + + /** + Extract world points from segmentation and depth images (CPU version). 
+ */ + func getWorldPointsCPU( + segmentationLabelImage: CIImage, + depthImage: CIImage, + targetValue: UInt8, + cameraTransform: simd_float4x4, + cameraIntrinsics: simd_float3x3 + ) throws -> [WorldPoint] { + let minDepthThreshold = Constants.DepthConstants.depthMinThreshold + let maxDepthThreshold = Constants.DepthConstants.depthMaxThreshold + let invIntrinsics = simd_inverse(cameraIntrinsics) + + /// Get CVPixelBuffer from segmentation image + let segmentationLabelPixelBuffer = try segmentationLabelImage.toPixelBuffer( + context: self.ciContext, + pixelFormatType: kCVPixelFormatType_OneComponent8, + colorSpace: nil + ) + let segmentationWidth = CVPixelBufferGetWidth(segmentationLabelPixelBuffer) + let segmentationHeight = CVPixelBufferGetHeight(segmentationLabelPixelBuffer) + let resizedDepthImage = depthImage.resized(to: segmentationLabelImage.extent.size) + let depthBuffer = try resizedDepthImage.toPixelBuffer( + context: self.ciContext, + pixelFormatType: kCVPixelFormatType_DepthFloat32, + colorSpace: nil + ) + + CVPixelBufferLockBaseAddress(segmentationLabelPixelBuffer, .readOnly) + CVPixelBufferLockBaseAddress(depthBuffer, .readOnly) + defer { + CVPixelBufferUnlockBaseAddress(segmentationLabelPixelBuffer, .readOnly) + CVPixelBufferUnlockBaseAddress(depthBuffer, .readOnly) + } + + guard let segmentationBaseAddress = CVPixelBufferGetBaseAddress(segmentationLabelPixelBuffer), + let depthBaseAddress = CVPixelBufferGetBaseAddress(depthBuffer) else { + throw WorldPointsProcessorError.unableToProcessBufferData + } + let segmentationBytesPerRow = CVPixelBufferGetBytesPerRow(segmentationLabelPixelBuffer) + let depthBytesPerRow = CVPixelBufferGetBytesPerRow(depthBuffer) + let segmentationPtr = segmentationBaseAddress.assumingMemoryBound(to: UInt8.self) + let depthPtr = depthBaseAddress.assumingMemoryBound(to: Float.self) + + var worldPoints: [WorldPoint] = [] + for y in 0...stride + x + if segmentationPtr[segmentationIndex] != targetValue { + continue + } + let depthIndex = y * depthBytesPerRow / MemoryLayout.stride + x + let depthValue = depthPtr[depthIndex] + if depthValue < minDepthThreshold || depthValue > maxDepthThreshold { + continue + } + let worldPoint = self.computeWorldPointCPU( + pixelCoord: SIMD2(x, y), + depthValue: depthValue, + cameraTransform: cameraTransform, + invIntrinsics: invIntrinsics + ) + worldPoints.append(worldPoint) + } + } + + return worldPoints + } + + } From 1a6f0f7c13972c49737f8fad00bf7e8d462f0a10 Mon Sep 17 00:00:00 2001 From: himanshunaidu Date: Mon, 26 Jan 2026 12:30:05 -0800 Subject: [PATCH 10/11] Add ways to compare CPU results with the GPU results for plane detection --- .../Plane/PlaneFitProcessor.swift | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitProcessor.swift b/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitProcessor.swift index 7f8a230..576fd26 100644 --- a/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitProcessor.swift +++ b/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitProcessor.swift @@ -90,6 +90,7 @@ struct PlaneFitProcessor { let normalK = 0 let normalVector = simd_normalize(simd_float3(a[normalK * 3 + 0], a[normalK * 3 + 1], a[normalK * 3 + 2])) let d = -simd_dot(normalVector, worldPointMean) + print("Eigen values: \(eigenvalues)") let plane = Plane( firstEigenVector: firstEigenVector, @@ -123,20 +124,27 @@ struct PlaneFitProcessor { let cpuCount = worldPointsCPU.count print("PlaneFitProcessor: GPU World Points Count: 
\(gpuCount), CPU World Points Count: \(cpuCount)") /// Sort by magnitude and do a chi-squared test - let worldPointsGPUSorted = worldPoints.map { simd_length($0.p) }.sorted() - let worldPointsCPUSorted = worldPointsCPU.map { simd_length($0.p) }.sorted() + let worldPointsMagGPUSorted = worldPoints.map { simd_length($0.p) }.sorted() + let worldPointsMagCPUSorted = worldPointsCPU.map { simd_length($0.p) }.sorted() let minCount = min(gpuCount, cpuCount) var chiSum: Float = 0 for i in 0.. Date: Mon, 26 Jan 2026 13:27:17 -0800 Subject: [PATCH 11/11] Bring plane detection results onto the Annotation View --- .../Annotation/AnnotationImageManager.swift | 5 +- .../AttributeEstimationPipeline.swift | 25 +++++- .../Extensions/OtherAttributeExtension.swift | 89 +++++-------------- .../Plane/PlaneFitProcessor.swift | 35 +------- IOSAccessAssessment/View/AnnotationView.swift | 10 ++- 5 files changed, 58 insertions(+), 106 deletions(-) diff --git a/IOSAccessAssessment/Annotation/AnnotationImageManager.swift b/IOSAccessAssessment/Annotation/AnnotationImageManager.swift index fa76942..15b9f70 100644 --- a/IOSAccessAssessment/Annotation/AnnotationImageManager.swift +++ b/IOSAccessAssessment/Annotation/AnnotationImageManager.swift @@ -142,7 +142,10 @@ final class AnnotationImageManager: NSObject, ObservableObject, AnnotationImageP /** Updates the camera image, and recreates the overlay image. */ - func updateFeatureClass(accessibilityFeatureClass: AccessibilityFeatureClass) throws -> [EditableAccessibilityFeature] { + func updateFeatureClass( + accessibilityFeatureClass: AccessibilityFeatureClass, + updateFeatureClassResults: AnnotationFeatureClassUpdateResults + ) throws -> [EditableAccessibilityFeature] { guard isConfigured else { throw AnnotationImageManagerError.notConfigured } diff --git a/IOSAccessAssessment/AttributeEstimation/AttributeEstimationPipeline.swift b/IOSAccessAssessment/AttributeEstimation/AttributeEstimationPipeline.swift index 5fce8f9..2f0827d 100644 --- a/IOSAccessAssessment/AttributeEstimation/AttributeEstimationPipeline.swift +++ b/IOSAccessAssessment/AttributeEstimation/AttributeEstimationPipeline.swift @@ -37,6 +37,10 @@ struct LocationRequestResult: Sendable { let lidarDepth: Float } +struct AttributeRequestResult: Sendable { + let plane: Plane? +} + /** An attribute estimation pipeline that processes editable accessibility features to estimate their attributes. */ @@ -132,17 +136,32 @@ class AttributeEstimationPipeline: ObservableObject { accessibilityFeature: EditableAccessibilityFeature ) throws { var attributeAssignmentFlagError = false + + /// If the attributes include width, runningSlope or crossSlope, pre-calculate the fitting plane for efficiency + var plane: Plane? 
= nil + if accessibilityFeature.accessibilityFeatureClass.attributes.contains(where: { + $0 == .width || $0 == .runningSlope || $0 == .crossSlope + }) { + plane = try self.calculatePlane(accessibilityFeature: accessibilityFeature) + } + for attribute in accessibilityFeature.accessibilityFeatureClass.attributes { do { switch attribute { case .width: - let widthAttributeValue = try self.calculateWidth(accessibilityFeature: accessibilityFeature) + let widthAttributeValue = try self.calculateWidth( + accessibilityFeature: accessibilityFeature, plane: plane + ) try accessibilityFeature.setAttributeValue(widthAttributeValue, for: .width, isCalculated: true) case .runningSlope: - let runningSlopeAttributeValue = try self.calculateRunningSlope(accessibilityFeature: accessibilityFeature) + let runningSlopeAttributeValue = try self.calculateRunningSlope( + accessibilityFeature: accessibilityFeature, plane: plane + ) try accessibilityFeature.setAttributeValue(runningSlopeAttributeValue, for: .runningSlope, isCalculated: true) case .crossSlope: - let crossSlopeAttributeValue = try self.calculateCrossSlope(accessibilityFeature: accessibilityFeature) + let crossSlopeAttributeValue = try self.calculateCrossSlope( + accessibilityFeature: accessibilityFeature, plane: plane + ) try accessibilityFeature.setAttributeValue(crossSlopeAttributeValue, for: .crossSlope, isCalculated: true) case .widthLegacy: let widthAttributeValue = try self.calculateWidthLegacy(accessibilityFeature: accessibilityFeature) diff --git a/IOSAccessAssessment/AttributeEstimation/Extensions/OtherAttributeExtension.swift b/IOSAccessAssessment/AttributeEstimation/Extensions/OtherAttributeExtension.swift index 325c447..c13e795 100644 --- a/IOSAccessAssessment/AttributeEstimation/Extensions/OtherAttributeExtension.swift +++ b/IOSAccessAssessment/AttributeEstimation/Extensions/OtherAttributeExtension.swift @@ -12,9 +12,12 @@ import CoreLocation TODO: Improve upon these methods with more robust implementations. */ extension AttributeEstimationPipeline { - func calculateWidth( + /** + Intermediary method to calculate the plane of the feature. + */ + func calculatePlane( accessibilityFeature: EditableAccessibilityFeature - ) throws -> AccessibilityFeatureAttribute.Value { + ) throws -> Plane { guard let depthMapProcessor = self.depthMapProcessor else { throw AttributeEstimationPipelineError.configurationError(Constants.Texts.depthMapProcessorKey) } @@ -32,6 +35,14 @@ extension AttributeEstimationPipeline { cameraTransform: captureImageData.cameraTransform, cameraIntrinsics: captureImageData.cameraIntrinsics ) + return plane + } + + func calculateWidth( + accessibilityFeature: EditableAccessibilityFeature, + plane: Plane? = nil + ) throws -> AccessibilityFeatureAttribute.Value { + var plane = try (plane ?? calculatePlane(accessibilityFeature: accessibilityFeature)) guard let widthAttributeValue = AccessibilityFeatureAttribute.width.valueFromDouble(Double(0)) else { throw AttributeEstimationPipelineError.attributeAssignmentError @@ -40,78 +51,24 @@ extension AttributeEstimationPipeline { } func calculateRunningSlope( - accessibilityFeature: EditableAccessibilityFeature + accessibilityFeature: EditableAccessibilityFeature, + plane: Plane? 
= nil ) throws -> AccessibilityFeatureAttribute.Value { - guard let depthMapProcessor = self.depthMapProcessor else { - throw AttributeEstimationPipelineError.configurationError(Constants.Texts.depthMapProcessorKey) - } - guard let localizationProcessor = self.localizationProcessor else { - throw AttributeEstimationPipelineError.configurationError(Constants.Texts.localizationProcessorKey) - } - guard let captureImageData = self.captureImageData else { - throw AttributeEstimationPipelineError.missingCaptureData - } - let trapezoidBoundPoints = accessibilityFeature.contourDetails.normalizedPoints - guard trapezoidBoundPoints.count == 4 else { - throw AttributeEstimationPipelineError.invalidAttributeData - } - let trapezoidBoundDepthValues = try depthMapProcessor.getFeatureDepthsAtBounds( - detectedFeature: accessibilityFeature - ) - let trapezoidBoundPointsWithDepth: [PointWithDepth] = zip(trapezoidBoundPoints, trapezoidBoundDepthValues).map { - PointWithDepth( - point: CGPoint(x: CGFloat($0.0.x), y: CGFloat($0.0.y)), - depth: $0.1 - ) - } - let runningSlopeValue: Float = try localizationProcessor.calculateRunningSlope( - trapezoidBoundsWithDepth: trapezoidBoundPointsWithDepth, - imageSize: captureImageData.originalSize, - cameraTransform: captureImageData.cameraTransform, - cameraIntrinsics: captureImageData.cameraIntrinsics - ) - guard let runningSlopeAttributeValue = AccessibilityFeatureAttribute.runningSlope.valueFromDouble( - Double(runningSlopeValue) - ) else { + var plane = try (plane ?? calculatePlane(accessibilityFeature: accessibilityFeature)) + + guard let runningSlopeAttributeValue = AccessibilityFeatureAttribute.runningSlope.valueFromDouble(0) else { throw AttributeEstimationPipelineError.attributeAssignmentError } return runningSlopeAttributeValue } func calculateCrossSlope( - accessibilityFeature: EditableAccessibilityFeature + accessibilityFeature: EditableAccessibilityFeature, + plane: Plane? = nil ) throws -> AccessibilityFeatureAttribute.Value { - guard let depthMapProcessor = self.depthMapProcessor else { - throw AttributeEstimationPipelineError.configurationError(Constants.Texts.depthMapProcessorKey) - } - guard let localizationProcessor = self.localizationProcessor else { - throw AttributeEstimationPipelineError.configurationError(Constants.Texts.localizationProcessorKey) - } - guard let captureImageData = self.captureImageData else { - throw AttributeEstimationPipelineError.missingCaptureData - } - let trapezoidBoundPoints = accessibilityFeature.contourDetails.normalizedPoints - guard trapezoidBoundPoints.count == 4 else { - throw AttributeEstimationPipelineError.invalidAttributeData - } - let trapezoidBoundDepthValues = try depthMapProcessor.getFeatureDepthsAtBounds( - detectedFeature: accessibilityFeature - ) - let trapezoidBoundPointsWithDepth: [PointWithDepth] = zip(trapezoidBoundPoints, trapezoidBoundDepthValues).map { - PointWithDepth( - point: CGPoint(x: CGFloat($0.0.x), y: CGFloat($0.0.y)), - depth: $0.1 - ) - } - let crossSlopeValue: Float = try localizationProcessor.calculateCrossSlope( - trapezoidBoundsWithDepth: trapezoidBoundPointsWithDepth, - imageSize: captureImageData.originalSize, - cameraTransform: captureImageData.cameraTransform, - cameraIntrinsics: captureImageData.cameraIntrinsics - ) - guard let crossSlopeAttributeValue = AccessibilityFeatureAttribute.crossSlope.valueFromDouble( - Double(crossSlopeValue) - ) else { + var plane = try (plane ?? 
calculatePlane(accessibilityFeature: accessibilityFeature)) + + guard let crossSlopeAttributeValue = AccessibilityFeatureAttribute.crossSlope.valueFromDouble(0) else { throw AttributeEstimationPipelineError.attributeAssignmentError } return crossSlopeAttributeValue diff --git a/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitProcessor.swift b/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitProcessor.swift index 576fd26..66c6364 100644 --- a/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitProcessor.swift +++ b/IOSAccessAssessment/AttributeEstimation/Plane/PlaneFitProcessor.swift @@ -90,7 +90,6 @@ struct PlaneFitProcessor { let normalK = 0 let normalVector = simd_normalize(simd_float3(a[normalK * 3 + 0], a[normalK * 3 + 1], a[normalK * 3 + 2])) let d = -simd_dot(normalVector, worldPointMean) - print("Eigen values: \(eigenvalues)") let plane = Plane( firstEigenVector: firstEigenVector, @@ -112,39 +111,7 @@ struct PlaneFitProcessor { segmentationLabelImage: segmentationLabelImage, depthImage: depthImage, targetValue: targetValue, cameraTransform: cameraTransform, cameraIntrinsics: cameraIntrinsics ) - - let worldPointsCPU = try self.worldPointsProcessor.getWorldPointsCPU( - segmentationLabelImage: segmentationLabelImage, depthImage: depthImage, - targetValue: targetValue, cameraTransform: cameraTransform, cameraIntrinsics: cameraIntrinsics - ) - /** - Find distributional differences between GPU and CPU world points - */ - let gpuCount = worldPoints.count - let cpuCount = worldPointsCPU.count - print("PlaneFitProcessor: GPU World Points Count: \(gpuCount), CPU World Points Count: \(cpuCount)") - /// Sort by magnitude and do a chi-squared test - let worldPointsMagGPUSorted = worldPoints.map { simd_length($0.p) }.sorted() - let worldPointsMagCPUSorted = worldPointsCPU.map { simd_length($0.p) }.sorted() - let minCount = min(gpuCount, cpuCount) - var chiSum: Float = 0 - for i in 0..