// …
// Note: The code for boundary condition is omitted.
// Naive box filter: every output pixel becomes the mean of the
// (2R+1) x (2R+1) window of input pixels centred on it.
for j in 0..<height {
    for i in 0..<width {
        var sum = 0
        for ry in -R...R {
            for rx in -R...R {
                sum += Int(input[j + ry][i + rx])
            }
        }
        // Divide by the whole window area. `sum / (2 * R + 1) * (2 * R + 1)`
        // groups left-to-right as `(sum / (2R+1)) * (2R+1)`, which is roughly
        // `sum` again and makes the UInt8 conversion trap above 255.
        output[j][i] = UInt8(sum / ((2 * R + 1) * (2 * R + 1)))
    }
}
// Box Filter
// Naive box filter: every output pixel becomes the mean of the
// (2R+1) x (2R+1) window of input pixels centred on it.
// NOTE: no border handling is shown; `j + ry` / `i + rx` leave
// 0..<height / 0..<width for pixels closer than R to an edge.
for j in 0..<height {
    for i in 0..<width {
        var sum = 0
        for ry in -R...R {
            for rx in -R...R {
                sum += Int(input[j + ry][i + rx])
            }
        }
        // Divide by the whole window area. `sum / (2 * R + 1) * (2 * R + 1)`
        // groups left-to-right as `(sum / (2R+1)) * (2R+1)`, which is roughly
        // `sum` again and makes the UInt8 conversion trap above 255.
        output[j][i] = UInt8(sum / ((2 * R + 1) * (2 * R + 1)))
    }
}
// Box Filter
// Naive box filter: every output pixel becomes the mean of the
// (2R+1) x (2R+1) window of input pixels centred on it.
// NOTE: no border handling is shown; `j + ry` / `i + rx` leave
// 0..<height / 0..<width for pixels closer than R to an edge.
for j in 0..<height {
    for i in 0..<width {
        var sum = 0
        for ry in -R...R {
            for rx in -R...R {
                sum += Int(input[j + ry][i + rx])
            }
        }
        // Divide by the whole window area. `sum / (2 * R + 1) * (2 * R + 1)`
        // groups left-to-right as `(sum / (2R+1)) * (2R+1)`, which is roughly
        // `sum` again and makes the UInt8 conversion trap above 255.
        output[j][i] = UInt8(sum / ((2 * R + 1) * (2 * R + 1)))
    }
}
// Box Filter
// Naive box filter: every output pixel becomes the mean of the
// (2R+1) x (2R+1) window of input pixels centred on it.
// NOTE: no border handling is shown; `j + ry` / `i + rx` leave
// 0..<height / 0..<width for pixels closer than R to an edge.
for j in 0..<height {
    for i in 0..<width {
        var sum = 0
        for ry in -R...R {
            for rx in -R...R {
                sum += Int(input[j + ry][i + rx])
            }
        }
        // Divide by the whole window area. `sum / (2 * R + 1) * (2 * R + 1)`
        // groups left-to-right as `(sum / (2R+1)) * (2R+1)`, which is roughly
        // `sum` again and makes the UInt8 conversion trap above 255.
        output[j][i] = UInt8(sum / ((2 * R + 1) * (2 * R + 1)))
    }
}
// Box Filter
// Naive box filter: every output pixel becomes the mean of the
// (2R+1) x (2R+1) window of input pixels centred on it.
// NOTE: no border handling is shown; `j + ry` / `i + rx` leave
// 0..<height / 0..<width for pixels closer than R to an edge.
for j in 0..<height {
    for i in 0..<width {
        var sum = 0
        for ry in -R...R {
            for rx in -R...R {
                sum += Int(input[j + ry][i + rx])
            }
        }
        // Divide by the whole window area. `sum / (2 * R + 1) * (2 * R + 1)`
        // groups left-to-right as `(sum / (2R+1)) * (2R+1)`, which is roughly
        // `sum` again and makes the UInt8 conversion trap above 255.
        output[j][i] = UInt8(sum / ((2 * R + 1) * (2 * R + 1)))
    }
}
// Box Filter R   (slide caption; whatever followed "R" is missing from the extracted text)
// Naive box filter: every output pixel becomes the mean of the
// (2R+1) x (2R+1) window of input pixels centred on it.
// NOTE: no border handling is shown; `j + ry` / `i + rx` leave
// 0..<height / 0..<width for pixels closer than R to an edge.
for j in 0..<height {
    for i in 0..<width {
        var sum = 0
        for ry in -R...R {
            for rx in -R...R {
                sum += Int(input[j + ry][i + rx])
            }
        }
        // Divide by the whole window area. `sum / (2 * R + 1) * (2 * R + 1)`
        // groups left-to-right as `(sum / (2R+1)) * (2R+1)`, which is roughly
        // `sum` again and makes the UInt8 conversion trap above 255.
        output[j][i] = UInt8(sum / ((2 * R + 1) * (2 * R + 1)))
    }
}
// …
// Average
// Naive box filter: every output pixel becomes the mean of the
// (2R+1) x (2R+1) window of input pixels centred on it.
// NOTE: no border handling is shown; `j + ry` / `i + rx` leave
// 0..<height / 0..<width for pixels closer than R to an edge.
for j in 0..<height {
    for i in 0..<width {
        var sum = 0
        for ry in -R...R {
            for rx in -R...R {
                sum += Int(input[j + ry][i + rx])
            }
        }
        // Divide by the whole window area. `sum / (2 * R + 1) * (2 * R + 1)`
        // groups left-to-right as `(sum / (2R+1)) * (2R+1)`, which is roughly
        // `sum` again and makes the UInt8 conversion trap above 255.
        output[j][i] = UInt8(sum / ((2 * R + 1) * (2 * R + 1)))
    }
}
// …
// Naive box filter: every output pixel becomes the mean of the
// (2R+1) x (2R+1) window of input pixels centred on it.
// NOTE: no border handling is shown; `j + ry` / `i + rx` leave
// 0..<height / 0..<width for pixels closer than R to an edge.
for j in 0..<height {
    for i in 0..<width {
        var sum = 0
        for ry in -R...R {
            for rx in -R...R {
                sum += Int(input[j + ry][i + rx])
            }
        }
        // Divide by the whole window area. `sum / (2 * R + 1) * (2 * R + 1)`
        // groups left-to-right as `(sum / (2R+1)) * (2R+1)`, which is roughly
        // `sum` again and makes the UInt8 conversion trap above 255.
        output[j][i] = UInt8(sum / ((2 * R + 1) * (2 * R + 1)))
    }
}
// …
// Naive box filter: every output pixel becomes the mean of the
// (2R+1) x (2R+1) window of input pixels centred on it.
// NOTE: no border handling is shown; `j + ry` / `i + rx` leave
// 0..<height / 0..<width for pixels closer than R to an edge.
for j in 0..<height {
    for i in 0..<width {
        var sum = 0
        for ry in -R...R {
            for rx in -R...R {
                sum += Int(input[j + ry][i + rx])
            }
        }
        // Divide by the whole window area. `sum / (2 * R + 1) * (2 * R + 1)`
        // groups left-to-right as `(sum / (2R+1)) * (2R+1)`, which is roughly
        // `sum` again and makes the UInt8 conversion trap above 255.
        output[j][i] = UInt8(sum / ((2 * R + 1) * (2 * R + 1)))
    }
}
// …
// Naive box filter: every output pixel becomes the mean of the
// (2R+1) x (2R+1) window of input pixels centred on it.
// NOTE: no border handling is shown; `j + ry` / `i + rx` leave
// 0..<height / 0..<width for pixels closer than R to an edge.
for j in 0..<height {
    for i in 0..<width {
        var sum = 0
        for ry in -R...R {
            for rx in -R...R {
                sum += Int(input[j + ry][i + rx])
            }
        }
        // Divide by the whole window area. `sum / (2 * R + 1) * (2 * R + 1)`
        // groups left-to-right as `(sum / (2R+1)) * (2R+1)`, which is roughly
        // `sum` again and makes the UInt8 conversion trap above 255.
        output[j][i] = UInt8(sum / ((2 * R + 1) * (2 * R + 1)))
    }
}
// …
// Naive box filter: every output pixel becomes the mean of the
// (2R+1) x (2R+1) window of input pixels centred on it.
// NOTE: no border handling is shown; `j + ry` / `i + rx` leave
// 0..<height / 0..<width for pixels closer than R to an edge.
for j in 0..<height {
    for i in 0..<width {
        var sum = 0
        for ry in -R...R {
            for rx in -R...R {
                sum += Int(input[j + ry][i + rx])
            }
        }
        // Divide by the whole window area. `sum / (2 * R + 1) * (2 * R + 1)`
        // groups left-to-right as `(sum / (2R+1)) * (2R+1)`, which is roughly
        // `sum` again and makes the UInt8 conversion trap above 255.
        output[j][i] = UInt8(sum / ((2 * R + 1) * (2 * R + 1)))
    }
}
// …
// Naive box filter: every output pixel becomes the mean of the
// (2R+1) x (2R+1) window of input pixels centred on it.
// NOTE: no border handling is shown; `j + ry` / `i + rx` leave
// 0..<height / 0..<width for pixels closer than R to an edge.
for j in 0..<height {
    for i in 0..<width {
        var sum = 0
        for ry in -R...R {
            for rx in -R...R {
                sum += Int(input[j + ry][i + rx])
            }
        }
        // Divide by the whole window area. `sum / (2 * R + 1) * (2 * R + 1)`
        // groups left-to-right as `(sum / (2R+1)) * (2R+1)`, which is roughly
        // `sum` again and makes the UInt8 conversion trap above 255.
        output[j][i] = UInt8(sum / ((2 * R + 1) * (2 * R + 1)))
    }
}
// …
// Naive box filter: every output pixel becomes the mean of the
// (2R+1) x (2R+1) window of input pixels centred on it.
// NOTE: no border handling is shown; `j + ry` / `i + rx` leave
// 0..<height / 0..<width for pixels closer than R to an edge.
for j in 0..<height {
    for i in 0..<width {
        var sum = 0
        for ry in -R...R {
            for rx in -R...R {
                sum += Int(input[j + ry][i + rx])
            }
        }
        // Divide by the whole window area. `sum / (2 * R + 1) * (2 * R + 1)`
        // groups left-to-right as `(sum / (2R+1)) * (2R+1)`, which is roughly
        // `sum` again and makes the UInt8 conversion trap above 255.
        output[j][i] = UInt8(sum / ((2 * R + 1) * (2 * R + 1)))
    }
}
// …
// Naive box filter: every output pixel becomes the mean of the
// (2R+1) x (2R+1) window of input pixels centred on it.
// NOTE: no border handling is shown; `j + ry` / `i + rx` leave
// 0..<height / 0..<width for pixels closer than R to an edge.
for j in 0..<height {
    for i in 0..<width {
        var sum = 0
        for ry in -R...R {
            for rx in -R...R {
                sum += Int(input[j + ry][i + rx])
            }
        }
        // Divide by the whole window area. `sum / (2 * R + 1) * (2 * R + 1)`
        // groups left-to-right as `(sum / (2R+1)) * (2R+1)`, which is roughly
        // `sum` again and makes the UInt8 conversion trap above 255.
        output[j][i] = UInt8(sum / ((2 * R + 1) * (2 * R + 1)))
    }
}
// …
// Naive box filter: every output pixel becomes the mean of the
// (2R+1) x (2R+1) window of input pixels centred on it.
// NOTE: no border handling is shown; `j + ry` / `i + rx` leave
// 0..<height / 0..<width for pixels closer than R to an edge.
for j in 0..<height {
    for i in 0..<width {
        var sum = 0
        for ry in -R...R {
            for rx in -R...R {
                sum += Int(input[j + ry][i + rx])
            }
        }
        // Divide by the whole window area. `sum / (2 * R + 1) * (2 * R + 1)`
        // groups left-to-right as `(sum / (2R+1)) * (2R+1)`, which is roughly
        // `sum` again and makes the UInt8 conversion trap above 255.
        output[j][i] = UInt8(sum / ((2 * R + 1) * (2 * R + 1)))
    }
}
// xfilter do { var x = 0 while x < width { var sum = SIMD8<UInt16>.zero for k in 0..<L { let startIndex = x + k sum &+= SIMD8<UInt16>(yresult[startIndex..<startIndex+8]) } sum /= weightSIMD for k in 0..<8 { output[y][x+k] = sum[k] } x += 8
// …
// Separable box filter using SIMD16 lanes and raw buffers.
// For each output row: the L buffered, horizontally extended source rows are
// summed column-wise (yfilter); then L neighbouring column sums are added and
// divided by L * L (xfilter). The L row buffers are rotated after every row.
// `imagePointer`, `resultPointer`, `width`, `height`, `radius`, `L` and
// `extendImage` are declared outside the visible snippet.

// The 16-lane tail blocks start at `width - 16` / `widthExtended - 16`;
// a narrower image would index before the start of the buffers.
precondition(width >= 16, "box filter needs width >= 16 for the 16-lane tail block")
precondition(height >= 1, "box filter needs at least one row")
// The xfilter reads yresultPointer up to index width + L - 1, which stays
// inside widthExtended = width + 2 * radius only while L <= 2 * radius + 1.
precondition(L >= 1 && L <= 2 * radius + 1, "L must be in 1...(2 * radius + 1)")

// NOTE(review): sums are accumulated with wrapping `&+=` in UInt16 lanes, so
// the sum of L * L samples must fit in 16 bits or it wraps silently —
// confirm against the sample range of the image.
let weightSIMD = SIMD16<UInt16>(repeating: UInt16(L * L))
let widthExtended = width + 2 * radius

// (translated, best effort) Without a deallocate on every run this leaks
// memory; the result of the very last experiment is still needed afterwards,
// so the previous buffer is released here at the start.
resultPointer.deinitialize()
resultPointer.deallocate()
resultPointer = .allocate(capacity: width * height)
resultPointer.initialize(repeating: .zero)

// L row buffers, each `widthExtended` elements wide.
let extendedPointer: UnsafeMutableBufferPointer<UnsafeMutableBufferPointer<UInt16>> = .allocate(capacity: L)
for k in 0..<L {
    let rowBuffer = UnsafeMutableBufferPointer<UInt16>.allocate(capacity: widthExtended)
    rowBuffer.initialize(repeating: .zero)
    // The outer buffer is uninitialised at this point, so initialise the slot
    // rather than assigning through the subscript.
    extendedPointer.initializeElement(at: k, to: rowBuffer)
}

// Pre-fill the first L - 1 slots with rows (k - radius), clamped to the image
// at BOTH ends so that images with height <= radius are not read past the end.
for k in 0..<L-1 {
    extendImage(
        from: imagePointer.advanced(by: min(height - 1, max(0, k - radius)) * width),
        srcWidth: width,
        extendTo: extendedPointer[k],
        extendRadius: radius
    )
}

let yresultPointer = UnsafeMutableBufferPointer<UInt16>.allocate(capacity: widthExtended)
yresultPointer.initialize(repeating: .zero)

for y in 0..<height {
    // Load the newest row (y + radius, clamped to the last row) into the last slot.
    extendImage(
        from: imagePointer.advanced(by: min(height - 1, y + radius) * width),
        srcWidth: width,
        extendTo: extendedPointer[L-1],
        extendRadius: radius
    )

    // yfilter
    do {
        var x = 0
        while x < widthExtended - 16 {
            var sum = SIMD16<UInt16>.zero
            for k in 0..<L {
                sum &+= SIMD16<UInt16>(extendedPointer[k][x..<x+16])
            }
            for k in 0..<16 {
                yresultPointer[x+k] = sum[k]
            }
            x += 16
        }
    }

    // yfilter: remainder (last 16 columns; may overlap the previous block)
    do {
        let offset = widthExtended - 16
        var sum = SIMD16<UInt16>.zero
        for k in 0..<L {
            sum &+= SIMD16<UInt16>(extendedPointer[k][offset..<offset+16])
        }
        // TODO (translated): would like this to be a SIMD store
        for k in 0..<16 {
            yresultPointer[offset+k] = sum[k]
        }
    }

    // xfilter
    do {
        var x = 0
        while x < width - 16 {
            var sum = SIMD16<UInt16>.zero
            for k in 0..<L {
                let startIndex = x + k
                sum &+= SIMD16<UInt16>(yresultPointer[startIndex..<startIndex+16])
            }
            sum /= weightSIMD
            for k in 0..<16 {
                // TODO (translated): would like to improve this somehow
                resultPointer[width * y + x + k] = sum[k]
            }
            x += 16
        }
    }

    // xfilter: remainder (last 16 columns; may overlap the previous block)
    do {
        let offset = width - 16
        var sum = SIMD16<UInt16>.zero
        for k in 0..<L {
            let startIndex = offset + k
            sum &+= SIMD16<UInt16>(yresultPointer[startIndex..<startIndex+16])
        }
        sum /= weightSIMD
        // TODO (translated): would like this to be a SIMD store
        for k in 0..<16 {
            resultPointer[width * y + offset + k] = sum[k]
        }
    }

    // ringBuffering: rotate the row buffers by one; the oldest buffer is
    // reused as the slot for the next incoming row. A plain copy keeps every
    // slot initialised throughout (no assignment into a moved-from slot).
    let recycled = extendedPointer[0]
    for k in 0..<L-1 {
        extendedPointer[k] = extendedPointer[k+1]
    }
    extendedPointer[L-1] = recycled
}

yresultPointer.deinitialize()
yresultPointer.deallocate()
for k in 0..<L {
    extendedPointer[k].deinitialize()
    extendedPointer[k].deallocate()
}
extendedPointer.deinitialize()
extendedPointer.deallocate()
// …
// Separable box filter using SIMD16 lanes and raw buffers.
// For each output row: the L buffered, horizontally extended source rows are
// summed column-wise (yfilter); then L neighbouring column sums are added and
// divided by L * L (xfilter). The L row buffers are rotated after every row.
// `imagePointer`, `resultPointer`, `width`, `height`, `radius`, `L` and
// `extendImage` are declared outside the visible snippet.

// The 16-lane tail blocks start at `width - 16` / `widthExtended - 16`;
// a narrower image would index before the start of the buffers.
precondition(width >= 16, "box filter needs width >= 16 for the 16-lane tail block")
precondition(height >= 1, "box filter needs at least one row")
// The xfilter reads yresultPointer up to index width + L - 1, which stays
// inside widthExtended = width + 2 * radius only while L <= 2 * radius + 1.
precondition(L >= 1 && L <= 2 * radius + 1, "L must be in 1...(2 * radius + 1)")

// NOTE(review): sums are accumulated with wrapping `&+=` in UInt16 lanes, so
// the sum of L * L samples must fit in 16 bits or it wraps silently —
// confirm against the sample range of the image.
let weightSIMD = SIMD16<UInt16>(repeating: UInt16(L * L))
let widthExtended = width + 2 * radius

// (translated, best effort) Without a deallocate on every run this leaks
// memory; the result of the very last experiment is still needed afterwards,
// so the previous buffer is released here at the start.
resultPointer.deinitialize()
resultPointer.deallocate()
resultPointer = .allocate(capacity: width * height)
resultPointer.initialize(repeating: .zero)

// L row buffers, each `widthExtended` elements wide.
let extendedPointer: UnsafeMutableBufferPointer<UnsafeMutableBufferPointer<UInt16>> = .allocate(capacity: L)
for k in 0..<L {
    let rowBuffer = UnsafeMutableBufferPointer<UInt16>.allocate(capacity: widthExtended)
    rowBuffer.initialize(repeating: .zero)
    // The outer buffer is uninitialised at this point, so initialise the slot
    // rather than assigning through the subscript.
    extendedPointer.initializeElement(at: k, to: rowBuffer)
}

// Pre-fill the first L - 1 slots with rows (k - radius), clamped to the image
// at BOTH ends so that images with height <= radius are not read past the end.
for k in 0..<L-1 {
    extendImage(
        from: imagePointer.advanced(by: min(height - 1, max(0, k - radius)) * width),
        srcWidth: width,
        extendTo: extendedPointer[k],
        extendRadius: radius
    )
}

let yresultPointer = UnsafeMutableBufferPointer<UInt16>.allocate(capacity: widthExtended)
yresultPointer.initialize(repeating: .zero)

for y in 0..<height {
    // Load the newest row (y + radius, clamped to the last row) into the last slot.
    extendImage(
        from: imagePointer.advanced(by: min(height - 1, y + radius) * width),
        srcWidth: width,
        extendTo: extendedPointer[L-1],
        extendRadius: radius
    )

    // yfilter
    do {
        var x = 0
        while x < widthExtended - 16 {
            var sum = SIMD16<UInt16>.zero
            for k in 0..<L {
                sum &+= SIMD16<UInt16>(extendedPointer[k][x..<x+16])
            }
            for k in 0..<16 {
                yresultPointer[x+k] = sum[k]
            }
            x += 16
        }
    }

    // yfilter: remainder (last 16 columns; may overlap the previous block)
    do {
        let offset = widthExtended - 16
        var sum = SIMD16<UInt16>.zero
        for k in 0..<L {
            sum &+= SIMD16<UInt16>(extendedPointer[k][offset..<offset+16])
        }
        // TODO (translated): would like this to be a SIMD store
        for k in 0..<16 {
            yresultPointer[offset+k] = sum[k]
        }
    }

    // xfilter
    do {
        var x = 0
        while x < width - 16 {
            var sum = SIMD16<UInt16>.zero
            for k in 0..<L {
                let startIndex = x + k
                sum &+= SIMD16<UInt16>(yresultPointer[startIndex..<startIndex+16])
            }
            sum /= weightSIMD
            for k in 0..<16 {
                // TODO (translated): would like to improve this somehow
                resultPointer[width * y + x + k] = sum[k]
            }
            x += 16
        }
    }

    // xfilter: remainder (last 16 columns; may overlap the previous block)
    do {
        let offset = width - 16
        var sum = SIMD16<UInt16>.zero
        for k in 0..<L {
            let startIndex = offset + k
            sum &+= SIMD16<UInt16>(yresultPointer[startIndex..<startIndex+16])
        }
        sum /= weightSIMD
        // TODO (translated): would like this to be a SIMD store
        for k in 0..<16 {
            resultPointer[width * y + offset + k] = sum[k]
        }
    }

    // ringBuffering: rotate the row buffers by one; the oldest buffer is
    // reused as the slot for the next incoming row. A plain copy keeps every
    // slot initialised throughout (no assignment into a moved-from slot).
    let recycled = extendedPointer[0]
    for k in 0..<L-1 {
        extendedPointer[k] = extendedPointer[k+1]
    }
    extendedPointer[L-1] = recycled
}

yresultPointer.deinitialize()
yresultPointer.deallocate()
for k in 0..<L {
    extendedPointer[k].deinitialize()
    extendedPointer[k].deallocate()
}
extendedPointer.deinitialize()
extendedPointer.deallocate()

// Slide caption, decoded from garbled text (digits and symbols were lost in
// extraction and are not reconstructed):
//   "… x … grayscale  R …  kind  time[ms]  Naive  Separable  Separable SIMD Pointer  … x  … x"
// Apparently a timing table; the measured values are missing.
// …
// Separable box filter using SIMD16 lanes and raw buffers.
// For each output row: the L buffered, horizontally extended source rows are
// summed column-wise (yfilter); then L neighbouring column sums are added and
// divided by L * L (xfilter). The L row buffers are rotated after every row.
// `imagePointer`, `resultPointer`, `width`, `height`, `radius`, `L` and
// `extendImage` are declared outside the visible snippet.

// The 16-lane tail blocks start at `width - 16` / `widthExtended - 16`;
// a narrower image would index before the start of the buffers.
precondition(width >= 16, "box filter needs width >= 16 for the 16-lane tail block")
precondition(height >= 1, "box filter needs at least one row")
// The xfilter reads yresultPointer up to index width + L - 1, which stays
// inside widthExtended = width + 2 * radius only while L <= 2 * radius + 1.
precondition(L >= 1 && L <= 2 * radius + 1, "L must be in 1...(2 * radius + 1)")

// NOTE(review): sums are accumulated with wrapping `&+=` in UInt16 lanes, so
// the sum of L * L samples must fit in 16 bits or it wraps silently —
// confirm against the sample range of the image.
let weightSIMD = SIMD16<UInt16>(repeating: UInt16(L * L))
let widthExtended = width + 2 * radius

// (translated, best effort) Without a deallocate on every run this leaks
// memory; the result of the very last experiment is still needed afterwards,
// so the previous buffer is released here at the start.
resultPointer.deinitialize()
resultPointer.deallocate()
resultPointer = .allocate(capacity: width * height)
resultPointer.initialize(repeating: .zero)

// L row buffers, each `widthExtended` elements wide.
let extendedPointer: UnsafeMutableBufferPointer<UnsafeMutableBufferPointer<UInt16>> = .allocate(capacity: L)
for k in 0..<L {
    let rowBuffer = UnsafeMutableBufferPointer<UInt16>.allocate(capacity: widthExtended)
    rowBuffer.initialize(repeating: .zero)
    // The outer buffer is uninitialised at this point, so initialise the slot
    // rather than assigning through the subscript.
    extendedPointer.initializeElement(at: k, to: rowBuffer)
}

// Pre-fill the first L - 1 slots with rows (k - radius), clamped to the image
// at BOTH ends so that images with height <= radius are not read past the end.
for k in 0..<L-1 {
    extendImage(
        from: imagePointer.advanced(by: min(height - 1, max(0, k - radius)) * width),
        srcWidth: width,
        extendTo: extendedPointer[k],
        extendRadius: radius
    )
}

let yresultPointer = UnsafeMutableBufferPointer<UInt16>.allocate(capacity: widthExtended)
yresultPointer.initialize(repeating: .zero)

for y in 0..<height {
    // Load the newest row (y + radius, clamped to the last row) into the last slot.
    extendImage(
        from: imagePointer.advanced(by: min(height - 1, y + radius) * width),
        srcWidth: width,
        extendTo: extendedPointer[L-1],
        extendRadius: radius
    )

    // yfilter
    do {
        var x = 0
        while x < widthExtended - 16 {
            var sum = SIMD16<UInt16>.zero
            for k in 0..<L {
                sum &+= SIMD16<UInt16>(extendedPointer[k][x..<x+16])
            }
            for k in 0..<16 {
                yresultPointer[x+k] = sum[k]
            }
            x += 16
        }
    }

    // yfilter: remainder (last 16 columns; may overlap the previous block)
    do {
        let offset = widthExtended - 16
        var sum = SIMD16<UInt16>.zero
        for k in 0..<L {
            sum &+= SIMD16<UInt16>(extendedPointer[k][offset..<offset+16])
        }
        // TODO (translated): would like this to be a SIMD store
        for k in 0..<16 {
            yresultPointer[offset+k] = sum[k]
        }
    }

    // xfilter
    do {
        var x = 0
        while x < width - 16 {
            var sum = SIMD16<UInt16>.zero
            for k in 0..<L {
                let startIndex = x + k
                sum &+= SIMD16<UInt16>(yresultPointer[startIndex..<startIndex+16])
            }
            sum /= weightSIMD
            for k in 0..<16 {
                // TODO (translated): would like to improve this somehow
                resultPointer[width * y + x + k] = sum[k]
            }
            x += 16
        }
    }

    // xfilter: remainder (last 16 columns; may overlap the previous block)
    do {
        let offset = width - 16
        var sum = SIMD16<UInt16>.zero
        for k in 0..<L {
            let startIndex = offset + k
            sum &+= SIMD16<UInt16>(yresultPointer[startIndex..<startIndex+16])
        }
        sum /= weightSIMD
        // TODO (translated): would like this to be a SIMD store
        for k in 0..<16 {
            resultPointer[width * y + offset + k] = sum[k]
        }
    }

    // ringBuffering: rotate the row buffers by one; the oldest buffer is
    // reused as the slot for the next incoming row. A plain copy keeps every
    // slot initialised throughout (no assignment into a moved-from slot).
    let recycled = extendedPointer[0]
    for k in 0..<L-1 {
        extendedPointer[k] = extendedPointer[k+1]
    }
    extendedPointer[L-1] = recycled
}

yresultPointer.deinitialize()
yresultPointer.deallocate()
for k in 0..<L {
    extendedPointer[k].deinitialize()
    extendedPointer[k].deallocate()
}
extendedPointer.deinitialize()
extendedPointer.deallocate()

// Slide caption, decoded from garbled text (digits and symbols were lost in
// extraction and are not reconstructed):
//   "… x … grayscale  R …  … x  kind  time[ms]  Naive  Separable  Separable SIMD Pointer"
// Apparently a timing table; the measured values are missing.
// Separable box filter using SIMD16 lanes and raw buffers.
// For each output row: the L buffered, horizontally extended source rows are
// summed column-wise (yfilter); then L neighbouring column sums are added and
// divided by L * L (xfilter). The L row buffers are rotated after every row.
// `width`, `height`, `radius`, `L` and `extendImage` are declared outside the
// visible snippet.
let imagePointer: UnsafeMutablePointer<UInt16> = …   // initialiser elided on the slide
var resultPointer: UnsafeMutableBufferPointer<UInt16> = …   // initialiser elided on the slide

// The 16-lane tail blocks start at `width - 16` / `widthExtended - 16`;
// a narrower image would index before the start of the buffers.
precondition(width >= 16, "box filter needs width >= 16 for the 16-lane tail block")
precondition(height >= 1, "box filter needs at least one row")
// The xfilter reads yresultPointer up to index width + L - 1, which stays
// inside widthExtended = width + 2 * radius only while L <= 2 * radius + 1.
precondition(L >= 1 && L <= 2 * radius + 1, "L must be in 1...(2 * radius + 1)")

// NOTE(review): sums are accumulated with wrapping `&+=` in UInt16 lanes, so
// the sum of L * L samples must fit in 16 bits or it wraps silently —
// confirm against the sample range of the image.
let weightSIMD = SIMD16<UInt16>(repeating: UInt16(L * L))
let widthExtended = width + 2 * radius

// (translated, best effort) Without a deallocate on every run this leaks
// memory; the result of the very last experiment is still needed afterwards,
// so the previous buffer is released here at the start.
resultPointer.deinitialize()
resultPointer.deallocate()
resultPointer = .allocate(capacity: width * height)
resultPointer.initialize(repeating: .zero)

// L row buffers, each `widthExtended` elements wide.
let extendedPointer: UnsafeMutableBufferPointer<UnsafeMutableBufferPointer<UInt16>> = .allocate(capacity: L)
for k in 0..<L {
    let rowBuffer = UnsafeMutableBufferPointer<UInt16>.allocate(capacity: widthExtended)
    rowBuffer.initialize(repeating: .zero)
    // The outer buffer is uninitialised at this point, so initialise the slot
    // rather than assigning through the subscript.
    extendedPointer.initializeElement(at: k, to: rowBuffer)
}

// Pre-fill the first L - 1 slots with rows (k - radius), clamped to the image
// at BOTH ends so that images with height <= radius are not read past the end.
for k in 0..<L-1 {
    extendImage(
        from: imagePointer.advanced(by: min(height - 1, max(0, k - radius)) * width),
        srcWidth: width,
        extendTo: extendedPointer[k],
        extendRadius: radius
    )
}

let yresultPointer = UnsafeMutableBufferPointer<UInt16>.allocate(capacity: widthExtended)
yresultPointer.initialize(repeating: .zero)

for y in 0..<height {
    // Load the newest row (y + radius, clamped to the last row) into the last slot.
    extendImage(
        from: imagePointer.advanced(by: min(height - 1, y + radius) * width),
        srcWidth: width,
        extendTo: extendedPointer[L-1],
        extendRadius: radius
    )

    // yfilter
    do {
        var x = 0
        while x < widthExtended - 16 {
            var sum = SIMD16<UInt16>.zero
            for k in 0..<L {
                sum &+= SIMD16<UInt16>(extendedPointer[k][x..<x+16])
            }
            for k in 0..<16 {
                yresultPointer[x+k] = sum[k]
            }
            x += 16
        }
    }

    // yfilter: remainder (last 16 columns; may overlap the previous block)
    do {
        let offset = widthExtended - 16
        var sum = SIMD16<UInt16>.zero
        for k in 0..<L {
            sum &+= SIMD16<UInt16>(extendedPointer[k][offset..<offset+16])
        }
        // TODO (translated): would like this to be a SIMD store
        for k in 0..<16 {
            yresultPointer[offset+k] = sum[k]
        }
    }

    // xfilter
    do {
        var x = 0
        while x < width - 16 {
            var sum = SIMD16<UInt16>.zero
            for k in 0..<L {
                let startIndex = x + k
                sum &+= SIMD16<UInt16>(yresultPointer[startIndex..<startIndex+16])
            }
            sum /= weightSIMD
            for k in 0..<16 {
                // TODO (translated): would like to improve this somehow
                resultPointer[width * y + x + k] = sum[k]
            }
            x += 16
        }
    }

    // xfilter: remainder (last 16 columns; may overlap the previous block)
    do {
        let offset = width - 16
        var sum = SIMD16<UInt16>.zero
        for k in 0..<L {
            let startIndex = offset + k
            sum &+= SIMD16<UInt16>(yresultPointer[startIndex..<startIndex+16])
        }
        sum /= weightSIMD
        // TODO (translated): would like this to be a SIMD store
        for k in 0..<16 {
            resultPointer[width * y + offset + k] = sum[k]
        }
    }

    // ringBuffering: rotate the row buffers by one; the oldest buffer is
    // reused as the slot for the next incoming row. A plain copy keeps every
    // slot initialised throughout (no assignment into a moved-from slot).
    let recycled = extendedPointer[0]
    for k in 0..<L-1 {
        extendedPointer[k] = extendedPointer[k+1]
    }
    extendedPointer[L-1] = recycled
}

yresultPointer.deinitialize()
yresultPointer.deallocate()
for k in 0..<L {
    extendedPointer[k].deinitialize()
    extendedPointer[k].deallocate()
}
extendedPointer.deinitialize()
extendedPointer.deallocate()

// Slide caption, decoded from garbled text (digits and symbols were lost in
// extraction and are not reconstructed):
//   "… x … grayscale  R …  … x"  /  "… FPS … s … ms"  /  "… FPS … s ≈ … ms"
// Apparently a frame-rate / frame-time comparison; the values are missing.