Commit a249f11e authored by Tobias Due Munk

Update for MobiConf

parent c42e9663
......@@ -23,19 +23,16 @@ extension Array where Element == Slide {
.add(color)
.add(resolution)
.add(faceTracker)
// .add(halfFloat) // TODO: Re-enable after ADDC
// .add(musicDepth) // TODO: Re-enable after ADDC
.add(musicDepthNoCode)
// .add(normalizePixelBuffer) // TODO: Re-enable after ADDC
.add(normalizePixelBufferLessCode)
// .add(normalizeCoreImage) // TODO: Re-enable after ADDC
.add(normalizeCoreImageLessCode)
.add(halfFloat)
.add(musicDepth)
.add(normalizePixelBuffer)
.add(normalizeCoreImage)
.add(halideEffect)
// .add(variableBlur)
.add(portraitMode)
.add(references)
.add(openSource)
// .add(openSource)
.add(theEnd)
}
......@@ -84,15 +81,18 @@ private let intro: [Slide] = [
speakerNotes: """
· Turn screen brightness to <80%
· Plug everything in
·· HDMI + jack to Mac
·· Lightning to iPhone
· Disable Wi-Fi on Mac + iPhone
· Disable notifications on Mac + iPhone
· AirPlay using Reflector 3
· Turn volume up on Mac + iPhone
· Disable Wi-Fi
· Disable notifications
· Check audio
·· 'Chime' + 'Lite'
"""
// · Plug everything in
// ·· HDMI + jack to Mac
// ·· Lightning to iPhone
// · Disable Wi-Fi on Mac + iPhone
// · Disable notifications on Mac + iPhone
// · AirPlay using Reflector 3
// · Turn volume up on Mac + iPhone
),
Slide(
scene: SceneConfiguration(
......@@ -101,12 +101,8 @@ private let intro: [Slide] = [
),
speakerNotes: """
· I'm Tobias Due Munk.
· I'm from Copenhagen in Denmark.
· [Walk out on the stage]
· Just some quick advice on taking the metro here in Barcelona:
· If the gates don't open when you feed your ticket to the machine,
· maybe try the gate on the other side of you.
· I'm part of a two-person software studio,
· in Copenhagen, Denmark.
"""
),
Slide(
......@@ -115,10 +111,8 @@ private let intro: [Slide] = [
sun: .dawn
),
speakerNotes: """
· I'm here to talk about depth sensors, and how they can be used for Augmented Reality (AR) on mobile.
· under the title 'Into the Deep',
· I'm a developer and designer for iOS,
· so examples in this talk will be based on the iOS platform running on this iPhone X.
· I'm here to talk about depth sensors and their API on iOS,
· under the title 'Into the Deep'.
"""
),
Slide(
......@@ -133,7 +127,7 @@ private let intro: [Slide] = [
],
speakerNotes: """
· OK.
· I always wanted to have intro music, like when walking onto a big stage.
· I want to start big, so excuse me...
· [Enable 'Lite']
· [Run off the stage and come back]
"""
......@@ -152,23 +146,25 @@ private let intro: [Slide] = [
],
speakerNotes: """
· [Dance a bit around]
· [Go to next slide synced to music]
"""
),
Slide(
scene: SceneConfiguration(
node: singleDotDepthNodeConfiguration
),
features: [
LiveDepthMainFeature(),
],
speakerNotes: """
· Just to get a quick sense of what a depth sensor does before we dive in,
· see how the square is reacting to the distance my hand is from the iPhone.
· And now to our first topic:
· Let's get started:
· [Go to next slide synced to music]
"""
),
// Slide(
// scene: SceneConfiguration(
// node: singleDotDepthNodeConfiguration
// ),
// features: [
// LiveDepthMainFeature(),
// ],
// speakerNotes: """
// · Just to get a quick sense of what a depth sensor does before we dive in,
// · see how the square is reacting to the distance my hand is from the iPhone.
//
// · And now to the first step:
// """
// ),
]
private let liveStreamDepth: [Slide] = [
......@@ -180,9 +176,10 @@ private let liveStreamDepth: [Slide] = [
sun: .morning
),
speakerNotes: """
· You can capture depth as single frame photos, but
· in this talk I'll show live streaming data, but
· most things I show can be applied to both domains.
· You can capture depth as a single frame photo, but
· in this talk we'll use live streamed data.
· Most things that we'll see can be applied to both domains.
· Let's look at how that is done.
"""
),
......@@ -201,6 +198,26 @@ private let liveStreamDepth: [Slide] = [
· manipulation of the data can also be done on the Mac and Apple TV.
"""
),
Slide(
scene: SceneConfiguration(
mesh: .floor,
sun: .morning,
code: """
// Depth support
// Front: X, Xs, Xs Max, Xr
// Back: X, Xs, Xs Max, Xr, 7 Plus, 8 Plus
"""
),
speakerNotes: """
· Device support for depth sensing goes like this:
· If the iPhone has Face ID, the front camera can capture depth;
· if the iPhone has a Dual Camera, it supports depth on the back.
· The odd one is the iPhone Xr, which has a single back camera
· but still supports depth.
· It hasn't shipped yet, so we'll see how that works when it does.
"""
),
Slide(
scene: SceneConfiguration(
mesh: .floor,
......@@ -210,8 +227,8 @@ private let liveStreamDepth: [Slide] = [
let discoverySession = AVCaptureDevice.DiscoverySession(
deviceTypes: [
.builtInTrueDepthCamera,
.builtInDualCamera
.builtInTrueDepthCamera, // Front
.builtInDualCamera // Back
],
mediaType: .video,
position: .unspecified
......@@ -219,12 +236,11 @@ private let liveStreamDepth: [Slide] = [
"""
),
speakerNotes: """
· Both front camera on iPhone X, and
· the Dual Camera from the iPhone X,
· as well as the last two plus sized iPhones,
· can capture depth.
· Here we set up a discovery session,
· that will find the appropriate cameras those device.
· that will find the appropriate cameras on those devices.
· TrueDepth is for the front camera,
· DualCamera is for the back.
· As I just mentioned, for this talk we'll use video as the mediaType.
"""
),
Slide(
......@@ -245,8 +261,8 @@ private let liveStreamDepth: [Slide] = [
"""
),
speakerNotes: """
· We ask for the devices that matches the requirements,
· select a device, then
· We ask the session for the devices that match the requirements,
· pick a device, then
· upgrade it to a `device input`, and
· add the input to an `AVCaptureSession`.
"""
......@@ -264,9 +280,9 @@ private let liveStreamDepth: [Slide] = [
"""
),
speakerNotes: """
· We create a depth data output,
· add to the same session, and
· finally setup the delegate for callback.
· We create a DepthDataOutput,
· add it to the same session, and
· finally assign the delegate.
"""
),
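// A minimal sketch of that setup; `session` is assumed from the previous
// step and the queue label is arbitrary:
// let depthOutput = AVCaptureDepthDataOutput()
// if session.canAddOutput(depthOutput) { session.addOutput(depthOutput) }
// depthOutput.setDelegate(self, callbackQueue: DispatchQueue(label: "depth"))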
Slide(
......@@ -290,7 +306,7 @@ private let liveStreamDepth: [Slide] = [
),
speakerNotes: """
· The delegate callback is quite a mouthful.
· The only part we're interested in is,
· The only part we're interested in is...
· [skip to next slide]
"""
),
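// The callback in question is the AVCaptureDepthDataOutputDelegate method:
// func depthDataOutput(_ output: AVCaptureDepthDataOutput,
//                      didOutput depthData: AVDepthData,
//                      timestamp: CMTime,
//                      connection: AVCaptureConnection) { /* ... */ }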
......@@ -314,7 +330,7 @@ private let liveStreamDepth: [Slide] = [
"""
),
speakerNotes: """
· ... is the `AVDepthData` object.
· ... the `AVDepthData` object.
· So let's look at what it contains.
"""
),
......@@ -330,7 +346,7 @@ private let liveStreamDepth: [Slide] = [
),
speakerNotes: """
· `AVDepthData` is backed by a `CVPixelBuffer`.
· This buffer might be familiar to you if you've worked with images or videos on Apple's platforms before.
· This buffer type might be familiar to you if you've worked with images or videos on Apple's platforms before.
"""
),
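// The buffer is exposed directly on AVDepthData:
// let buffer: CVPixelBuffer = depthData.depthDataMap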
Slide(
......@@ -348,9 +364,10 @@ private let liveStreamDepth: [Slide] = [
"""
),
speakerNotes: """
· The default format of the buffer is a bit peculiar.
· Here we convert it to a normal 32 bit Float.
· Each "pixel" now contains a single value,
· The default format of the buffer in the AVDepthData object is a bit peculiar.
· Here we convert it to a normal 32 bit Float,
· before accessing the buffer.
· Each "pixel" in the buffer now contains a value type
· that we can read in Swift out of the box.
"""
),
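// The conversion uses AVDepthData's converting API; 32-bit disparity is
// assumed here, 32-bit depth works the same way:
// let converted = depthData.converting(toDepthDataType: kCVPixelFormatType_DisparityFloat32)
// let buffer = converted.depthDataMap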
......@@ -378,7 +395,7 @@ private let liveStreamDepth: [Slide] = [
speakerNotes: """
· Here is a function to read the value for a certain point in the `CVPixelBuffer`.
· As you can see, we have to use a lot of C-styled APIs.
· Let's go through is step by step:
· Let's break it down and go through this step by step:
· [skip to next slide]
"""
),
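// A sketch of such an accessor, assuming the buffer has already been
// converted to 32-bit floats; it locks while reading and uses the
// bytes-per-row stride to index into the right row:
// extension CVPixelBuffer {
//     func get(x: Int, y: Int) -> Float {
//         CVPixelBufferLockBaseAddress(self, .readOnly)
//         defer { CVPixelBufferUnlockBaseAddress(self, .readOnly) }
//         let rowBytes = CVPixelBufferGetBytesPerRow(self)
//         let row = CVPixelBufferGetBaseAddress(self)!.advanced(by: y * rowBytes)
//         return row.assumingMemoryBound(to: Float.self)[x]
//     }
// }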
......@@ -454,7 +471,7 @@ private let liveStreamDepth: [Slide] = [
"""
),
speakerNotes: """
· We have to remember to lock the buffer when reading from it.
· We have to remember to lock the buffer while we're reading from it.
"""
),
Slide(
......@@ -503,15 +520,13 @@ private let liveStreamDepth: [Slide] = [
),
speakerNotes: """
· We need to introduce some new terminology,
· just one word: Disparity.
· It's the inverse of the distance.
· A value of 1 equals a distance of 1 meter for both depth and disparity.
· Value bigger than 1 is close than one meter,
· but don't worry, it's just one word: Disparity.
· Disparity is the inverse of the distance.
· A value of 1 equals a distance of 1 meter.
· A value bigger than 1 is closer than one meter,
· and infinite distance has a value of 0 in disparity.
· Be sure to name things so it's clear whether the code is handling one or the other.
· I find it very easy to get confused about this.
· Maybe I should have named my talk 'Into the Disparity'?
· I find it very easy to get confused about this, so you might too.
"""
),
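// Disparity vs distance, in numbers:
// disparity = 1 / distanceInMeters
// 0.5 m → 2.0, 1 m → 1.0, 2 m → 0.5, ∞ → 0.0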
Slide(
......@@ -525,9 +540,10 @@ private let liveStreamDepth: [Slide] = [
])
],
speakerNotes: """
· Let's see it in action.
· [Hammer Time] "Demo Time".
· "Are you ready?"
· [Enable some music]
· [Skip to next slide]
"""
),
Slide(
......@@ -553,7 +569,7 @@ private let frontBack: [Slide] = [
title: "TrueDepth\nvs\nDual Camera",
mesh: .floor),
speakerNotes: """
· Let's do a quick comparison between the two types of cameras,
· Let's do a quick comparison between two of the camera types
· that can capture depth:
· A showdown between TrueDepth and Dual Camera.
"""
......@@ -567,9 +583,10 @@ private let frontBack: [Slide] = [
],
speakerNotes: """
· This is when using the TrueDepth front camera.
· It's iPhone X only, and
· uses an infrared transmitter and sensor.
· It works in the dark.
· It's the one used for Face ID, and
· it uses an infrared transmitter and sensor.
· So it works in the dark.
· Now, let's take a look at the Dual Camera on the back.
"""
),
......@@ -585,10 +602,10 @@ private let frontBack: [Slide] = [
· [Restart app if stuck]
· Can I please have some light on the audience?
· [Demo pointing to audience]
· Inferred from the stereo effect captured by the two cameras.
· The depth map is inferred from the stereo effect captured by the two cameras.
· Performs poorly in low light.
· The output has a significantly lower quality when live streaming,
· making it pretty much useless.
· When live streaming, the quality is quite low,
· as you can see here.
· It's still fine for static photos though.
"""
),
......@@ -602,7 +619,8 @@ private let filtering: [Slide] = [
mesh: .floor),
speakerNotes: """
· Depth capture has a fairly limited set of settings.
· The most important one is for filtering.
· The most important one is for filtering,
· so let's take a look at how it works.
"""
),
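// The setting in question (on AVCaptureDepthDataOutput):
// depthOutput.isFilteringEnabled = false // raw readings, with holes
// depthOutput.isFilteringEnabled = true  // interpolated, hole-free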
Slide(
......@@ -621,14 +639,14 @@ private let filtering: [Slide] = [
LiveDepthMainFeature(features: [Filtering(isEnabled: false)])
],
speakerNotes: """
· Here, filtering is disabled.
· You can see all the spots where the sensor didn't get a proper reading.
· Here, filtering is *disabled*, so we see the raw data coming straight from the sensor.
· The empty spots we see are where the sensor didn't get a proper reading.
· [Hold arm across to create sensor-shadow]
· You can see a shadow on the lower side of my arm, and
· this happens because the infrared transmitter and receiver aren't located in the same position.
· [Move hand very close to break]
· You can also see that it breaks when we move really close,
· so the sensor has a minimum distance of about 15 cm.
· You can also see that it breaks when we get too close.
· The sensor has a minimum distance of ~15 cm.
"""
),
Slide(
......@@ -648,10 +666,10 @@ private let filtering: [Slide] = [
],
speakerNotes: """
· Here filtering is enabled.
· It's mainly interpolation that fills our the black spots.
· It's mainly interpolation that fills out the black spots.
· [Move hand close] Breaks differently than before.
· There's also some temporal filtering,
· resulting in and temporal artifacts (which I might be able to provoke).
· resulting in temporal artifacts (which I might be able to provoke).
· We also lose information about which readings are valid,
· so you might want to roll your own filtering.
"""
......@@ -684,7 +702,7 @@ private let color: [Slide] = [
],
speakerNotes: """
· Just as for depth, we can request video output from `AVCaptureSession`.
· Here we add an video (also known as color) output to the session.
· Here we add a video (also known as color) output to the session.
"""
),
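// A sketch of the color output setup, mirroring the depth output;
// `session` is assumed from the earlier setup:
// let videoOutput = AVCaptureVideoDataOutput()
// if session.canAddOutput(videoOutput) { session.addOutput(videoOutput) }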
Slide(
......@@ -706,7 +724,7 @@ private let color: [Slide] = [
· but the delegate callback is a different one than for depth.
· We really don't want to mess with these separate delegate callbacks,
· and struggle to keep the depth and color streams in sync.
· Fortunately, `AVFoundation` has a built in solution called a synchronizer:
· Fortunately, `AVFoundation` has a built-in solution called a synchronizer, so let's use that:
"""
),
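// The synchronizer wraps both outputs and delivers them in a single
// callback; a sketch assuming the `videoOutput`/`depthOutput` names above:
// let synchronizer = AVCaptureDataOutputSynchronizer(dataOutputs: [videoOutput, depthOutput])
// synchronizer.setDelegate(self, queue: DispatchQueue(label: "synchronized"))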
Slide(
......@@ -953,7 +971,7 @@ private let resolution: [Slide] = [
· So here I'm digitally zooming in on my right eye.
· [Wink wink]
· This doesn't show the full extent of the resolution,
· so let's go one step further along the scale using embarrasing facial features and projecting them up on a big screen.
· so let's go one step further along the scale, using embarrassing facial features and projecting them up on a big screen in front of a large group of people.
"""
),
Slide(
......@@ -972,7 +990,7 @@ private let resolution: [Slide] = [
])
],
speakerNotes: """
· So there's my now.
· So there's my nose.
· [Move face slowly closer]
· No interpolation is used here.
· [Cover nose with hand]
......@@ -1056,7 +1074,7 @@ private let faceTracker: [Slide] = [
),
speakerNotes: """
· ...then we set the delegate, but this time on the session.
· Works for TrueDepth camera on the front of the iPhone X only.
· Works for the TrueDepth camera on Face ID capable devices only.
"""
),
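// A sketch of that setup, assuming ARKit's face tracking configuration:
// let session = ARSession()
// session.delegate = self
// session.run(ARFaceTrackingConfiguration())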
Slide(
......@@ -1077,6 +1095,9 @@ private let faceTracker: [Slide] = [
speakerNotes: """
· Here we have the delegate callback.
· This is the same callback that is used for things like plane and image detection.
· So I actually tried plane detection in ARKit the other day when flying here to Krakow,
· and I'm sorry to say this, but it really doesn't work.
"""
),
Slide(
......@@ -1095,8 +1116,8 @@ private let faceTracker: [Slide] = [
"""
),
speakerNotes: """
· The main in this callback are the anchors.
· It provides a vaguely typed array of `ARAnchor`'s
· The main part of this callback is the anchors.
· It's a vaguely typed array of `ARAnchor`s.
· All synchronized.
"""
),
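// A sketch of pulling the face anchors out of that array; the delegate
// method is ARKit's, the filtering is an assumption:
// func session(_ session: ARSession, didUpdate anchors: [ARAnchor]) {
//     let faceAnchors = anchors.compactMap { $0 as? ARFaceAnchor }
//     // react to / render faceAnchors here
// }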
......@@ -1135,7 +1156,7 @@ private let faceTracker: [Slide] = [
· Here I'm rendering a geometry property that `ARFaceAnchor` has.
· It includes all animations and changes,
· so I'm not doing anything extra,
· besides rendering the geometry in 3D.
· besides rendering that geometry in a 3D scene.
"""
),
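// One way to render that geometry with SceneKit (a sketch, not necessarily
// how this deck does it; `node` is an assumed SCNNode):
// let faceGeometry = ARSCNFaceGeometry(device: MTLCreateSystemDefaultDevice()!)!
// faceGeometry.update(from: faceAnchor.geometry)
// node.geometry = faceGeometry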
Slide(
......@@ -1166,7 +1187,7 @@ private let faceTracker: [Slide] = [
),
speakerNotes: """
· The `ARFaceAnchor` also has some more detailed values called blend shapes.
· These are the values that powers Animojis and Meojis.
· These are the values that power Animojis and Memojis.
· There are 50+ values,
· like position of brows, eyes, or
· nose sneer, and how much you are squinting on each eye.
......@@ -1295,7 +1316,8 @@ private let halfFloat: [Slide] = [
title: "Half Float",
mesh: .floor),
speakerNotes: """
· In the very beginning, I showed you how to convert a `CVPixelBuffer` to contain Swift-native Floats.
· OK, so back to some more code-heavy parts.
· In the very beginning, we saw how to convert a `CVPixelBuffer` to contain Swift-native Floats.
· In the interest of performance,
· let's look at how we can read the "raw" data we get from the `AVDepthData` object,
· to completely skip the conversion.
......@@ -1311,10 +1333,9 @@ private let halfFloat: [Slide] = [
"""
),
speakerNotes: """
· It turn out the data is of this type: DisparityFloat 16 bit.
· Half float, not available in Swift Foundation.
· It turns out the data is of this type: DisparityFloat 16 bit.
· Half float, not available in Swift Foundation (it is available in the Metal framework though).
· 16 bit, instead of 32 bit as Float or 64 bit as Double, like we're used to in Swift.
· "So I went online" to figure out how to parse it.
"""
),
Slide(
......@@ -1335,15 +1356,48 @@ private let halfFloat: [Slide] = [
"""
),
speakerNotes: """
· Let's take a look at a simple API that we can make for it,
· so we can use this type for the following examples.
· Let's take a look at a simple extension API that we can make for it,
· so we can use them for the following examples.
· To convert one way we use the first extension, and
· for going the other way we use the second one.
· The first contains "Magic",
· so the second—of course—contains the "Reverse Magic".
· Just imagine some bitshifting, along with exponent and significand calculations here.
· If you're thinking:
· "aw man, I really wanted to see some magic".
· Okay, here you go:
"""
),
Slide(
scene: SceneConfiguration(
mesh: .floor,
code: """
// Magic
let sign = Float((self & 0b1000_0000_0000_0000) >> 15)
let exponent = Float((self & 0b0111_1100_0000_0000) >> 10)
let signific = Float(self & 0b0000_0011_1111_1111)
if exponent == 0b0_0000 {
if signific == 0 { return pow(-1, sign) * 0 } else {
let last = 0 + signific / 0b0011_1111_1111
return pow(-1, sign) * pow(2, -14) * last
}
} else if exponent == 0b1_1111 {
return signific == 0 ? pow(-1, sign) * .infinity : .nan
} else {
let last = 1 + signific / 0b0011_1111_1111
return pow(-1, sign) * pow(2, exponent - 15) * last
}
"""
),
speakerNotes: """
· I'm not gonna go through this.
· I went "online" to figure out how to parse it.
· It's basically some bitmasking, bitshifting, along with exponent and significand calculations.
"""
),
Slide(
scene: SceneConfiguration(
mesh: .offScene,
......@@ -1408,58 +1462,7 @@ private let musicDepth: [Slide] = [
· Use the `get` function we just implemented on the `CVPixelBuffer`.
"""
),
Slide(
scene: SceneConfiguration(
mesh: .floor,
code: """
if rightDisparity > 2 {
synth.play(note: 58)
} else {
synth.play(note: 57)
}
"""
),
speakerNotes: """
· Let's change the note of an instrument if the right hand is close enough to the device.
"""
),
Slide(
scene: SceneConfiguration(
mesh: .floor,
code: """
synth.lfoResonance = leftDisparity / 4
"""
),
speakerNotes: """
· Similarly we change a filter characteristic depending on the distance of the left hand.
"""
),
Slide(
scene: SceneConfiguration(
mesh: .front),
features: [
LiveDepthMainFeature(),
MusicMainFeature(features: [
DepthSynth()
], isEnabled: false),
],
speakerNotes: """
· [Enable 'Music']
"""
)
]
private let musicDepthNoCode: [Slide] = [
Slide(
isSectionStart: true,
scene: SceneConfiguration(
title: "Input",
mesh: .floor),
speakerNotes: """
· OK.
· Let's take a look at another way the depth sensor can be used as an input device.
"""
),
Slide(
scene: SceneConfiguration(
mesh: .floor,
......@@ -1474,7 +1477,7 @@ private let musicDepthNoCode: [Slide] = [
"""
),
speakerNotes: """
· Let's change the note of an instrument if left hand is close enough to device.
· Let's change the note of the instrument when the left hand is close to the device.
"""
),
Slide(
......@@ -1521,6 +1524,8 @@ private let normalizePixelBuffer: [Slide] = [
scene: SceneConfiguration(
mesh: .floor,
code: """
// Write Value Back to Depth Buffer
extension CVPixelBuffer {
func set(x: Int, y: Int, value: Float) {
// ...
......@@ -1530,10 +1535,13 @@ private let normalizePixelBuffer: [Slide] = [
"""
),
speakerNotes: """
· First we need a setter for putting normalized values back into the buffer of depth data.
· This is very similar to what we saw before (for the setter).
· First we need a setter for putting normalized values back into the buffer.
· This is very similar to what we have done for the getter where we:
· did a bunch of conversions,
· calculated the index in the buffer based on the width, and
· had to remember to lock when reading from the buffer.
· The main difference is that we use our half float API to create a valid bit pattern *from* a floating-point value, and
· then we just store that bit pattern in to the buffer in the correct position.
· then we store that bit pattern back into the buffer at the correct position.
"""
),
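// A sketch of such a setter, mirroring the getter and assuming a
// hypothetical `halfFloatBits` property from the Half Float extensions:
// extension CVPixelBuffer {
//     func set(x: Int, y: Int, value: Float) {
//         CVPixelBufferLockBaseAddress(self, [])
//         defer { CVPixelBufferUnlockBaseAddress(self, []) }
//         let rowBytes = CVPixelBufferGetBytesPerRow(self)
//         let row = CVPixelBufferGetBaseAddress(self)!.advanced(by: y * rowBytes)
//         row.assumingMemoryBound(to: UInt16.self)[x] = value.halfFloatBits
//     }
// }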
Slide(
......@@ -1541,7 +1549,7 @@ private let normalizePixelBuffer: [Slide] = [
mesh: .floor,
code: """
extension CVPixelBuffer {
func normalize() {
func minMax() -> (Float, Float) {
let width = CVPixelBufferGetWidth(self)
let height = CVPixelBufferGetHeight(self)
var minValue: Float = .greatestFiniteMagnitude
......@@ -1553,94 +1561,16 @@ private let normalizePixelBuffer: [Slide] = [
maxValue = max(value, maxValue)
}
}
// ...
return (minValue, maxValue)
}
}
"""
),
speakerNotes: """
· Here's the first part of a normalize function.
· It loops over all the pixels to find the minimum and maximum values, and
"""
),
Slide(
scene: SceneConfiguration(
mesh: .floor,
code: """
// ...
let range = maxValue - minValue
for y in 0 ..< height {
for x in 0 ..< width {
let value = get(x: x, y: y)
let newValue = (value - minValue) / range
set(x: x, y: y, value: newValue)
}
}
"""
),
speakerNotes: """
· ...and then we adjust the values,
· to normalize the data,
· based on the min-max.
"""
),
Slide(
scene: SceneConfiguration(
mesh: .offScene,
node: pixelBufferNodeConfiguration),
features: [
LiveDepthMainFeature(),
PixelBufferMainFeature(features: [
Normalize(isEnabled: false)
])
],
speakerNotes: """
· Before I enable normalization, see how things closer than 1 meter clip to white.
· Now I'm enabling normalization. [Enable 'Norm']
· To improve performance further, you should make sure to only lock and unlock the buffer once for the entire operation.
· [Demonstrate the low framerate]
· You can see some dropped frames here.
· Despite reading raw data, we can do better for normalizing.
"""
)
]
private let normalizePixelBufferLessCode: [Slide] = [
Slide(
isSectionStart: true,
scene: SceneConfiguration(
title: "Normalize\nCVPixelBuffer",
mesh: .floor),
speakerNotes: """
· A typical first step when using depth data would be to normalize it.
· Either for a preview that shows the full range (so it doesn't clip any values and shows as high a contrast as possible), or
· for whatever your next processing step might be, normalized data makes sense.
· Some of the next parts will be a bit code heavy,
· but hopefully the demos will soften the blow a bit.
"""
),
Slide(
scene: SceneConfiguration(
mesh: .floor,
code: """
// Write Value Back to Depth Buffer
extension CVPixelBuffer {
func set(x: Int, y: Int, value: Float) {
// ...
buffer[index]
}
}
"""
),