rtiow: add aabb tests and benchmark along with terrible SIMD impl.

2023-01-22 12:03:17 -08:00 · 2023-01-22 12:03:17 -08:00 · 2d696932e3
commit 2d696932e3
parent 27d6c1280b
5 changed files with 145 additions and 38 deletions
--- a/rtiow/renderer/Cargo.toml
+++ b/rtiow/renderer/Cargo.toml
@ -8,6 +8,9 @@ edition = "2021"
 [[bench]]
 harness = false
 name = "spheres"
 [[bench]]
 harness = false
 name = "aabb"
 [dependencies]
 chrono = "*"
--- a/rtiow/renderer/benches/aabb.rs
+++ b/rtiow/renderer/benches/aabb.rs
@ -1,16 +1,45 @@
 use criterion::*;
-
+use renderer::{aabb::AABB, ray::Ray};
 /// Placeholder workload with an empty body: it accepts a byte slice and does nothing with it.
 /// Referenced by the "throughput-example" group in `bench`.
 fn decode(bytes: &[u8]) {
    // Decode the bytes
    //...
 }
 /// Criterion entry point: times each `AABB` hit routine against one ray pointed at the box
 /// (`r_hit`) and one pointed away from it (`r_miss`).
 fn bench(c: &mut Criterion) {
-    let bytes: &[u8] = b"some bytes";
+    let bb = AABB::new([1., -1., -1.], [3., 1., 1.]);
    // Both rays start at the origin. The box spans x in [1, 3], so +x heads toward it and -x away.
    let r_hit = Ray::new([0., 0., 0.], [1., 0., 0.], 0.);
    let r_miss = Ray::new([0., 0., 0.], [-1., 0., 0.], 0.);
    let t_min = 0.001;
    let t_max = f32::MAX;
    let mut group = c.benchmark_group("aabb");
    // Each measured iteration performs a single ray/box query.
    group.throughput(Throughput::Elements(1));
    // --- ray that points at the box ---
    group.bench_with_input(BenchmarkId::new("hit_naive", "r_hit"), &r_hit, |b, r| {
        b.iter(|| bb.hit_naive(*r, t_min, t_max))
    });
    group.bench_with_input(BenchmarkId::new("hit2", "r_hit"), &r_hit, |b, r| {
        b.iter(|| bb.hit2(*r, t_min, t_max))
    });
    //group.bench_with_input(BenchmarkId::new("hit_precompute", "r_hit"), &r_hit, |b, r| { b.iter(|| bb.hit_precompute(*r, t_min, t_max)) });
    group.bench_with_input(BenchmarkId::new("hit_fast", "r_hit"), &r_hit, |b, r| {
        b.iter(|| bb.hit_fast(*r, t_min, t_max))
    });
    // The SIMD variant is only benchmarked when compiling for x86_64.
    #[cfg(target_arch = "x86_64")]
    group.bench_with_input(BenchmarkId::new("hit_simd", "r_hit"), &r_hit, |b, r| {
        b.iter(|| bb.hit_simd(*r, t_min, t_max))
    });
    // --- ray that points away from the box ---
    group.bench_with_input(BenchmarkId::new("hit_naive", "r_miss"), &r_miss, |b, r| {
        b.iter(|| bb.hit_naive(*r, t_min, t_max))
    });
    group.bench_with_input(BenchmarkId::new("hit2", "r_miss"), &r_miss, |b, r| {
        b.iter(|| bb.hit2(*r, t_min, t_max))
    });
    //group.bench_with_input(BenchmarkId::new("hit_precompute", "r_miss"), &r_miss, |b, r| { b.iter(|| bb.hit_precompute(*r, t_min, t_max)) });
    group.bench_with_input(BenchmarkId::new("hit_fast", "r_miss"), &r_miss, |b, r| {
        b.iter(|| bb.hit_fast(*r, t_min, t_max))
    });
    #[cfg(target_arch = "x86_64")]
    group.bench_with_input(BenchmarkId::new("hit_simd", "r_miss"), &r_miss, |b, r| {
        b.iter(|| bb.hit_simd(*r, t_min, t_max))
    });
    // NOTE(review): the "aabb" group above is never `finish()`ed before `group` is shadowed
    // below, and `bytes` is only defined on the removed (`-`) line at the top of this function.
    // Confirm whether the next three lines still exist in the committed file.
    let mut group = c.benchmark_group("throughput-example");
    group.throughput(Throughput::Bytes(bytes.len() as u64));
    group.bench_function("decode", |b| b.iter(|| decode(bytes)));
    group.finish();
 }
--- a/rtiow/renderer/benches/spheres.rs
+++ b/rtiow/renderer/benches/spheres.rs
@ -1,6 +1,3 @@
 #[macro_use]
 extern crate criterion;
 use criterion::*;
 use renderer::{
@ -21,13 +18,13 @@ fn criterion_benchmark(c: &mut Criterion) {
        Ray::new([0., 0., -2.], [0., 0., -1.], 0.),
    ];
    let mut group = c.benchmark_group("sphere");
-    for r in rays {
+    group.throughput(Throughput::Elements(1));
-        group.bench_with_input(
+    group.bench_with_input(BenchmarkId::new("Sphere", "hit"), &rays[0], |b, r| {
-            BenchmarkId::new("Sphere", format!("{:?}", r)),
+        b.iter(|| sphere.hit(*r, 0., 1.))
-            &r,
+    });
-            |b, r| b.iter(|| sphere.hit(*r, 0., 1.)),
+    group.bench_with_input(BenchmarkId::new("Sphere", "miss"), &rays[1], |b, r| {
-        );
+        b.iter(|| sphere.hit(*r, 0., 1.))
-    }
+    });
    group.finish()
 }
--- a/rtiow/renderer/src/aabb.rs
+++ b/rtiow/renderer/src/aabb.rs
@ -2,7 +2,7 @@ use std::fmt;
 use crate::{ray::Ray, vec3::Vec3};
-#[derive(Debug, Copy, Clone, PartialEq)]
+#[derive(Default, Debug, Copy, Clone, PartialEq)]
 /// Axis-aligned bounding box stored as two corners: `bounds[0]` is the minimum corner and
 /// `bounds[1]` the maximum corner.
 ///
 /// `Default` is derived, so `AABB::default()` fills both corners with `Vec3::default()` and
 /// does not run the ordering asserts in `AABB::new`.
 pub struct AABB {
    bounds: [Vec3; 2],
 }
@ -30,7 +30,12 @@ fn max(x: f32, y: f32) -> f32 {
 }
 impl AABB {
-    pub fn new(min: Vec3, max: Vec3) -> AABB {
+    pub fn new<V: Into<Vec3>>(min: V, max: V) -> AABB {
        // Builds a box from its two corners. Each corner may be any type convertible into
        // `Vec3`, but both arguments must share the same type `V`.
        let min: Vec3 = min.into();
        let max: Vec3 = max.into();
        // Panics unless `min` is strictly below `max` on every axis, so a box with zero
        // extent along any axis is rejected.
        assert!(min.x < max.x);
        assert!(min.y < max.y);
        assert!(min.z < max.z);
        AABB { bounds: [min, max] }
    }
@ -61,10 +66,33 @@ impl AABB {
    /// Returns a copy of the box's minimum corner (the first entry of `bounds`).
    pub fn min(&self) -> Vec3 {
        let [lower, _upper] = self.bounds;
        lower
    }
    /// Returns a copy of the box's maximum corner (the second entry of `bounds`).
    pub fn max(&self) -> Vec3 {
        let [_lower, upper] = self.bounds;
        upper
    }
    /// Reports whether ray `r` intersects this box, using the fastest routine that is
    /// valid for the compilation target.
    ///
    /// On x86_64 this forwards to `hit_simd`. On every other architecture the SSE section
    /// of `hit_simd` is compiled out and that function returns `true` unconditionally, so
    /// the portable `hit_naive` slab test is used instead to keep misses detectable.
    ///
    /// * `r` - ray to test.
    /// * `t_min`, `t_max` - ray-parameter bounds forwarded unchanged to the chosen routine.
    pub fn hit(&self, r: Ray, t_min: f32, t_max: f32) -> bool {
        // `cfg!` expands to a compile-time constant, so the untaken branch is trivially
        // eliminated while both callees are still type-checked on every target.
        if cfg!(target_arch = "x86_64") {
            self.hit_simd(r, t_min, t_max)
        } else {
            self.hit_naive(r, t_min, t_max)
        }
    }
    /// Scalar slab test.
    ///
    /// For each axis, computes the two ray parameters at which `r` crosses the box's
    /// bounding planes (using the ray's `inv_direction`), narrows the running
    /// `[t_min, t_max]` interval to the overlap, and returns `false` as soon as the
    /// interval's upper end is less than or equal to its lower end. Returns `true` if all
    /// three axes leave a non-empty interval.
    pub fn hit_naive(&self, r: Ray, t_min: f32, t_max: f32) -> bool {
        let (lo, hi) = (self.min(), self.max());
        let mut near = t_min;
        let mut far = t_max;
        for axis in 0..3 {
            let origin = r.origin[axis];
            let inv_d = r.inv_direction[axis];
            // Parameters at which the ray meets the two planes bounding this axis.
            let a = (lo[axis] - origin) * inv_d;
            let b = (hi[axis] - origin) * inv_d;
            near = a.min(b).max(near);
            far = a.max(b).min(far);
            if far <= near {
                return false;
            }
        }
        true
    }
    pub fn hit2(&self, r: Ray, t_min: f32, t_max: f32) -> bool {
        let mut t_min = t_min;
        let mut t_max = t_max;
@ -119,21 +147,26 @@ impl AABB {
        t_min < t1 && t_max > t0
    }
-    pub fn hit(&self, r: Ray, t_min: f32, t_max: f32) -> bool {
+    pub fn hit_simd(&self, r: Ray, t_min: f32, t_max: f32) -> bool {
        // SSE version of the ray/box test; `AABB::hit` forwards here.
        // In this diff view the `-` lines appear to be the removed loop body of the old `hit`;
        // only the `+` and unmarked lines belong to `hit_simd`.
        //
        // Steps: pack the ray origin, ray direction and both box corners into 4-lane vectors
        // (x in lane 0, fourth lane 0). `mask4` is all-ones in lanes 0..=2 and zero in lane 3.
        // `t1`/`t2` are (corner - origin) * direction per lane, and `vmin4`/`vmax4` are their
        // lane-wise min/max.
        // NOTE(review): `hit_naive` multiplies by `r.inv_direction`, while this multiplies by
        // `r.direction` — confirm which is intended.
        //
        // On targets other than x86_64 the `unsafe` block is compiled out and only the
        // trailing `true` remains.
-        let mut t_min = t_min;
+        #[cfg(target_arch = "x86_64")]
-        let mut t_max = t_max;
+        unsafe {
-        for axis in 0..3 {
+            use std::arch::x86_64::*;
-            let t0 = ((self.min()[axis] - r.origin[axis]) * r.inv_direction[axis])
+            let o4 = _mm_set_ps(0., r.origin.z, r.origin.y, r.origin.x);
-                .min((self.max()[axis] - r.origin[axis]) * r.inv_direction[axis]);
+            let d4 = _mm_set_ps(0., r.direction.z, r.direction.y, r.direction.x);
-            let t1 = ((self.min()[axis] - r.origin[axis]) * r.inv_direction[axis])
+            let bmin4 = _mm_set_ps(0., self.min().z, self.min().y, self.min().x);
-                .max((self.max()[axis] - r.origin[axis]) * r.inv_direction[axis]);
+            let bmax4 = _mm_set_ps(0., self.max().z, self.max().y, self.max().x);
-            t_min = t0.max(t_min);
+            let mask4 = _mm_cmpeq_ps(_mm_setzero_ps(), _mm_set_ps(1., 0., 0., 0.));
-            t_max = t1.min(t_max);
+            let t1 = _mm_mul_ps(_mm_sub_ps(_mm_and_ps(bmin4, mask4), o4), d4);
-            if t_max <= t_min {
+            let t2 = _mm_mul_ps(_mm_sub_ps(_mm_and_ps(bmax4, mask4), o4), d4);
-                return false;
+            let vmax4 = _mm_max_ps(t1, t2);
-            }
+            let vmin4 = _mm_min_ps(t1, t2);
            // NOTE(review): transmuting to a tuple relies on tuple field layout, which Rust
            // does not guarantee; `[f32; 4]` has a defined layout.
            let vmax4: (f32, f32, f32, f32) = std::mem::transmute(vmax4);
            let vmin4: (f32, f32, f32, f32) = std::mem::transmute(vmin4);
            // Reduce over the x, y, z lanes only; the fourth lane is ignored.
            let tmax = min(vmax4.0, min(vmax4.1, vmax4.2));
            let tmin = max(vmin4.0, max(vmin4.1, vmin4.2));
            //tmax >= tmin && tmin < r.time && tmax > t_min
            // NOTE(review): `tmax` is not used by the live test below, and as shown the value
            // of this expression is not what the function returns (the function ends with
            // `true`) — confirm against the actual file.
            t_min <= tmin && tmin <= t_max
        }
        true
    }
    pub fn hit_fast(&self, r: Ray, _t_min: f32, _t_max: f32) -> bool {
@ -169,3 +202,48 @@ pub fn surrounding_box(box0: &AABB, box1: &AABB) -> AABB {
    );
    AABB::new(min, max)
 }
 /// Unit tests that run the same hit/miss scenario against every listed `AABB` hit routine.
 #[cfg(test)]
 mod tests {
    use super::*;
    // For each method name passed in, generates one test in `tests::hit` and one in
    // `tests::miss`, both named after the method. The box spans (1, -1, -1)..(3, 1, 1);
    // the "hit" ray leaves the origin along +x and the "miss" ray along -x.
    macro_rules! hit_test {
        ($($name:ident,)*) => {
            mod hit {
                use super::*;
                $(
                    #[test]
                    fn $name() {
                        let t_min = 0.001;
                        let t_max = f32::MAX;
                        let bb = AABB::new([1., -1., -1.], [3., 1., 1.]);
                        // Hit
                        let r = Ray::new([0., 0., 0.], [1., 0., 0.], 0.5);
                        assert!(bb.$name(r, t_min, t_max));
                    }
                )*
            }
            mod miss {
                use super::*;
                $(
                    #[test]
                    fn $name() {
                        let t_min = 0.001;
                        let t_max = f32::MAX;
                        let bb = AABB::new([1., -1., -1.], [3., 1., 1.]);
                        // Miss
                        let r = Ray::new([0., 0., 0.], [-1., 0., 0.], 0.5);
                        assert!(!bb.$name(r, t_min, t_max));
                    }
                )*
            }
        }
    }
    // Methods under test; each must have the signature `(&self, Ray, f32, f32) -> bool`.
    hit_test! {
        hit_naive,
        hit2,
        hit_fast,
        hit_simd,
    }
 }
--- a/rtiow/renderer/src/bvh_triangles.rs
+++ b/rtiow/renderer/src/bvh_triangles.rs
@ -77,7 +77,7 @@ where
    fn build_bvh(&mut self) {
        // assign all triangles to root node
        let root = BVHNode {
-            aabb: AABB::new(0f32.into(), 0f32.into()),
+            aabb: AABB::default(),
            left_child: 0,
            first_prim: 0,
            prim_count: self.triangles.len() - 1,
@ -139,13 +139,13 @@ where
        let left_child_idx = self.bvh_nodes.len();
        let right_child_idx = left_child_idx + 1;
        let left = BVHNode {
-            aabb: AABB::new(0f32.into(), 0f32.into()),
+            aabb: AABB::default(),
            left_child: 0,
            first_prim: first_prim,
            prim_count: left_count,
        };
        let right = BVHNode {
-            aabb: AABB::new(0f32.into(), 0f32.into()),
+            aabb: AABB::default(),
            left_child: 0,
            first_prim: i as usize,
            prim_count: prim_count - left_count,