diff --git a/rtiow/renderer/Cargo.toml b/rtiow/renderer/Cargo.toml index a9a0da2..b245290 100644 --- a/rtiow/renderer/Cargo.toml +++ b/rtiow/renderer/Cargo.toml @@ -8,6 +8,9 @@ edition = "2021" [[bench]] harness = false name = "spheres" +[[bench]] +harness = false +name = "aabb" [dependencies] chrono = "*" diff --git a/rtiow/renderer/benches/aabb.rs b/rtiow/renderer/benches/aabb.rs index cc4560a..9a8aa0a 100644 --- a/rtiow/renderer/benches/aabb.rs +++ b/rtiow/renderer/benches/aabb.rs @@ -1,16 +1,45 @@ use criterion::*; - -fn decode(bytes: &[u8]) { - // Decode the bytes - //... -} +use renderer::{aabb::AABB, ray::Ray}; fn bench(c: &mut Criterion) { - let bytes: &[u8] = b"some bytes"; + let bb = AABB::new([1., -1., -1.], [3., 1., 1.]); + let r_hit = Ray::new([0., 0., 0.], [1., 0., 0.], 0.); + let r_miss = Ray::new([0., 0., 0.], [-1., 0., 0.], 0.); + let t_min = 0.001; + let t_max = f32::MAX; + + let mut group = c.benchmark_group("aabb"); + group.throughput(Throughput::Elements(1)); + group.bench_with_input(BenchmarkId::new("hit_naive", "r_hit"), &r_hit, |b, r| { + b.iter(|| bb.hit_naive(*r, t_min, t_max)) + }); + group.bench_with_input(BenchmarkId::new("hit2", "r_hit"), &r_hit, |b, r| { + b.iter(|| bb.hit2(*r, t_min, t_max)) + }); + //group.bench_with_input(BenchmarkId::new("hit_precompute", "r_hit"), &r_hit, |b, r| { b.iter(|| bb.hit_precompute(*r, t_min, t_max)) }); + group.bench_with_input(BenchmarkId::new("hit_fast", "r_hit"), &r_hit, |b, r| { + b.iter(|| bb.hit_fast(*r, t_min, t_max)) + }); + #[cfg(target_arch = "x86_64")] + group.bench_with_input(BenchmarkId::new("hit_simd", "r_hit"), &r_hit, |b, r| { + b.iter(|| bb.hit_simd(*r, t_min, t_max)) + }); + + group.bench_with_input(BenchmarkId::new("hit_naive", "r_miss"), &r_miss, |b, r| { + b.iter(|| bb.hit_naive(*r, t_min, t_max)) + }); + group.bench_with_input(BenchmarkId::new("hit2", "r_miss"), &r_miss, |b, r| { + b.iter(|| bb.hit2(*r, t_min, t_max)) + }); + 
//group.bench_with_input(BenchmarkId::new("hit_precompute", "r_miss"), &r_miss, |b, r| { b.iter(|| bb.hit_precompute(*r, t_min, t_max)) }); + group.bench_with_input(BenchmarkId::new("hit_fast", "r_miss"), &r_miss, |b, r| { + b.iter(|| bb.hit_fast(*r, t_min, t_max)) + }); + #[cfg(target_arch = "x86_64")] + group.bench_with_input(BenchmarkId::new("hit_simd", "r_miss"), &r_miss, |b, r| { + b.iter(|| bb.hit_simd(*r, t_min, t_max)) + }); - let mut group = c.benchmark_group("throughput-example"); - group.throughput(Throughput::Bytes(bytes.len() as u64)); - group.bench_function("decode", |b| b.iter(|| decode(bytes))); group.finish(); } diff --git a/rtiow/renderer/benches/spheres.rs b/rtiow/renderer/benches/spheres.rs index b8b93c3..1b26e47 100644 --- a/rtiow/renderer/benches/spheres.rs +++ b/rtiow/renderer/benches/spheres.rs @@ -1,6 +1,3 @@ -#[macro_use] -extern crate criterion; - use criterion::*; use renderer::{ @@ -21,13 +18,13 @@ fn criterion_benchmark(c: &mut Criterion) { Ray::new([0., 0., -2.], [0., 0., -1.], 0.), ]; let mut group = c.benchmark_group("sphere"); - for r in rays { - group.bench_with_input( - BenchmarkId::new("Sphere", format!("{:?}", r)), - &r, - |b, r| b.iter(|| sphere.hit(*r, 0., 1.)), - ); - } + group.throughput(Throughput::Elements(1)); + group.bench_with_input(BenchmarkId::new("Sphere", "hit"), &rays[0], |b, r| { + b.iter(|| sphere.hit(*r, 0., 1.)) + }); + group.bench_with_input(BenchmarkId::new("Sphere", "miss"), &rays[1], |b, r| { + b.iter(|| sphere.hit(*r, 0., 1.)) + }); group.finish() } diff --git a/rtiow/renderer/src/aabb.rs b/rtiow/renderer/src/aabb.rs index 95bf2a1..0aded58 100644 --- a/rtiow/renderer/src/aabb.rs +++ b/rtiow/renderer/src/aabb.rs @@ -2,7 +2,7 @@ use std::fmt; use crate::{ray::Ray, vec3::Vec3}; -#[derive(Debug, Copy, Clone, PartialEq)] +#[derive(Default, Debug, Copy, Clone, PartialEq)] pub struct AABB { bounds: [Vec3; 2], } @@ -30,7 +30,12 @@ fn max(x: f32, y: f32) -> f32 { } impl AABB { - pub fn new(min: Vec3, max: Vec3) -> 
AABB { + pub fn new<V: Into<Vec3>>(min: V, max: V) -> AABB { + let min: Vec3 = min.into(); + let max: Vec3 = max.into(); + assert!(min.x < max.x); + assert!(min.y < max.y); + assert!(min.z < max.z); AABB { bounds: [min, max] } } @@ -61,10 +66,33 @@ impl AABB { pub fn min(&self) -> Vec3 { self.bounds[0] } + pub fn max(&self) -> Vec3 { self.bounds[1] } + pub fn hit(&self, r: Ray, t_min: f32, t_max: f32) -> bool { + self.hit_simd(r, t_min, t_max) + //self.hit_naive(r, t_min, t_max) + } + + pub fn hit_naive(&self, r: Ray, t_min: f32, t_max: f32) -> bool { + let mut t_min = t_min; + let mut t_max = t_max; + for axis in 0..3 { + let t0 = ((self.min()[axis] - r.origin[axis]) * r.inv_direction[axis]) + .min((self.max()[axis] - r.origin[axis]) * r.inv_direction[axis]); + let t1 = ((self.min()[axis] - r.origin[axis]) * r.inv_direction[axis]) + .max((self.max()[axis] - r.origin[axis]) * r.inv_direction[axis]); + t_min = t0.max(t_min); + t_max = t1.min(t_max); + if t_max <= t_min { + return false; + } + } + true + } + pub fn hit2(&self, r: Ray, t_min: f32, t_max: f32) -> bool { let mut t_min = t_min; let mut t_max = t_max; @@ -119,21 +147,26 @@ impl AABB { t_min < t1 && t_max > t0 } - pub fn hit(&self, r: Ray, t_min: f32, t_max: f32) -> bool { - let mut t_min = t_min; - let mut t_max = t_max; - for axis in 0..3 { - let t0 = ((self.min()[axis] - r.origin[axis]) * r.inv_direction[axis]) - .min((self.max()[axis] - r.origin[axis]) * r.inv_direction[axis]); - let t1 = ((self.min()[axis] - r.origin[axis]) * r.inv_direction[axis]) - .max((self.max()[axis] - r.origin[axis]) * r.inv_direction[axis]); - t_min = t0.max(t_min); - t_max = t1.min(t_max); - if t_max <= t_min { - return false; - } + pub fn hit_simd(&self, r: Ray, t_min: f32, t_max: f32) -> bool { + #[cfg(target_arch = "x86_64")] + unsafe { + use std::arch::x86_64::*; + let o4 = _mm_set_ps(0., r.origin.z, r.origin.y, r.origin.x); + let d4 = _mm_set_ps(0., r.direction.z, r.direction.y, r.direction.x); + let bmin4 = _mm_set_ps(0., 
self.min().z, self.min().y, self.min().x); + let bmax4 = _mm_set_ps(0., self.max().z, self.max().y, self.max().x); + let mask4 = _mm_cmpeq_ps(_mm_setzero_ps(), _mm_set_ps(1., 0., 0., 0.)); + let t1 = _mm_mul_ps(_mm_sub_ps(_mm_and_ps(bmin4, mask4), o4), d4); + let t2 = _mm_mul_ps(_mm_sub_ps(_mm_and_ps(bmax4, mask4), o4), d4); + let vmax4 = _mm_max_ps(t1, t2); + let vmin4 = _mm_min_ps(t1, t2); + let vmax4: (f32, f32, f32, f32) = std::mem::transmute(vmax4); + let vmin4: (f32, f32, f32, f32) = std::mem::transmute(vmin4); + let tmax = min(vmax4.0, min(vmax4.1, vmax4.2)); + let tmin = max(vmin4.0, max(vmin4.1, vmin4.2)); + //tmax >= tmin && tmin < r.time && tmax > t_min + t_min <= tmin && tmin <= t_max } - true } pub fn hit_fast(&self, r: Ray, _t_min: f32, _t_max: f32) -> bool { @@ -169,3 +202,48 @@ pub fn surrounding_box(box0: &AABB, box1: &AABB) -> AABB { ); AABB::new(min, max) } + +#[cfg(test)] +mod tests { + use super::*; + + macro_rules! hit_test { + ($($name:ident,)*) => { + mod hit { + use super::*; + $( + #[test] + fn $name() { + let t_min = 0.001; + let t_max = f32::MAX; + let bb = AABB::new([1., -1., -1.], [3., 1., 1.]); + // Hit + let r = Ray::new([0., 0., 0.], [1., 0., 0.], 0.5); + assert!(bb.$name(r, t_min, t_max)); + } + )* + } + mod miss { + use super::*; + $( + #[test] + fn $name() { + let t_min = 0.001; + let t_max = f32::MAX; + let bb = AABB::new([1., -1., -1.], [3., 1., 1.]); + // Miss + let r = Ray::new([0., 0., 0.], [-1., 0., 0.], 0.5); + assert!(!bb.$name(r, t_min, t_max)); + } + )* + } + } + } + + hit_test! 
{ + hit_naive, + hit2, + hit_fast, + hit_simd, + } +} diff --git a/rtiow/renderer/src/bvh_triangles.rs b/rtiow/renderer/src/bvh_triangles.rs index 67a598b..f7e7246 100644 --- a/rtiow/renderer/src/bvh_triangles.rs +++ b/rtiow/renderer/src/bvh_triangles.rs @@ -77,7 +77,7 @@ where fn build_bvh(&mut self) { // assign all triangles to root node let root = BVHNode { - aabb: AABB::new(0f32.into(), 0f32.into()), + aabb: AABB::default(), left_child: 0, first_prim: 0, prim_count: self.triangles.len() - 1, @@ -139,13 +139,13 @@ where let left_child_idx = self.bvh_nodes.len(); let right_child_idx = left_child_idx + 1; let left = BVHNode { - aabb: AABB::new(0f32.into(), 0f32.into()), + aabb: AABB::default(), left_child: 0, first_prim: first_prim, prim_count: left_count, }; let right = BVHNode { - aabb: AABB::new(0f32.into(), 0f32.into()), + aabb: AABB::default(), left_child: 0, first_prim: i as usize, prim_count: prim_count - left_count,