rtiow: add aabb tests and benchmark along with terrible SIMD impl.

This commit is contained in:
Bill Thiede 2023-01-22 12:03:17 -08:00
parent 27d6c1280b
commit 2d696932e3
5 changed files with 145 additions and 38 deletions

View File

@ -8,6 +8,9 @@ edition = "2021"
[[bench]] [[bench]]
harness = false harness = false
name = "spheres" name = "spheres"
[[bench]]
harness = false
name = "aabb"
[dependencies] [dependencies]
chrono = "*" chrono = "*"

View File

@ -1,16 +1,45 @@
use criterion::*; use criterion::*;
use renderer::{aabb::AABB, ray::Ray};
fn decode(bytes: &[u8]) {
// Decode the bytes
//...
}
fn bench(c: &mut Criterion) { fn bench(c: &mut Criterion) {
let bytes: &[u8] = b"some bytes"; let bb = AABB::new([1., -1., -1.], [3., 1., 1.]);
let r_hit = Ray::new([0., 0., 0.], [1., 0., 0.], 0.);
let r_miss = Ray::new([0., 0., 0.], [-1., 0., 0.], 0.);
let t_min = 0.001;
let t_max = f32::MAX;
let mut group = c.benchmark_group("aabb");
group.throughput(Throughput::Elements(1));
group.bench_with_input(BenchmarkId::new("hit_naive", "r_hit"), &r_hit, |b, r| {
b.iter(|| bb.hit_naive(*r, t_min, t_max))
});
group.bench_with_input(BenchmarkId::new("hit2", "r_hit"), &r_hit, |b, r| {
b.iter(|| bb.hit2(*r, t_min, t_max))
});
//group.bench_with_input(BenchmarkId::new("hit_precompute", "r_hit"), &r_hit, |b, r| { b.iter(|| bb.hit_precompute(*r, t_min, t_max)) });
group.bench_with_input(BenchmarkId::new("hit_fast", "r_hit"), &r_hit, |b, r| {
b.iter(|| bb.hit_fast(*r, t_min, t_max))
});
#[cfg(target_arch = "x86_64")]
group.bench_with_input(BenchmarkId::new("hit_simd", "r_hit"), &r_hit, |b, r| {
b.iter(|| bb.hit_simd(*r, t_min, t_max))
});
group.bench_with_input(BenchmarkId::new("hit_naive", "r_miss"), &r_miss, |b, r| {
b.iter(|| bb.hit_naive(*r, t_min, t_max))
});
group.bench_with_input(BenchmarkId::new("hit2", "r_miss"), &r_miss, |b, r| {
b.iter(|| bb.hit2(*r, t_min, t_max))
});
//group.bench_with_input(BenchmarkId::new("hit_precompute", "r_miss"), &r_miss, |b, r| { b.iter(|| bb.hit_precompute(*r, t_min, t_max)) });
group.bench_with_input(BenchmarkId::new("hit_fast", "r_miss"), &r_miss, |b, r| {
b.iter(|| bb.hit_fast(*r, t_min, t_max))
});
#[cfg(target_arch = "x86_64")]
group.bench_with_input(BenchmarkId::new("hit_simd", "r_miss"), &r_miss, |b, r| {
b.iter(|| bb.hit_simd(*r, t_min, t_max))
});
let mut group = c.benchmark_group("throughput-example");
group.throughput(Throughput::Bytes(bytes.len() as u64));
group.bench_function("decode", |b| b.iter(|| decode(bytes)));
group.finish(); group.finish();
} }

View File

@ -1,6 +1,3 @@
#[macro_use]
extern crate criterion;
use criterion::*; use criterion::*;
use renderer::{ use renderer::{
@ -21,13 +18,13 @@ fn criterion_benchmark(c: &mut Criterion) {
Ray::new([0., 0., -2.], [0., 0., -1.], 0.), Ray::new([0., 0., -2.], [0., 0., -1.], 0.),
]; ];
let mut group = c.benchmark_group("sphere"); let mut group = c.benchmark_group("sphere");
for r in rays { group.throughput(Throughput::Elements(1));
group.bench_with_input( group.bench_with_input(BenchmarkId::new("Sphere", "hit"), &rays[0], |b, r| {
BenchmarkId::new("Sphere", format!("{:?}", r)), b.iter(|| sphere.hit(*r, 0., 1.))
&r, });
|b, r| b.iter(|| sphere.hit(*r, 0., 1.)), group.bench_with_input(BenchmarkId::new("Sphere", "miss"), &rays[1], |b, r| {
); b.iter(|| sphere.hit(*r, 0., 1.))
} });
group.finish() group.finish()
} }

View File

@ -2,7 +2,7 @@ use std::fmt;
use crate::{ray::Ray, vec3::Vec3}; use crate::{ray::Ray, vec3::Vec3};
#[derive(Debug, Copy, Clone, PartialEq)] #[derive(Default, Debug, Copy, Clone, PartialEq)]
pub struct AABB { pub struct AABB {
bounds: [Vec3; 2], bounds: [Vec3; 2],
} }
@ -30,7 +30,12 @@ fn max(x: f32, y: f32) -> f32 {
} }
impl AABB { impl AABB {
pub fn new(min: Vec3, max: Vec3) -> AABB { pub fn new<V: Into<Vec3>>(min: V, max: V) -> AABB {
let min: Vec3 = min.into();
let max: Vec3 = max.into();
assert!(min.x < max.x);
assert!(min.y < max.y);
assert!(min.z < max.z);
AABB { bounds: [min, max] } AABB { bounds: [min, max] }
} }
@ -61,10 +66,33 @@ impl AABB {
pub fn min(&self) -> Vec3 { pub fn min(&self) -> Vec3 {
self.bounds[0] self.bounds[0]
} }
pub fn max(&self) -> Vec3 { pub fn max(&self) -> Vec3 {
self.bounds[1] self.bounds[1]
} }
/// Default ray/box intersection test used by callers that do not pick a
/// specific routine.
///
/// Returns `true` when the ray's slab interval, clipped to
/// `[t_min, t_max]`, is non-empty on all three axes; see `hit_naive`.
pub fn hit(&self, r: Ray, t_min: f32, t_max: f32) -> bool {
    // Deliberately not routed through `hit_simd`: that routine scales the
    // slab offsets by `r.direction` rather than `r.inv_direction`, its
    // result never compares the computed `tmax` against `tmin`, and its
    // body is only compiled for x86_64. Until it agrees with the reference
    // slab test, the default entry point stays on the reference version.
    self.hit_naive(r, t_min, t_max)
}
/// Reference slab test for ray/box intersection.
///
/// For each axis the ray parameter at both bounding planes is computed
/// from `r.origin` and `r.inv_direction`; the smaller one raises the lower
/// bound and the larger one lowers the upper bound. Returns `false` as
/// soon as the remaining interval is empty (`upper <= lower`), `true` if
/// all three axes leave a non-empty interval.
pub fn hit_naive(&self, r: Ray, t_min: f32, t_max: f32) -> bool {
    let (lo, hi) = (self.min(), self.max());
    let mut near = t_min;
    let mut far = t_max;
    for axis in 0..3 {
        let origin = r.origin[axis];
        let inv = r.inv_direction[axis];
        // Ray parameters at the two planes bounding this axis.
        let a = (lo[axis] - origin) * inv;
        let b = (hi[axis] - origin) * inv;
        near = a.min(b).max(near);
        far = a.max(b).min(far);
        if far <= near {
            return false;
        }
    }
    true
}
pub fn hit2(&self, r: Ray, t_min: f32, t_max: f32) -> bool { pub fn hit2(&self, r: Ray, t_min: f32, t_max: f32) -> bool {
let mut t_min = t_min; let mut t_min = t_min;
let mut t_max = t_max; let mut t_max = t_max;
@ -119,21 +147,26 @@ impl AABB {
t_min < t1 && t_max > t0 t_min < t1 && t_max > t0
} }
pub fn hit(&self, r: Ray, t_min: f32, t_max: f32) -> bool { pub fn hit_simd(&self, r: Ray, t_min: f32, t_max: f32) -> bool {
let mut t_min = t_min; #[cfg(target_arch = "x86_64")]
let mut t_max = t_max; unsafe {
for axis in 0..3 { use std::arch::x86_64::*;
let t0 = ((self.min()[axis] - r.origin[axis]) * r.inv_direction[axis]) let o4 = _mm_set_ps(0., r.origin.z, r.origin.y, r.origin.x);
.min((self.max()[axis] - r.origin[axis]) * r.inv_direction[axis]); let d4 = _mm_set_ps(0., r.direction.z, r.direction.y, r.direction.x);
let t1 = ((self.min()[axis] - r.origin[axis]) * r.inv_direction[axis]) let bmin4 = _mm_set_ps(0., self.min().z, self.min().y, self.min().x);
.max((self.max()[axis] - r.origin[axis]) * r.inv_direction[axis]); let bmax4 = _mm_set_ps(0., self.max().z, self.max().y, self.max().x);
t_min = t0.max(t_min); let mask4 = _mm_cmpeq_ps(_mm_setzero_ps(), _mm_set_ps(1., 0., 0., 0.));
t_max = t1.min(t_max); let t1 = _mm_mul_ps(_mm_sub_ps(_mm_and_ps(bmin4, mask4), o4), d4);
if t_max <= t_min { let t2 = _mm_mul_ps(_mm_sub_ps(_mm_and_ps(bmax4, mask4), o4), d4);
return false; let vmax4 = _mm_max_ps(t1, t2);
} let vmin4 = _mm_min_ps(t1, t2);
let vmax4: (f32, f32, f32, f32) = std::mem::transmute(vmax4);
let vmin4: (f32, f32, f32, f32) = std::mem::transmute(vmin4);
let tmax = min(vmax4.0, min(vmax4.1, vmax4.2));
let tmin = max(vmin4.0, max(vmin4.1, vmin4.2));
//tmax >= tmin && tmin < r.time && tmax > t_min
t_min <= tmin && tmin <= t_max
} }
true
} }
pub fn hit_fast(&self, r: Ray, _t_min: f32, _t_max: f32) -> bool { pub fn hit_fast(&self, r: Ray, _t_min: f32, _t_max: f32) -> bool {
@ -169,3 +202,48 @@ pub fn surrounding_box(box0: &AABB, box1: &AABB) -> AABB {
); );
AABB::new(min, max) AABB::new(min, max)
} }
#[cfg(test)]
mod tests {
    use super::*;

    /// Lower bound of the ray parameter range passed to every routine.
    const T_MIN: f32 = 0.001;
    /// Upper bound of the ray parameter range passed to every routine.
    const T_MAX: f32 = f32::MAX;

    /// Box under test: x spans [1, 3], y and z span [-1, 1].
    fn test_box() -> AABB {
        AABB::new([1., -1., -1.], [3., 1., 1.])
    }

    // For every listed method name this expands to two tests,
    // `hit::<name>` and `miss::<name>`, so each intersection routine is
    // checked against the same pair of rays.
    macro_rules! hit_test {
        ($($name:ident,)*) => {
            mod hit {
                use super::*;
                $(
                    #[test]
                    fn $name() {
                        // Ray from the origin along +x, towards the box.
                        let r = Ray::new([0., 0., 0.], [1., 0., 0.], 0.5);
                        assert!(test_box().$name(r, T_MIN, T_MAX));
                    }
                )*
            }
            mod miss {
                use super::*;
                $(
                    #[test]
                    fn $name() {
                        // Ray from the origin along -x, away from the box.
                        let r = Ray::new([0., 0., 0.], [-1., 0., 0.], 0.5);
                        assert!(!test_box().$name(r, T_MIN, T_MAX));
                    }
                )*
            }
        };
    }

    hit_test! {
        hit_naive,
        hit2,
        hit_fast,
        hit_simd,
    }
}

View File

@ -77,7 +77,7 @@ where
fn build_bvh(&mut self) { fn build_bvh(&mut self) {
// assign all triangles to root node // assign all triangles to root node
let root = BVHNode { let root = BVHNode {
aabb: AABB::new(0f32.into(), 0f32.into()), aabb: AABB::default(),
left_child: 0, left_child: 0,
first_prim: 0, first_prim: 0,
prim_count: self.triangles.len() - 1, prim_count: self.triangles.len() - 1,
@ -139,13 +139,13 @@ where
let left_child_idx = self.bvh_nodes.len(); let left_child_idx = self.bvh_nodes.len();
let right_child_idx = left_child_idx + 1; let right_child_idx = left_child_idx + 1;
let left = BVHNode { let left = BVHNode {
aabb: AABB::new(0f32.into(), 0f32.into()), aabb: AABB::default(),
left_child: 0, left_child: 0,
first_prim: first_prim, first_prim: first_prim,
prim_count: left_count, prim_count: left_count,
}; };
let right = BVHNode { let right = BVHNode {
aabb: AABB::new(0f32.into(), 0f32.into()), aabb: AABB::default(),
left_child: 0, left_child: 0,
first_prim: i as usize, first_prim: i as usize,
prim_count: prim_count - left_count, prim_count: prim_count - left_count,