camera: make rendering strategy configurable, add workerpool version.

2021-07-18 11:48:12 -07:00
parent 94ea724344
commit 4f88d2c101
3 changed files with 402 additions and 25 deletions

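The new render-strategy flag is parsed through serde rather than a hand-written match: FromStr wraps the raw token in quotes so it becomes a JSON string literal, and the kebab-case rename attribute maps it onto an enum variant ("worker-pool" parses to WorkerPool). Below is a minimal, self-contained sketch of that mechanism, with the enum duplicated standalone (serde derives only, plus PartialEq just for the asserts); it is an illustration, not part of this commit:

use std::str::FromStr;

use serde::Deserialize;

#[derive(Debug, Deserialize, PartialEq)]
#[serde(rename_all = "kebab-case")]
enum RenderStrategy {
    Serial,
    Rayon,
    WorkerPool,
}

impl FromStr for RenderStrategy {
    type Err = serde_json::Error;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Quote the bare token so serde_json sees a JSON string literal,
        // then let the kebab-case rename do the variant matching.
        serde_json::from_str(&format!("\"{}\"", s))
    }
}

fn main() {
    assert_eq!(
        "worker-pool".parse::<RenderStrategy>().unwrap(),
        RenderStrategy::WorkerPool
    );
    assert!("no-such-strategy".parse::<RenderStrategy>().is_err());
}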

@@ -1,9 +1,33 @@
use std::sync::Mutex;
use std::{
str::FromStr,
sync::{
mpsc::{sync_channel, Receiver, SyncSender},
{Arc, Mutex},
},
thread,
};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use serde::Deserialize;
use structopt::StructOpt;
use crate::{canvas::Canvas, matrices::Matrix4x4, rays::Ray, tuples::Tuple, world::World, BLACK};
#[derive(Copy, Clone, StructOpt, Debug, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum RenderStrategy {
Serial,
Rayon,
WorkerPool,
}
impl FromStr for RenderStrategy {
type Err = serde_json::error::Error;
fn from_str(s: &str) -> Result<RenderStrategy, serde_json::error::Error> {
Ok(serde_json::from_str(&format!("\"{}\"", s))?)
}
}
#[derive(Clone)]
pub struct Camera {
hsize: usize,
vsize: usize,
@@ -13,6 +37,15 @@ pub struct Camera {
pixel_size: f32,
half_width: f32,
half_height: f32,
pub render_strategy: RenderStrategy,
}
enum Request {
Line { width: usize, y: usize },
}
enum Response {
Line { y: usize, pixels: Canvas },
}
impl Camera {
@@ -59,6 +92,7 @@ impl Camera {
pixel_size,
half_height,
half_width,
render_strategy: RenderStrategy::WorkerPool,
}
}
pub fn hsize(&self) -> usize {
@@ -176,31 +210,88 @@ impl Camera {
/// assert_eq!(image.get(5, 5), Color::new(0.38066, 0.47583, 0.2855));
/// ```
pub fn render(&self, w: &World) -> Canvas {
self.render_serial(w)
use RenderStrategy::*;
match self.render_strategy {
Serial => self.render_serial(w),
Rayon => self.render_parallel_rayon(w),
WorkerPool => self.render_parallel_one_tread_per_core(w),
}
}
#[allow(dead_code)]
fn render_parallel_one_tread_per_core(&self, w: &World) -> Canvas {
let image_mu = Mutex::new(Canvas::new(self.hsize, self.vsize, BLACK));
/// This render function spins up one thread per core and pins each thread
/// to its core. It then sends work requests to the worker threads,
/// requesting a full line of the image to be rendered. The main thread
/// collects the results and stores them in the canvas returned to the user.
fn render_parallel_one_tread_per_core(&self, world: &World) -> Canvas {
let mut image = Canvas::new(self.hsize, self.vsize, BLACK);
let num_threads = num_cpus::get();
let (pixel_req_tx, pixel_req_rx) = sync_channel(2 * num_threads);
let (pixel_resp_tx, pixel_resp_rx) = sync_channel(2 * num_threads);
let pixel_req_rx = Arc::new(Mutex::new(pixel_req_rx));
(0..self.vsize).into_par_iter().for_each(|y| {
let mut row_image = Canvas::new(self.hsize, 1, BLACK);
for x in 0..self.hsize {
let ray = self.ray_for_pixel(x, y);
let color = w.color_at(&ray);
row_image.set(x, 0, color);
// Create copies of the world and camera that we can share with all workers.
// It's probably okay to clone the camera, but the world could get large
// (think textures and high-poly-count models).
// TODO(wathiede): prevent a second copy of the world when worlds start
// getting large.
let world = Arc::new(world.clone());
let camera = Arc::new(self.clone());
let core_ids = core_affinity::get_core_ids().unwrap();
println!("Creating {} render threads", core_ids.len());
// Create a worker thread for each CPU core and pin the thread to the core.
let mut handles = core_ids
.into_iter()
.enumerate()
.map(|(i, id)| {
let w = Arc::clone(&world);
let c = Arc::clone(&camera);
let pixel_req_rx = pixel_req_rx.clone();
let pixel_resp_tx = pixel_resp_tx.clone();
thread::spawn(move || {
core_affinity::set_for_current(id);
render_worker(i, &c, &w, pixel_req_rx, &pixel_resp_tx);
})
})
.collect::<Vec<_>>();
drop(pixel_req_rx);
drop(pixel_resp_tx);
// Send render requests over channels to worker threads.
let (w, h) = (camera.hsize, camera.vsize);
handles.push(thread::spawn(move || {
for y in 0..h {
pixel_req_tx
.send(Request::Line { width: w, y })
.expect("failed to send line request");
}
// TODO(wathiede): create a row based setter for memcpying the row as a whole.
let mut image = image_mu.lock().expect("failed to lock image mutex");
for x in 0..self.hsize {
image.set(x, y, row_image.get(x, 0));
drop(pixel_req_tx);
}));
// Read responses from channel and blit image data.
for resp in pixel_resp_rx {
match resp {
Response::Line { y, pixels } => {
for x in 0..camera.hsize {
image.set(x, y, pixels.get(x, 0));
}
}
}
});
image_mu
.into_inner()
.expect("failed to get image out of mutex")
}
// Wait for all the threads to exit.
for thr in handles {
thr.join().expect("thread join");
}
image
}
#[allow(dead_code)]
/// This renderer uses rayon to render each row as a separate parallel task.
/// It seems to have really bad performance (only a ~6x speedup over serial),
/// and the flame graph is a mess. One strength over
/// `render_parallel_one_tread_per_core` is that it doesn't require `Camera`
/// and `World` to be cloneable.
fn render_parallel_rayon(&self, w: &World) -> Canvas {
let image_mu = Mutex::new(Canvas::new(self.hsize, self.vsize, BLACK));
@@ -222,6 +313,7 @@ impl Camera {
.expect("failed to get image out of mutex")
}
/// Reference render implementation from the book. Single-threaded, nothing fancy.
fn render_serial(&self, w: &World) -> Canvas {
let mut image = Canvas::new(self.hsize, self.vsize, BLACK);
for y in 0..self.vsize {
@@ -234,3 +326,34 @@ impl Camera {
image
}
}
fn render_worker(
tid: usize,
c: &Camera,
w: &World,
input_chan: Arc<Mutex<Receiver<Request>>>,
output_chan: &SyncSender<Response>,
) {
loop {
let job = { input_chan.lock().unwrap().recv() };
match job {
Err(err) => {
eprintln!("Shutting down render_worker {}: {}", tid, err);
return;
}
Ok(req) => match req {
Request::Line { width, y } => {
let mut pixels = Canvas::new(width, 1, BLACK);
for x in 0..width {
let ray = c.ray_for_pixel(x, y);
let color = w.color_at(&ray);
pixels.set(x, 0, color);
}
output_chan
.send(Response::Line { y, pixels })
.expect("failed to send pixel response");
}
},
}
}
}
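For reference, the worker-pool shape introduced above (bounded request and response channels, a shared Arc<Mutex<Receiver>> acting as the job queue, workers shutting down once the request sender is dropped) reduces to the following self-contained sketch. The ray tracing, core_affinity pinning, and num_cpus detection are stripped out so it runs on the standard library alone; the thread count, row count, and the Request/Response payloads here are placeholders rather than the commit's types:

use std::{
    sync::{
        mpsc::{sync_channel, Receiver, SyncSender},
        Arc, Mutex,
    },
    thread,
};

struct Request {
    y: usize,
}

struct Response {
    y: usize,
    value: u64,
}

// Workers share one Receiver behind a Mutex; whichever worker takes the lock
// first gets the next job. recv() returns Err once the request sender is
// dropped and the queue is drained, which acts as the shutdown signal.
fn worker(jobs: Arc<Mutex<Receiver<Request>>>, results: SyncSender<Response>) {
    loop {
        let job = { jobs.lock().unwrap().recv() };
        match job {
            Err(_) => return,
            Ok(Request { y }) => {
                let value = (y as u64) * 2; // stand-in for rendering one scanline
                results.send(Response { y, value }).expect("send response");
            }
        }
    }
}

fn main() {
    let num_threads: usize = 4;
    let rows: usize = 16;
    let (req_tx, req_rx) = sync_channel(2 * num_threads);
    let (resp_tx, resp_rx) = sync_channel(2 * num_threads);
    let req_rx = Arc::new(Mutex::new(req_rx));

    let mut handles: Vec<_> = (0..num_threads)
        .map(|_| {
            let jobs = Arc::clone(&req_rx);
            let results = resp_tx.clone();
            thread::spawn(move || worker(jobs, results))
        })
        .collect();
    // Drop the main thread's response sender so the fan-in loop below ends
    // once every worker has exited.
    drop(resp_tx);

    // Producer: queue one request per row, then drop the sender so the
    // workers see Err(..) and shut down.
    handles.push(thread::spawn(move || {
        for y in 0..rows {
            req_tx.send(Request { y }).expect("send request");
        }
    }));

    // Fan-in: collect each row's result as it arrives.
    let mut out = vec![0u64; rows];
    for Response { y, value } in resp_rx {
        out[y] = value;
    }
    for h in handles {
        h.join().expect("thread join");
    }
    println!("{:?}", out);
}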