camera: make rendering strategy configurable, add workerpool version.

2021-07-18 11:48:12 -07:00
parent 94ea724344
commit 4f88d2c101
3 changed files with 402 additions and 25 deletions

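The new render-strategy flag is parsed through serde rather than a hand-written match: FromStr wraps the raw token in quotes so it becomes a JSON string literal, and the kebab-case rename attribute maps it onto an enum variant ("worker-pool" parses to WorkerPool). Below is a minimal, self-contained sketch of that mechanism, with the enum duplicated standalone (serde derives only, plus PartialEq just for the asserts); it is an illustration, not part of this commit:

use std::str::FromStr;

use serde::Deserialize;

#[derive(Debug, Deserialize, PartialEq)]
#[serde(rename_all = "kebab-case")]
enum RenderStrategy {
    Serial,
    Rayon,
    WorkerPool,
}

impl FromStr for RenderStrategy {
    type Err = serde_json::Error;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Quote the bare token so serde_json sees a JSON string literal,
        // then let the kebab-case rename do the variant matching.
        serde_json::from_str(&format!("\"{}\"", s))
    }
}

fn main() {
    assert_eq!(
        "worker-pool".parse::<RenderStrategy>().unwrap(),
        RenderStrategy::WorkerPool
    );
    assert!("no-such-strategy".parse::<RenderStrategy>().is_err());
}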

@@ -1,9 +1,33 @@
use std::sync::Mutex;
use std::{
str::FromStr,
sync::{
mpsc::{sync_channel, Receiver, SyncSender},
{Arc, Mutex},
},
thread,
};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use serde::Deserialize;
use structopt::StructOpt;
use crate::{canvas::Canvas, matrices::Matrix4x4, rays::Ray, tuples::Tuple, world::World, BLACK};
#[derive(Copy, Clone, StructOpt, Debug, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum RenderStrategy {
Serial,
Rayon,
WorkerPool,
}
impl FromStr for RenderStrategy {
type Err = serde_json::error::Error;
fn from_str(s: &str) -> Result<RenderStrategy, serde_json::error::Error> {
Ok(serde_json::from_str(&format!("\"{}\"", s))?)
}
}
#[derive(Clone)]
pub struct Camera {
hsize: usize,
vsize: usize,
@@ -13,6 +37,15 @@ pub struct Camera {
pixel_size: f32,
half_width: f32,
half_height: f32,
pub render_strategy: RenderStrategy,
}
enum Request {
Line { width: usize, y: usize },
}
enum Response {
Line { y: usize, pixels: Canvas },
}
impl Camera {
@@ -59,6 +92,7 @@ impl Camera {
pixel_size,
half_height,
half_width,
render_strategy: RenderStrategy::WorkerPool,
}
}
pub fn hsize(&self) -> usize {
@@ -176,31 +210,88 @@ impl Camera {
/// assert_eq!(image.get(5, 5), Color::new(0.38066, 0.47583, 0.2855));
/// ```
pub fn render(&self, w: &World) -> Canvas {
self.render_serial(w)
use RenderStrategy::*;
match self.render_strategy {
Serial => self.render_serial(w),
Rayon => self.render_parallel_rayon(w),
WorkerPool => self.render_parallel_one_tread_per_core(w),
}
}
#[allow(dead_code)]
fn render_parallel_one_tread_per_core(&self, w: &World) -> Canvas {
let image_mu = Mutex::new(Canvas::new(self.hsize, self.vsize, BLACK));
/// This render function spins up one thread per core and pins each thread
/// to its core. It then sends work requests to the worker threads,
/// requesting a full line of the image to be rendered. The main thread
/// collects the results and stores them in the canvas returned to the user.
fn render_parallel_one_tread_per_core(&self, world: &World) -> Canvas {
let mut image = Canvas::new(self.hsize, self.vsize, BLACK);
let num_threads = num_cpus::get();
let (pixel_req_tx, pixel_req_rx) = sync_channel(2 * num_threads);
let (pixel_resp_tx, pixel_resp_rx) = sync_channel(2 * num_threads);
let pixel_req_rx = Arc::new(Mutex::new(pixel_req_rx));
(0..self.vsize).into_par_iter().for_each(|y| {
let mut row_image = Canvas::new(self.hsize, 1, BLACK);
for x in 0..self.hsize {
let ray = self.ray_for_pixel(x, y);
let color = w.color_at(&ray);
row_image.set(x, 0, color);
// Create copies of the world and camera that we can share with all workers.
// It's probably okay to clone the camera, but the world could get large
// (think textures and high-poly-count models).
// TODO(wathiede): prevent a second copy of the world when worlds start
// getting large.
let world = Arc::new(world.clone());
let camera = Arc::new(self.clone());
let core_ids = core_affinity::get_core_ids().unwrap();
println!("Creating {} render threads", core_ids.len());
// Create a worker thread for each CPU core and pin the thread to the core.
let mut handles = core_ids
.into_iter()
.enumerate()
.map(|(i, id)| {
let w = Arc::clone(&world);
let c = Arc::clone(&camera);
let pixel_req_rx = pixel_req_rx.clone();
let pixel_resp_tx = pixel_resp_tx.clone();
thread::spawn(move || {
core_affinity::set_for_current(id);
render_worker(i, &c, &w, pixel_req_rx, &pixel_resp_tx);
})
})
.collect::<Vec<_>>();
drop(pixel_req_rx);
drop(pixel_resp_tx);
// Send render requests over channels to worker threads.
let (w, h) = (camera.hsize, camera.vsize);
handles.push(thread::spawn(move || {
for y in 0..h {
pixel_req_tx
.send(Request::Line { width: w, y })
.expect("failed to send line request");
}
// TODO(wathiede): create a row based setter for memcpying the row as a whole.
let mut image = image_mu.lock().expect("failed to lock image mutex");
for x in 0..self.hsize {
image.set(x, y, row_image.get(x, 0));
drop(pixel_req_tx);
}));
// Read responses from channel and blit image data.
for resp in pixel_resp_rx {
match resp {
Response::Line { y, pixels } => {
for x in 0..camera.hsize {
image.set(x, y, pixels.get(x, 0));
}
}
}
});
image_mu
.into_inner()
.expect("failed to get image out of mutex")
}
// Wait for all the threads to exit.
for thr in handles {
thr.join().expect("thread join");
}
image
}
#[allow(dead_code)]
/// This renderer uses rayon to render each row as a separate parallel task.
/// It seems to have really bad performance (only a ~6x speedup over serial),
/// and the flame graph is a mess. One strength over
/// `render_parallel_one_tread_per_core` is that it doesn't require `Camera`
/// and `World` to be cloneable.
fn render_parallel_rayon(&self, w: &World) -> Canvas {
let image_mu = Mutex::new(Canvas::new(self.hsize, self.vsize, BLACK));
@@ -222,6 +313,7 @@ impl Camera {
.expect("failed to get image out of mutex")
}
/// Reference render implementation from the book. Single-threaded, nothing fancy.
fn render_serial(&self, w: &World) -> Canvas {
let mut image = Canvas::new(self.hsize, self.vsize, BLACK);
for y in 0..self.vsize {
@@ -234,3 +326,34 @@ impl Camera {
image
}
}
fn render_worker(
tid: usize,
c: &Camera,
w: &World,
input_chan: Arc<Mutex<Receiver<Request>>>,
output_chan: &SyncSender<Response>,
) {
loop {
let job = { input_chan.lock().unwrap().recv() };
match job {
Err(err) => {
eprintln!("Shutting down render_worker {}: {}", tid, err);
return;
}
Ok(req) => match req {
Request::Line { width, y } => {
let mut pixels = Canvas::new(width, 1, BLACK);
for x in 0..width {
let ray = c.ray_for_pixel(x, y);
let color = w.color_at(&ray);
pixels.set(x, 0, color);
}
output_chan
.send(Response::Line { y, pixels })
.expect("failed to send pixel response");
}
},
}
}
}
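For reference, the worker-pool shape introduced above (bounded request and response channels, a shared Arc<Mutex<Receiver>> acting as the job queue, workers shutting down once the request sender is dropped) reduces to the following self-contained sketch. The ray tracing, core_affinity pinning, and num_cpus detection are stripped out so it runs on the standard library alone; the thread count, row count, and the Request/Response payloads here are placeholders rather than the commit's types:

use std::{
    sync::{
        mpsc::{sync_channel, Receiver, SyncSender},
        Arc, Mutex,
    },
    thread,
};

struct Request {
    y: usize,
}

struct Response {
    y: usize,
    value: u64,
}

// Workers share one Receiver behind a Mutex; whichever worker takes the lock
// first gets the next job. recv() returns Err once the request sender is
// dropped and the queue is drained, which acts as the shutdown signal.
fn worker(jobs: Arc<Mutex<Receiver<Request>>>, results: SyncSender<Response>) {
    loop {
        let job = { jobs.lock().unwrap().recv() };
        match job {
            Err(_) => return,
            Ok(Request { y }) => {
                let value = (y as u64) * 2; // stand-in for rendering one scanline
                results.send(Response { y, value }).expect("send response");
            }
        }
    }
}

fn main() {
    let num_threads: usize = 4;
    let rows: usize = 16;
    let (req_tx, req_rx) = sync_channel(2 * num_threads);
    let (resp_tx, resp_rx) = sync_channel(2 * num_threads);
    let req_rx = Arc::new(Mutex::new(req_rx));

    let mut handles: Vec<_> = (0..num_threads)
        .map(|_| {
            let jobs = Arc::clone(&req_rx);
            let results = resp_tx.clone();
            thread::spawn(move || worker(jobs, results))
        })
        .collect();
    // Drop the main thread's response sender so the fan-in loop below ends
    // once every worker has exited.
    drop(resp_tx);

    // Producer: queue one request per row, then drop the sender so the
    // workers see Err(..) and shut down.
    handles.push(thread::spawn(move || {
        for y in 0..rows {
            req_tx.send(Request { y }).expect("send request");
        }
    }));

    // Fan-in: collect each row's result as it arrives.
    let mut out = vec![0u64; rows];
    for Response { y, value } in resp_rx {
        out[y] = value;
    }
    for h in handles {
        h.join().expect("thread join");
    }
    println!("{:?}", out);
}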