From 7344b12a783dd8fb5de38516bc1550ade849c6df Mon Sep 17 00:00:00 2001
From: Michael Lamparski
Date: Thu, 6 Jun 2019 15:39:45 -0400
Subject: [PATCH] add a benchmark for to_vec

I tried a variety of things to optimize this function:

* Replacing usage of get_unchecked with reuse of the remainder
  returned by read_one, so that the stride can be statically known
  rather than having to be looked up. (this is what optimized the
  old read benchmark)

* Putting an assertion up front to prove that the data vector is
  long enough.

But whatever I do, performance won't budge. In the f32 benchmark, a
very hot bounds check still occurs on every read to ensure that the
length of the data is at least 4 bytes.

So I'm adding the benchmark, but leaving the function itself alone.
---
 .gitignore       |  1 +
 benches/bench.rs | 13 +++++++++++++
 2 files changed, 14 insertions(+)

diff --git a/.gitignore b/.gitignore
index baa1df9..183ce61 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 target
 Cargo.lock
 tests/*.npy
+benches/*.npy

diff --git a/benches/bench.rs b/benches/bench.rs
index 9a46809..001641a 100644
--- a/benches/bench.rs
+++ b/benches/bench.rs
@@ -38,6 +38,19 @@ macro_rules! gen_benches {
             });
         }
 
+        #[bench]
+        fn read_to_vec(b: &mut Bencher) {
+            // FIXME: Write to a Cursor<Vec<u8>> once #16 is merged
+            let path = concat!("benches/bench_", stringify!($T), ".npy");
+
+            npy::to_file(path, (0usize..NITER).map($new)).unwrap();
+            let bytes = std::fs::read(path).unwrap();
+
+            b.iter(|| {
+                bb(npy::NpyData::<$T>::from_bytes(&bytes).unwrap().to_vec())
+            });
+        }
+
         #[bench]
         fn write(b: &mut Bencher) {
             b.iter(|| {
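
For context, the "assertion up front" attempt described in the commit message
is the usual bounds-check-elision trick: assert the full length once before
the loop and hope LLVM proves the per-element checks redundant. The sketch
below is hypothetical (illustrative names, not the crate's actual read path),
and it shows why the attempt can still leave a hot check: each indexing
operation keeps its own bounds check unless the optimizer eliminates it,
which matches what the f32 benchmark showed.

    // Hypothetical sketch of the rejected approach; not code from this patch.
    fn to_vec_f32(data: &[u8], n: usize) -> Vec<f32> {
        // Prove the total length once, up front.
        assert!(data.len() >= n * 4);
        (0..n)
            .map(|i| {
                // Each index below still carries a bounds check unless LLVM
                // can derive it from the assert above.
                let b = [data[4 * i], data[4 * i + 1], data[4 * i + 2], data[4 * i + 3]];
                f32::from_le_bytes(b)
            })
            .collect()
    }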