add a benchmark for to_vec

I tried a variety of things to optimize this function:

* Replacing usage of `get_unchecked` with reuse of the remainder returned by `read_one`, so that the stride can be statically known rather than having to be looked up. (This is what optimized the old read benchmark.)
* Putting an assertion up front to prove that the data vector is long enough.

But whatever I do, performance won't budge. In the f32 benchmark, a very hot bounds check still occurs on every read to ensure that the length of the data is at least 4 bytes.

So I'm adding the benchmark, but leaving the function itself alone.
potocpav · Jun 6, 2019 · 7344b12 · 7344b12
1 parent d177998
commit 7344b12
Show file tree

Hide file tree

Showing 2 changed files with 14 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
 target
 Cargo.lock
 tests/*.npy
+benches/*.npy
diff --git a/benches/bench.rs b/benches/bench.rs
@@ -38,6 +38,19 @@ macro_rules! gen_benches {
             });
         }
 
+        #[bench]
+        fn read_to_vec(b: &mut Bencher) {
+            // FIXME: Write to a Cursor<Vec<u8>> once #16 is merged
+            let path = concat!("benches/bench_", stringify!($T), ".npy");
+
+            npy::to_file(path, (0usize..NITER).map($new)).unwrap();
+            let bytes = std::fs::read(path).unwrap();
+
+            b.iter(|| {
+                bb(npy::NpyData::<$T>::from_bytes(&bytes).unwrap().to_vec())
+            });
+        }
+
         #[bench]
         fn write(b: &mut Bencher) {
             b.iter(|| {