Skip to content

Commit

Permalink
[performance] ParallelGzipReader: Use special case with seek when no output file or buffer is given
Browse files Browse the repository at this point in the history
  • Loading branch information
mxmlnkn committed May 26, 2024
1 parent 6093e82 commit 179752f
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 7 deletions.
4 changes: 4 additions & 0 deletions src/rapidgzip/ParallelGzipReader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,10 @@ class ParallelGzipReader final :
nBytesDecoded += dataToWriteSize;
};

if ( ( outputFileDescriptor == -1 ) && ( outputBuffer == nullptr ) ) {
/* An empty std::function gives the read method the option to optimize, e.g., via seeking. */
return read( WriteFunctor{}, nBytesToRead );
}
return read( writeFunctor, nBytesToRead );
}

Expand Down
12 changes: 5 additions & 7 deletions src/tests/rapidgzip/testParallelGzipReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -671,7 +671,7 @@ testPrefetchingAfterSplit()
reader.setCRC32Enabled( true );

/* Read everything. The data should contain sufficient chunks such that the first ones have been evicted. */
reader.read( -1, nullptr, std::numeric_limits<size_t>::max() );
REQUIRE_EQUAL( reader.read( -1, nullptr, std::numeric_limits<size_t>::max() ), 64_Mi );
REQUIRE_EQUAL( reader.statistics().onDemandFetchCount, 1U );
REQUIRE_EQUAL( reader.tell(), 64_Mi );
REQUIRE_EQUAL( reader.tellCompressed(), compressedRandomDNA.size() * BYTE_SIZE );
Expand All @@ -692,7 +692,7 @@ testPrefetchingAfterSplit()
std::cerr << "File was split into " << reader.blockOffsets().size() - 1 << " chunks\n"; // 70, subject to change

reader2.read( -1, nullptr, std::numeric_limits<size_t>::max() );
REQUIRE_EQUAL( reader2.statistics().onDemandFetchCount, 1U );
REQUIRE_EQUAL( reader2.statistics().onDemandFetchCount, 0U );
}


Expand Down Expand Up @@ -1089,6 +1089,8 @@ main( int argc,
return 1;
}

using namespace std::string_literals;

printClassSizes();

const std::string binaryFilePath( argv[0] );
Expand All @@ -1107,15 +1109,13 @@ main( int argc,
testCRC32AndCleanUnmarkedData();
testPrefetchingAfterSplit();
testCachedChunkReuseAfterSplit();
testParallelDecoderNano();

const auto tmpFolder = createTemporaryDirectory( "rapidgzip.testParallelGzipReader" );

testWindowPruning( tmpFolder );

testPerformance( tmpFolder );

testParallelDecoderNano();

/* The second and last encoded offset should always be at the end of the file, i.e., equal the file size in bits. */
testIndexCreation( rootFolder / "1B.bgz", { { 18 * 8, 0 }, { 60 * 8, 1 } } );
testIndexCreation( rootFolder / "1B.deflate", { { 0, 0 }, { 3 * 8, 1 } } );
Expand All @@ -1126,8 +1126,6 @@ main( int argc,
testIndexCreation( rootFolder / "1B.pigz", { { 13 * 8, 0 }, { 24 * 8, 1 } } );
testIndexCreation( rootFolder / "1B.zlib", { { 2 * 8, 0 }, { 9 * 8, 1 } } );

using namespace std::string_literals;

testChecksummedMultiStreamDecompression( rootFolder / "base64-32KiB.deflate",
rootFolder / "base64-32KiB" );

Expand Down

0 comments on commit 179752f

Please sign in to comment.