Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vectorized hash grouping on one column #7316

Draft
wants to merge 192 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
192 commits
Select commit Hold shift + click to select a range
b92e622
Vectorized hash grouping on one column
akuzm Oct 2, 2024
4ce0e99
Merge remote-tracking branch 'origin/main' into HEAD
akuzm Oct 2, 2024
74d4419
benchmark vectorized grouping (2024-10-02 no. 6)
akuzm Oct 2, 2024
baedf7f
fixes
akuzm Oct 2, 2024
35dbd36
benchmark vectorized grouping (2024-10-02 no. 7)
akuzm Oct 2, 2024
74fffd3
some ugly stuff
akuzm Oct 2, 2024
f8db454
benchmark vectorized grouping (2024-10-02 no. 9)
akuzm Oct 2, 2024
00a9d11
someething
akuzm Oct 4, 2024
339f91a
reduce indirections
akuzm Oct 4, 2024
f075589
skip null bitmap words
akuzm Oct 8, 2024
88f325d
cleanup
akuzm Oct 9, 2024
15ab443
crc32
akuzm Oct 9, 2024
ff16ec8
license
akuzm Oct 9, 2024
4291b17
benchmark vectorized hash grouping (2024-10-09 no. 10)
akuzm Oct 9, 2024
795ef6b
test deltadelta changes
akuzm Oct 11, 2024
1fabb22
some speedups and simplehash simplifications
akuzm Oct 11, 2024
717abc4
Revert "test deltadelta changes"
akuzm Oct 11, 2024
b03bd6b
test deltadelta changes
akuzm Oct 11, 2024
166d0e8
work with signed types
akuzm Oct 14, 2024
7f578b4
Revert "work with signed types"
akuzm Oct 14, 2024
e70cb0b
bulk stuff specialized to element type
akuzm Oct 14, 2024
0040844
roll back the delta delta stuff
akuzm Oct 14, 2024
7a9c320
more generic -- started to make some changes
akuzm Oct 14, 2024
694faf6
use simplehash
akuzm Oct 14, 2024
3d05674
cleanup
akuzm Oct 14, 2024
d90a90f
benchmark vectorized hash grouping (simple) (2024-10-14 no. 11)
akuzm Oct 14, 2024
4a93549
add more tests
akuzm Oct 15, 2024
3e06b92
remove modified simplehash
akuzm Oct 15, 2024
a7942ed
offsets
akuzm Oct 15, 2024
6fb517f
cleanup
akuzm Oct 15, 2024
ffb28cf
changelog
akuzm Oct 15, 2024
778ca97
cleanup
akuzm Oct 15, 2024
ef3847a
benchmark vectorized hash grouping (simple) (2024-10-15 no. 12)
akuzm Oct 15, 2024
1409c74
32-bit
akuzm Oct 15, 2024
514ae96
some renames
akuzm Oct 15, 2024
22d23b3
cleanup
akuzm Oct 15, 2024
5756319
renames
akuzm Oct 15, 2024
2361e13
merge
akuzm Oct 15, 2024
3c556e2
Merge remote-tracking branch 'akuzm/hash-simple' into HEAD
akuzm Oct 15, 2024
cd7a1dc
spelling
akuzm Oct 15, 2024
02f8320
Merge remote-tracking branch 'akuzm/hash-simple' into HEAD
akuzm Oct 15, 2024
4c833b7
license
akuzm Oct 15, 2024
4b2b425
separate key storage
akuzm Oct 15, 2024
9996861
specializations and text
akuzm Oct 16, 2024
757353d
hashing fixes and more tests
akuzm Oct 17, 2024
af2c1d1
Merge remote-tracking branch 'origin/main' into HEAD
akuzm Oct 17, 2024
cc05e8d
gcc doesn't like my inline
akuzm Oct 17, 2024
238c743
benchmark vectorized hash grouping (with text) (2024-10-17 no. 13)
akuzm Oct 17, 2024
4c71b9c
fixes
akuzm Oct 18, 2024
24e20ab
benchmark vectorized hash grouping (with text) (2024-10-18 no. 14)
akuzm Oct 18, 2024
ae0f865
benchmark vectorized hash grouping (with text) (2024-10-19 no. 15)
akuzm Oct 19, 2024
2e1cb26
consecutive keys optimization
akuzm Oct 19, 2024
f59c540
cleanup
akuzm Oct 19, 2024
d38ca5c
consecutive keys with hash
akuzm Oct 20, 2024
e12f563
consecutive keys w/o hash
akuzm Oct 20, 2024
fdad239
a stub for serialized hashing policy
akuzm Oct 20, 2024
9716313
format
akuzm Oct 20, 2024
87b2ffc
agg filter planning draft
akuzm Oct 20, 2024
a619abb
some prototyping for vectorized filter clause
akuzm Oct 22, 2024
2340280
separate implementation for no filters
akuzm Oct 22, 2024
a2806ea
fixing the unified bitmap
akuzm Oct 22, 2024
c18ded9
benchmark the unified bitmap (2024-10-22 no. 16)
akuzm Oct 22, 2024
c723a5a
cleanup
akuzm Oct 22, 2024
00910de
cleanup
akuzm Oct 22, 2024
4ab9714
serialized -- half done
akuzm Oct 24, 2024
ebd8755
Fix use-after-free in per-batch vectorized grouping policy
akuzm Oct 24, 2024
d592cf8
Merge remote-tracking branch 'akuzm/partial-context' into HEAD
akuzm Oct 24, 2024
46cea32
refactor null key handling
akuzm Oct 24, 2024
a9c9bd2
benchmark groupagg (2024-10-24 no. 17)
akuzm Oct 24, 2024
6975004
benchmark groupagg (2024-10-24 no. 18)
akuzm Oct 24, 2024
343603b
fixes
akuzm Oct 24, 2024
5854815
benchmark groupagg (2024-10-24 no. 19)
akuzm Oct 24, 2024
43cdee9
serialized policy only
akuzm Oct 25, 2024
bbd6f71
benchmark serialized (2024-10-25 no. 20)
akuzm Oct 25, 2024
46d655b
tests
akuzm Oct 25, 2024
9746d79
explain and cleanups
akuzm Oct 27, 2024
411e933
review comments -- vector agg defs array
akuzm Oct 28, 2024
aca0dca
review comments -- GroupingColumn array
akuzm Oct 28, 2024
49e9396
assorted review fixes
akuzm Oct 28, 2024
b570571
try to generate specializations -- bad
akuzm Oct 28, 2024
3aff8a9
specializations
akuzm Oct 28, 2024
2da8591
benchmark specializations (2024-10-29 no. 1)
akuzm Oct 28, 2024
71b0e81
more specializations
akuzm Oct 28, 2024
4a1cce3
optimizations
akuzm Oct 29, 2024
8ec6eb7
benchmark more optimizations (2024-10-29 no. 2)
akuzm Oct 29, 2024
c2d1b7f
benchmark more optimizations (2024-10-29 no. 3)
akuzm Oct 29, 2024
5b745ce
Merge remote-tracking branch 'origin/main' into HEAD
akuzm Oct 29, 2024
25dc4cd
Refactor vectorized grouping to prepare for hash grouping
akuzm Oct 29, 2024
9df52d3
const
akuzm Oct 29, 2024
ae8b513
forgotten reset
akuzm Oct 29, 2024
ac5bf2e
benchmark more optimizations (2024-10-29 no. 4)
akuzm Oct 29, 2024
b347f7d
Merge remote-tracking branch 'akuzm/vector-refactor' into HEAD
akuzm Oct 29, 2024
78ec5b2
reference REL_16_3 transparent_decompression-*
akuzm Oct 29, 2024
09ea9af
reference REL_15_7 transparent_decompression-*
akuzm Oct 29, 2024
533eff6
reference REL_14_11 transparent_decompression-*
akuzm Oct 29, 2024
1cf8158
reference REL_17_0-6-g4e0864af16 transparent_decompression-*
akuzm Oct 29, 2024
3c33743
clean up the combine function
akuzm Oct 29, 2024
29fd889
refs
akuzm Oct 29, 2024
2554bc4
dict
akuzm Oct 29, 2024
7708193
benchmark dict (2024-10-29 no. 5)
akuzm Oct 29, 2024
bb6c1c6
benchmark vector agg refactoring (2024-10-29 no. 6)
akuzm Oct 29, 2024
06b4593
crazier dict
akuzm Oct 29, 2024
90ec068
benchmark dict (2024-10-29 no. 7)
akuzm Oct 29, 2024
794fb93
to revert
akuzm Oct 30, 2024
75b8513
stuff
akuzm Nov 5, 2024
4dfa5c7
Fix variable resolution in vectorized aggregation planning
akuzm Nov 5, 2024
bc30ab8
changelog
akuzm Nov 5, 2024
69ed75d
typo
akuzm Nov 5, 2024
da0e3de
fix
akuzm Nov 5, 2024
eecd1dd
silence the warning
akuzm Nov 5, 2024
edaa7cb
test
akuzm Nov 6, 2024
a74833f
Update tsl/src/nodes/vector_agg/exec.c
akuzm Nov 6, 2024
baa81b1
review fixes
akuzm Nov 6, 2024
5927e1c
cleanup
akuzm Nov 6, 2024
eddcddc
Merge branch 'main' into resolve
akuzm Nov 7, 2024
45e3fd9
Merge branch 'main' into vector-refactor
akuzm Nov 7, 2024
a5dfada
Merge remote-tracking branch 'origin/main' into HEAD
akuzm Nov 7, 2024
d11fec5
fix
akuzm Nov 7, 2024
f8dc56c
Merge remote-tracking branch 'origin/main' into HEAD
akuzm Nov 7, 2024
9f47912
fix
akuzm Nov 7, 2024
92c6fd3
Update tsl/src/nodes/vector_agg/plan.c
akuzm Nov 12, 2024
893ecdd
more tests
akuzm Nov 13, 2024
ef353f0
Merge remote-tracking branch 'origin/main' into HEAD
akuzm Nov 13, 2024
cbdd913
Merge remote-tracking branch 'origin/main' into HEAD
akuzm Nov 13, 2024
7d83fb0
more cosmetic changes
akuzm Nov 13, 2024
de1b6da
int128
akuzm Nov 13, 2024
b7984ca
review fixes
akuzm Nov 13, 2024
cd5bded
another way to check for int128 support
akuzm Nov 13, 2024
2dd4b7c
fix
akuzm Nov 13, 2024
a06e845
Merge remote-tracking branch 'akuzm/vector-refactor' into HEAD
akuzm Nov 13, 2024
206b7b7
test
akuzm Nov 13, 2024
d5ad761
fixes
akuzm Nov 13, 2024
1992b8e
Merge remote-tracking branch 'origin/main' into HEAD
akuzm Nov 13, 2024
c6eb880
copy
akuzm Nov 13, 2024
0c59d9a
fix
akuzm Nov 13, 2024
e71a015
fixes for pg16
akuzm Nov 14, 2024
d032b27
more edge cases
akuzm Nov 14, 2024
8d9cf38
import the vector var resolution changes
akuzm Nov 14, 2024
8022348
Merge remote-tracking branch 'akuzm/resolve' into HEAD
akuzm Nov 14, 2024
46682d7
Merge remote-tracking branch 'akuzm/vector-refactor' into HEAD
akuzm Nov 14, 2024
4ca989b
test
akuzm Nov 14, 2024
c2ac4db
simplified output key creation
akuzm Nov 14, 2024
95a7c6a
Merge commit '96cbfc4912ed3ff6b0f9e806ac4ac8fdc55c934e' into HEAD
akuzm Nov 14, 2024
54f5efa
Merge tag 'tmp' into HEAD
akuzm Nov 14, 2024
6d03845
Merge remote-tracking branch 'origin/main' into HEAD
akuzm Nov 14, 2024
3b03a5c
fixes
akuzm Nov 14, 2024
9bca972
benchmark hash grouping (2024-11-14 no. 8)
akuzm Nov 14, 2024
b8776f8
abbreviated keys :o
akuzm Nov 14, 2024
5aac91f
abbreviated key for integers
akuzm Nov 14, 2024
6daa0a4
abbreviated keys for ints are slightly slower
akuzm Nov 14, 2024
ed795c1
umash
akuzm Nov 15, 2024
d5cb87d
benchmark hash grouping with umash abbreviated keys (2024-11-15 no. 9)
akuzm Nov 15, 2024
40824ef
fix compilation w/o crc
akuzm Nov 15, 2024
47d384c
shorter abbreviated umash key
akuzm Nov 15, 2024
0c19ff7
don't use bitwise operations on bool
akuzm Nov 16, 2024
010d543
incremental umash digest -- throughput much lower
akuzm Nov 16, 2024
3da89a0
revert incremental umash digest
akuzm Nov 16, 2024
dc2ec93
inline array construction
akuzm Nov 16, 2024
8c95744
stuff
akuzm Nov 16, 2024
5279896
Merge remote-tracking branch 'origin/main' into HEAD
akuzm Nov 18, 2024
05ef083
move the code around
akuzm Nov 18, 2024
c9d63ab
destroy_key is a noop for all strategies anyway
akuzm Nov 18, 2024
96b12f3
move more code around
akuzm Nov 18, 2024
d7b331c
it is very important to keep moving the code around
akuzm Nov 18, 2024
7b191cc
add more files
akuzm Nov 18, 2024
c2b5e18
cleanup
akuzm Nov 18, 2024
dcaefee
Merge remote-tracking branch 'origin/main' into HEAD
akuzm Nov 18, 2024
d420e8b
continue moving the code around
akuzm Nov 18, 2024
478c490
new day, new code moved around
akuzm Nov 19, 2024
9e51c19
Vectorize aggregate FILTER clause
akuzm Nov 19, 2024
b43be70
Optimize array creation in vectorized aggregate functions
akuzm Nov 19, 2024
3f8a872
Put vectorized aggregation results in short-lived memory context
akuzm Nov 19, 2024
3070e6a
add a test
akuzm Nov 19, 2024
a1c333d
everything is so broken
akuzm Nov 19, 2024
89e53c9
check the decompression memory usage as well
akuzm Nov 19, 2024
20b37b0
add the ref
akuzm Nov 19, 2024
e0f9dbd
fix for i386
akuzm Nov 19, 2024
e27444e
Merge remote-tracking branch 'akuzm/partial-context' into HEAD
akuzm Nov 19, 2024
1e582e3
Merge remote-tracking branch 'akuzm/array-state' into HEAD
akuzm Nov 19, 2024
7151ecc
experiments with alignment in serialized strategy
akuzm Nov 19, 2024
89e582e
Revert "experiments with alignment in serialized strategy"
akuzm Nov 19, 2024
a3b8613
new bad filtering of all-matching or no-matching bitmap words
akuzm Nov 21, 2024
074195e
bitmap at the beginning in the serialized policy
akuzm Nov 21, 2024
5892267
return the old no-match bitmap skipping
akuzm Nov 21, 2024
61afdb8
cleanup
akuzm Nov 21, 2024
eca6406
caching of validity bitmap in bools (no effect)
akuzm Nov 21, 2024
a890d61
Revert "caching of validity bitmap in bools (no effect)"
akuzm Nov 21, 2024
aadd86b
allow small overflow in serialized key -- dubious?
akuzm Nov 21, 2024
3e6e728
Revert "allow small overflow in serialized key -- dubious?"
akuzm Nov 26, 2024
480d0fe
Merge remote-tracking branch 'origin/main' into HEAD
akuzm Nov 26, 2024
d8b0d73
Merge remote-tracking branch 'origin/main' into HEAD
akuzm Nov 26, 2024
5243d25
Merge remote-tracking branch 'akuzm/vector-filter' into HEAD
akuzm Nov 26, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/gh_matrix_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def build_debug_config(overrides):
"ignored_tests": default_ignored_tests,
"name": "Debug",
"os": "ubuntu-22.04",
"pg_extra_args": "--enable-debug --enable-cassert --with-llvm LLVM_CONFIG=llvm-config-14",
"pg_extra_args": "CFLAGS=-march=native --enable-debug --enable-cassert --with-llvm LLVM_CONFIG=llvm-config-14",
"pg_extensions": "postgres_fdw test_decoding pageinspect pgstattuple",
"pginstallcheck": True,
"tsdb_build_args": "-DWARNINGS_AS_ERRORS=ON -DREQUIRE_ALL_TESTS=ON",
Expand Down
4 changes: 3 additions & 1 deletion .github/workflows/linux-32bit-build-and-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ jobs:
CC: clang-14
CXX: clang++-14
DEBIAN_FRONTEND: noninteractive
IGNORES: "append-* transparent_decompression-* transparent_decompress_chunk-* pg_dump telemetry bgw_db_scheduler* hypercore_vacuum"
# vectorized_aggregation has different output on i386 because int8 is by
# reference and currently it cannot be used for vectorized hash grouping.
IGNORES: "append-* transparent_decompression-* transparent_decompress_chunk-* pg_dump telemetry bgw_db_scheduler* hypercore_vacuum vectorized_aggregation"
SKIPS: chunk_adaptive histogram_test-*
EXTENSIONS: "postgres_fdw test_decoding pageinspect pgstattuple"
strategy:
Expand Down
1 change: 1 addition & 0 deletions .unreleased/vectorized-grouping-one-fixed
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Implements: #7341 Vectorized aggregation with grouping by one fixed-size by-value compressed column (such as arithmetic types).
2 changes: 1 addition & 1 deletion scripts/clang_format_all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ SCRIPT_DIR=$(cd "$(dirname $0)" || exit; pwd)
BASE_DIR=$(dirname $SCRIPT_DIR)

find ${BASE_DIR} \( -path "${BASE_DIR}/src/*" -or -path "${BASE_DIR}/test/*" -or -path "${BASE_DIR}/tsl/*" \) \
-and -not \( -path "*/.*" -or -path "*CMake*" \) \
-and -not \( -path "*/.*" -or -path "*CMake*" -or -path "${BASE_DIR}/tsl/src/import/*" \) \
-and \( -name '*.c' -or -name '*.h' \) -print0 | xargs -0 ${SCRIPT_DIR}/clang_format_wrapper.sh -style=file -i
47 changes: 42 additions & 5 deletions tsl/src/compression/arrow_c_data_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,25 +184,62 @@ arrow_set_row_validity(uint64 *bitmap, size_t row_number, bool value)
}

/*
* AND two optional arrow validity bitmaps into the given storage.
* Combine the validity bitmaps into the given storage.
*/
static inline const uint64 *
arrow_combine_validity(size_t num_words, uint64 *restrict storage, const uint64 *filter1,
const uint64 *filter2)
const uint64 *filter2, const uint64 *filter3)
{
/*
* Any and all of the filters can be null. For simplicity, move the non-null
* filters to the front.
*/
const uint64 *tmp;
#define SWAP(X, Y) \
tmp = (X); \
(X) = (Y); \
(Y) = tmp;

if (filter2 == NULL)
{
SWAP(filter2, filter3);
}

if (filter1 == NULL)
{
return filter2;
SWAP(filter1, filter2);

if (filter2 == NULL)
{
SWAP(filter2, filter3);
}
}
#undef SWAP

Assert(filter2 == NULL || filter1 != NULL);
Assert(filter3 == NULL || filter2 != NULL);

if (filter2 == NULL)
{
/* Either have one non-null filter, or all of them are null. */
return filter1;
}

for (size_t i = 0; i < num_words; i++)
if (filter3 == NULL)
{
/* Have two non-null filters. */
for (size_t i = 0; i < num_words; i++)
{
storage[i] = filter1[i] & filter2[i];
}
}
else
{
storage[i] = filter1[i] & filter2[i];
/* Have three non-null filters. */
for (size_t i = 0; i < num_words; i++)
{
storage[i] = filter1[i] & filter2[i] & filter3[i];
}
}

return storage;
Expand Down
4 changes: 2 additions & 2 deletions tsl/src/import/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
set(SOURCES "")
target_sources(${PROJECT_NAME} PRIVATE ${SOURCES})
set(SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/umash.c)
target_sources(${TSL_LIBRARY_NAME} PRIVATE ${SOURCES})
Loading
Loading