Skip to content

Commit

Permalink
Merge pull request #214 from ashvardanian/main-dev
Browse files Browse the repository at this point in the history
Fused-Multiply-Add
  • Loading branch information
ashvardanian authored Oct 27, 2024
2 parents bb5d63d + 65f8b13 commit 7cea703
Show file tree
Hide file tree
Showing 22 changed files with 5,322 additions and 2,988 deletions.
63 changes: 61 additions & 2 deletions .clang-format
Original file line number Diff line number Diff line change
@@ -1,10 +1,69 @@
Language: Cpp
BasedOnStyle: LLVM
BasedOnStyle: LLVM
IndentWidth: 4
TabWidth: 4
NamespaceIndentation: None
ColumnLimit: 120
ReflowComments: true
UseTab: Never
PointerAlignment: Left

AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlines: true
AlignOperands: true
AlignTrailingComments: true

AllowAllArgumentsOnNextLine: false
AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: Always
AllowShortIfStatementsOnASingleLine: Always
AllowShortCaseLabelsOnASingleLine: true
AllowShortFunctionsOnASingleLine: true
AllowShortLambdasOnASingleLine: true
AllowShortLoopsOnASingleLine: true
AlwaysBreakTemplateDeclarations: Yes
AlwaysBreakAfterReturnType: None
PenaltyReturnTypeOnItsOwnLine: 200

BreakBeforeBraces: Custom
BraceWrapping:
AfterCaseLabel: false
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterExternBlock: false
AfterFunction: false
AfterStruct: false
AfterNamespace: false
AfterUnion: false
BeforeCatch: true
BeforeElse: true
SplitEmptyFunction: false
SplitEmptyRecord: false
SplitEmptyNamespace: false
IndentBraces: false

SortIncludes: true
SortUsingDeclarations: true

SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: true
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInCStyleCastParentheses: false
SpacesInContainerLiterals: false
SpacesInParentheses: false
SpacesInSquareBrackets: false

BinPackArguments: true
BinPackParameters: true
PenaltyBreakBeforeFirstCallParameter: 1
1 change: 1 addition & 0 deletions .git-blame-ignore-revs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
a4022a988287e527757ecc9bc16a4f2e7dc4770e
23 changes: 15 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ project(
simsimd
VERSION 5.8.0
LANGUAGES C CXX
DESCRIPTION "Fastest SIMD-Accelerated Vector Similarity Functions for x86 and Arm"
DESCRIPTION "Portable mixed-precision BLAS-like vector math library for x86 and ARM"
HOMEPAGE_URL "https://github.com/ashvardanian/simsimd"
)

Expand All @@ -16,7 +16,7 @@ set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED YES)
set(CMAKE_CXX_EXTENSIONS NO)

# Determine if StringZilla is built as a subproject (using `add_subdirectory`) or if it is the main project
# Determine if SimSIMD is built as a subproject (using `add_subdirectory`) or if it is the main project
set(SIMSIMD_IS_MAIN_PROJECT OFF)

if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
Expand Down Expand Up @@ -79,7 +79,7 @@ if (SIMSIMD_BUILD_BENCHMARKS)
)
FetchContent_MakeAvailable(benchmark)

# Remove the google benchmark built in debug warning
# Silence Google Benchmark's "built in debug" warning in Release builds
if (CMAKE_BUILD_TYPE STREQUAL "Release")
target_compile_definitions(benchmark PRIVATE NDEBUG)
endif ()
Expand All @@ -88,12 +88,19 @@ if (SIMSIMD_BUILD_BENCHMARKS)
add_executable(simsimd_bench scripts/bench.cxx)
target_link_libraries(simsimd_bench simsimd Threads::Threads benchmark)

find_package(BLAS)

if (BLAS_FOUND AND SIMSIMD_BUILD_BENCHMARKS_WITH_CBLAS)
target_compile_definitions(simsimd_bench PRIVATE SIMSIMD_BUILD_BENCHMARKS_WITH_CBLAS=1)
target_link_libraries(simsimd_bench ${BLAS_LIBRARIES})
if (SIMSIMD_BUILD_BENCHMARKS_WITH_CBLAS)
find_package(BLAS REQUIRED)
if (BLAS_FOUND)
message(STATUS "BLAS found: ${BLAS_LIBRARIES}")
include_directories(${BLAS_INCLUDE_DIRS})
target_include_directories(simsimd_bench PRIVATE ${BLAS_INCLUDE_DIRS})
target_link_libraries(simsimd_bench ${BLAS_LIBRARIES})
target_compile_definitions(simsimd_bench PRIVATE SIMSIMD_BUILD_BENCHMARKS_WITH_CBLAS=1)
else ()
message(FATAL_ERROR "BLAS not found")
endif ()
endif ()

endif ()

if (SIMSIMD_BUILD_TESTS)
Expand Down
36 changes: 29 additions & 7 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,37 @@ sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 100
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 100
```

On MacOS it's recommended to use Homebrew and install Clang, as opposed to "Apple Clang".
Replacing the default compiler is not recommended, as it may break the system, but you can pass it as an environment variable:
To compile with the default Apple Clang on macOS, use:

```sh
brew install llvm
cmake -D CMAKE_BUILD_TYPE=Release -D SIMSIMD_BUILD_TESTS=1 \
-D CMAKE_C_COMPILER="$(brew --prefix llvm)/bin/clang" \
-D CMAKE_CXX_COMPILER="$(brew --prefix llvm)/bin/clang++" \
-B build_release
brew install openblas
cmake -D CMAKE_BUILD_TYPE=Release \
-D SIMSIMD_BUILD_TESTS=1 \
-D SIMSIMD_BUILD_BENCHMARKS=1 \
-D SIMSIMD_BUILD_BENCHMARKS_WITH_CBLAS=1 \
-D CMAKE_PREFIX_PATH="$(brew --prefix openblas)" \
-D CMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES="$(brew --prefix openblas)/include" \
-B build_release
cmake --build build_release --config Release
```

On macOS, it's recommended to install Clang via Homebrew, as opposed to using "Apple Clang".
Replacing the default compiler across the entire system is not recommended on macOS, as it may break the system, but you can select the Homebrew compiler for a single build by passing its path to CMake via `CMAKE_C_COMPILER` and `CMAKE_CXX_COMPILER`:

```sh
brew install llvm openblas
cmake -D CMAKE_BUILD_TYPE=Release \
-D SIMSIMD_BUILD_TESTS=1 \
-D SIMSIMD_BUILD_BENCHMARKS=1 \
-D SIMSIMD_BUILD_BENCHMARKS_WITH_CBLAS=1 \
-D CMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES="$(brew --prefix openblas)/include" \
-D CMAKE_C_LINK_FLAGS="-L$(xcrun --sdk macosx --show-sdk-path)/usr/lib" \
-D CMAKE_EXE_LINKER_FLAGS="-L$(xcrun --sdk macosx --show-sdk-path)/usr/lib" \
-D CMAKE_C_COMPILER="$(brew --prefix llvm)/bin/clang" \
-D CMAKE_CXX_COMPILER="$(brew --prefix llvm)/bin/clang++" \
-D CMAKE_OSX_SYSROOT="$(xcrun --sdk macosx --show-sdk-path)" \
-D CMAKE_OSX_DEPLOYMENT_TARGET=$(sw_vers -productVersion) \
-B build_release
cmake --build build_release --config Release
```

Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "simsimd"
description = "Fastest SIMD-Accelerated Vector Similarity Functions for x86 and Arm"
description = "Portable mixed-precision BLAS-like vector math library for x86 and ARM"
version = "5.8.0"
edition = "2021"
license = "Apache-2.0"
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -729,7 +729,8 @@ To explicitly disable half-precision support, define the following macro before
> But if you are running on different generations of devices, it makes sense to pre-compile the library for all supported generations at once, and dispatch at runtime.
> This flag does just that and is used to produce the `simsimd.so` shared library, as well as the Python and other bindings.
`SIMSIMD_TARGET_ARM` (`SIMSIMD_TARGET_NEON`, `SIMSIMD_TARGET_SVE`, `SIMSIMD_TARGET_SVE2`, `SIMSIMD_TARGET_NEON_F16`, `SIMSIMD_TARGET_SVE_F16`, `SIMSIMD_TARGET_NEON_BF16`, `SIMSIMD_TARGET_SVE_BF16`), `SIMSIMD_TARGET_X86` (`SIMSIMD_TARGET_HASWELL`, `SIMSIMD_TARGET_SKYLAKE`, `SIMSIMD_TARGET_ICE`, `SIMSIMD_TARGET_GENOA`, `SIMSIMD_TARGET_SAPPHIRE`, `SIMSIMD_TARGET_TURIN`, `SIMSIMD_TARGET_SIERRA`):
For Arm: `SIMSIMD_TARGET_NEON`, `SIMSIMD_TARGET_SVE`, `SIMSIMD_TARGET_SVE2`, `SIMSIMD_TARGET_NEON_F16`, `SIMSIMD_TARGET_SVE_F16`, `SIMSIMD_TARGET_NEON_BF16`, `SIMSIMD_TARGET_SVE_BF16`.
For x86: `SIMSIMD_TARGET_HASWELL`, `SIMSIMD_TARGET_SKYLAKE`, `SIMSIMD_TARGET_ICE`, `SIMSIMD_TARGET_GENOA`, `SIMSIMD_TARGET_SAPPHIRE`, `SIMSIMD_TARGET_TURIN`, `SIMSIMD_TARGET_SIERRA`.
> By default, SimSIMD automatically infers the target architecture and pre-compiles as many kernels as possible.
> In some cases, you may want to explicitly disable some of the kernels.
Expand All @@ -753,6 +754,7 @@ In general there are a few principles that SimSIMD follows:
- Avoid returning values from public interfaces; use out-arguments instead.
- Don't over-optimize for old CPUs and single- and double-precision floating-point numbers.
- Prioritize mixed-precision and integer operations, and new ISA extensions.
- Prefer saturated arithmetic and avoid overflows.
Possibly, in the future:
Expand Down
Loading

0 comments on commit 7cea703

Please sign in to comment.