diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b2eaa0b..5a0eb77 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -47,10 +47,9 @@ jobs: fail-fast: false matrix: include: - - otp-version: '27.1.1' - - otp-version: '26.2.5.4' + - otp-version: '27.1.2' + - otp-version: '26.2.5.5' - otp-version: '25.3.2.15' - - otp-version: '24.3.4.17' steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Install Erlang/OTP @@ -58,10 +57,10 @@ jobs: with: otp-version: ${{ matrix.otp-version }} rebar3-version: '3.23.0' - - name: Install redis-cli required by common tests + - name: Install packages for common tests uses: awalsh128/cache-apt-pkgs-action@a6c3917cc929dd0345bfb2d3feaf9101823370ad # v1.4.2 with: - packages: redis-server + packages: redis-server faketime version: 1.0 - name: Compile run: rebar3 compile diff --git a/.github/workflows/db-compatibility.yml b/.github/workflows/db-compatibility.yml index 15ac57f..363ddb0 100644 --- a/.github/workflows/db-compatibility.yml +++ b/.github/workflows/db-compatibility.yml @@ -29,7 +29,7 @@ jobs: rebar3-version: '3.23.0' - name: Build and run common tests env: - REDIS_DOCKER_IMAGE: valkey/valkey:${{ matrix.valkey-version }} + SERVER_DOCKER_IMAGE: valkey/valkey:${{ matrix.valkey-version }} run: | rebar3 ct @@ -45,10 +45,10 @@ jobs: - redis-version: 6.2.14 steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Install redis-cli required by common tests + - name: Install packages for common tests uses: awalsh128/cache-apt-pkgs-action@a6c3917cc929dd0345bfb2d3feaf9101823370ad # v1.4.2 with: - packages: redis-server + packages: redis-server faketime version: 1.0 - name: Install Erlang/OTP uses: erlef/setup-beam@5304e04ea2b355f03681464e683d92e3b2f18451 # v1.18.2 @@ -57,6 +57,6 @@ jobs: rebar3-version: '3.23.0' - name: Build and run common tests env: - REDIS_DOCKER_IMAGE: redis:${{ matrix.redis-version }} + SERVER_DOCKER_IMAGE: redis:${{ matrix.redis-version }} run: | rebar3 ct diff --git a/test/ered_SUITE.erl b/test/ered_SUITE.erl index 125c1c9..956ded9 100644 --- a/test/ered_SUITE.erl +++ b/test/ered_SUITE.erl @@ -1,5 +1,7 @@ -module(ered_SUITE). +-include("ered_test_utils.hrl"). + -compile([export_all, nowarn_export_all]). all() -> @@ -31,29 +33,13 @@ all() -> t_client_map ]. --define(MSG(Pattern, Timeout), - receive - Pattern -> ok - after - Timeout -> error({timeout, ??Pattern, erlang:process_info(self(), messages)}) - end). - --define(MSG(Pattern), ?MSG(Pattern, 1000)). - --define(OPTIONAL_MSG(Pattern), - receive - Pattern -> ok - after - 0 -> ok - end). - -define(PORTS, [30001, 30002, 30003, 30004, 30005, 30006]). --define(DEFAULT_REDIS_DOCKER_IMAGE, "redis:6.2.7"). +-define(DEFAULT_SERVER_DOCKER_IMAGE, "valkey/valkey:8.0.1"). init_per_suite(_Config) -> stop_containers(), % just in case there is junk from previous runs - Image = os:getenv("REDIS_DOCKER_IMAGE", ?DEFAULT_REDIS_DOCKER_IMAGE), + Image = os:getenv("SERVER_DOCKER_IMAGE", ?DEFAULT_SERVER_DOCKER_IMAGE), EnableDebugCommand = case Image of "redis:" ++ [N, $. | _] when N >= $1, N < $7 -> ""; % Option does not exist. @@ -81,7 +67,7 @@ init_per_suite(_Config) -> init_per_testcase(_Testcase, Config) -> %% Quick check that cluster is OK; otherwise restart everything. - case catch check_consistent_cluster(?PORTS) of + case catch ered_test_utils:check_consistent_cluster(?PORTS, []) of ok -> []; _ -> @@ -90,7 +76,7 @@ init_per_testcase(_Testcase, Config) -> end. create_cluster() -> - Image = os:getenv("REDIS_DOCKER_IMAGE", ?DEFAULT_REDIS_DOCKER_IMAGE), + Image = os:getenv("SERVER_DOCKER_IMAGE", ?DEFAULT_SERVER_DOCKER_IMAGE), Hosts = [io_lib:format("127.0.0.1:~p ", [P]) || P <- ?PORTS], Cmd = io_lib:format("echo 'yes' | " "docker run --name redis-cluster --rm --net=host -i ~s " @@ -113,35 +99,7 @@ wait_for_consistent_cluster() -> wait_for_consistent_cluster(?PORTS). wait_for_consistent_cluster(Ports) -> - fun Loop(N) -> - case check_consistent_cluster(Ports) of - ok -> - true; - {error, _} when N > 0 -> - timer:sleep(500), - Loop(N-1); - {error, SlotMaps} -> - error({timeout_consistent_cluster, SlotMaps}) - end - end(20). - -check_consistent_cluster(Ports) -> - SlotMaps = [fun(Port) -> - {ok, Pid} = ered_client:start_link("127.0.0.1", Port, []), - {ok, SlotMap} = ered_client:command(Pid, [<<"CLUSTER">>, <<"SLOTS">>]), - ered_client:stop(Pid), - SlotMap - end(P) || P <- Ports], - Consistent = case lists:usort(SlotMaps) of - [SlotMap] -> - Ports =:= [Port || {_Ip, Port} <- ered_lib:slotmap_all_nodes(SlotMap)]; - _NotAllIdentical -> - false - end, - case Consistent of - true -> ok; - false -> {error, SlotMaps} - end. + ered_test_utils:wait_for_consistent_cluster(Ports, []). end_per_suite(_Config) -> stop_containers(). @@ -766,12 +724,11 @@ t_queue_full(_) -> recv({reply, {error, queue_overflow}}, 1000), [ct:pal("~s\n", [os:cmd("redis-cli -p " ++ integer_to_list(Port) ++ " CLIENT UNPAUSE")]) || Port <- Ports], - msg(msg_type, queue_full), - #{reason := master_queue_full} = msg(msg_type, cluster_not_ok), - + ?MSG(#{msg_type := queue_full}), + ?MSG(#{msg_type := cluster_not_ok, reason := master_queue_full}), - msg(msg_type, queue_ok), - msg(msg_type, cluster_ok), + ?MSG(#{msg_type := queue_ok}), + ?MSG(#{msg_type := cluster_ok}), [recv({reply, {ok, <<"PONG">>}}, 1000) || _ <- lists:seq(1,20)], no_more_msgs(), ok. @@ -782,10 +739,10 @@ t_kill_client(_) -> %% KILL will close the TCP connection to the redis client ct:pal("~p\n",[os:cmd("redis-cli -p " ++ integer_to_list(Port) ++ " CLIENT KILL TYPE NORMAL")]), - #{addr := {_, Port}} = msg(msg_type, socket_closed), + ?MSG(#{msg_type := socket_closed, addr := {_, Port}}), %% connection reestablished - #{addr := {_, Port}} = msg(msg_type, connected), + ?MSG(#{msg_type := connected, addr := {_, Port}}), no_more_msgs(). t_new_cluster_master(_) -> @@ -793,7 +750,7 @@ t_new_cluster_master(_) -> {close_wait, 100}]), %% Create new master - Image = os:getenv("REDIS_DOCKER_IMAGE", ?DEFAULT_REDIS_DOCKER_IMAGE), + Image = os:getenv("SERVER_DOCKER_IMAGE", ?DEFAULT_SERVER_DOCKER_IMAGE), Pod = cmd_log("docker run --name redis-30007 -d --net=host --restart=on-failure "++Image++" redis-server --cluster-enabled yes --port 30007 --cluster-node-timeout 2000"), cmd_until("redis-cli -p 30007 CLUSTER MEET 127.0.0.1 30001", "OK"), cmd_until("redis-cli -p 30007 CLUSTER INFO", "cluster_state:ok"), @@ -1019,45 +976,7 @@ move_key(SourcePort, DestPort, Key) -> start_cluster() -> start_cluster([]). start_cluster(Opts) -> - [Port1, Port2 | PortsRest] = Ports = ?PORTS, - InitialNodes = [{"127.0.0.1", Port} || Port <- [Port1, Port2]], - - wait_for_consistent_cluster(), - {ok, P} = ered:start_link(InitialNodes, [{info_pid, [self()]}] ++ Opts), - - ConnectedInit = [#{msg_type := connected} = msg(addr, {"127.0.0.1", Port}) - || Port <- [Port1, Port2]], - - #{slot_map := SlotMap} = msg(msg_type, slot_map_updated, 1000), - - IdMap = maps:from_list(lists:flatmap( - fun([_,_|Nodes]) -> - [{Port, Id} || [_Addr, Port, Id |_]<- Nodes] - end, SlotMap)), - - ConnectedRest = [#{msg_type := connected} = msg(addr, {"127.0.0.1", Port}) - || Port <- PortsRest], - - ClusterIds = [Id || #{cluster_id := Id} <- ConnectedInit ++ ConnectedRest], - ClusterIds = [maps:get(Port, IdMap) || Port <- Ports], - - ?MSG(#{msg_type := cluster_ok}), - - %% Clear all old data - [{ok, _} = ered:command_client(Client, [<<"FLUSHDB">>]) || Client <- ered:get_clients(P)], - - no_more_msgs(), - P. - -msg(Key, Val) -> - msg(Key, Val, 1000). - -msg(Key, Val, Time) -> - receive - M = #{Key := Val} -> M - after Time -> - error({timeout, {Key, Val}, erlang:process_info(self(), messages)}) - end. + ered_test_utils:start_cluster(?PORTS, Opts). recv(Msg, Time) -> receive diff --git a/test/ered_test_utils.erl b/test/ered_test_utils.erl new file mode 100644 index 0000000..69a940e --- /dev/null +++ b/test/ered_test_utils.erl @@ -0,0 +1,84 @@ +-module(ered_test_utils). + +-include("ered_test_utils.hrl"). + +-export([start_cluster/2, + check_consistent_cluster/2, + wait_for_consistent_cluster/2]). + +%% Start a cluster client and wait for cluster_ok. +start_cluster(Ports, Opts) -> + [Port1, Port2 | PortsRest] = Ports, + InitialNodes = [{"127.0.0.1", Port} || Port <- [Port1, Port2]], + + {ok, P} = ered:start_link(InitialNodes, [{info_pid, [self()]}] ++ Opts), + + ConnectedInit = [?MSG(#{msg_type := connected, addr := {"127.0.0.1", Port}}) + || Port <- [Port1, Port2]], + + #{slot_map := SlotMap} = ?MSG(#{msg_type := slot_map_updated}, 1000), + + IdMap = maps:from_list(lists:flatmap( + fun([_,_|Nodes]) -> + [{Port, Id} || [_Addr, Port, Id |_]<- Nodes] + end, SlotMap)), + + ConnectedRest = [#{msg_type := connected} = ?MSG(#{addr := {"127.0.0.1", Port}}) + || Port <- PortsRest], + + ClusterIds = [Id || #{cluster_id := Id} <- ConnectedInit ++ ConnectedRest], + ClusterIds = [maps:get(Port, IdMap) || Port <- Ports], + + ?MSG(#{msg_type := cluster_ok}), + + %% Clear all old data + [{ok, _} = ered:command_client(Client, [<<"FLUSHDB">>]) || Client <- ered:get_clients(P)], + + no_more_msgs(), + P. + +%% Check if all nodes have the same single view of the slot map and that +%% all cluster nodes are included in the slot map. +check_consistent_cluster(Ports, ClientOpts) -> + SlotMaps = [fun(Port) -> + {ok, Pid} = ered_client:start_link("127.0.0.1", Port, ClientOpts), + {ok, SlotMap} = ered_client:command(Pid, [<<"CLUSTER">>, <<"SLOTS">>]), + ered_client:stop(Pid), + SlotMap + end(P) || P <- Ports], + Consistent = case lists:usort(SlotMaps) of + [SlotMap] -> + Ports =:= [Port || {_Ip, Port} <- ered_lib:slotmap_all_nodes(SlotMap)]; + _NotAllIdentical -> + false + end, + case Consistent of + true -> ok; + false -> {error, SlotMaps} + end. + +%% Wait until cluster is consistent, i.e all nodes have the same single view +%% of the slot map and all cluster nodes are included in the slot map. +wait_for_consistent_cluster(Ports, ClientOpts) -> + fun Loop(N) -> + case ered_test_utils:check_consistent_cluster(Ports, ClientOpts) of + ok -> + true; + {error, _} when N > 0 -> + timer:sleep(500), + Loop(N-1); + {error, SlotMaps} -> + error({timeout_consistent_cluster, SlotMaps}) + end + end(20). + +no_more_msgs() -> + {messages,Msgs} = erlang:process_info(self(), messages), + case Msgs of + [] -> + ok; + Msgs -> + error({unexpected,Msgs}) + end. + + diff --git a/test/ered_test_utils.hrl b/test/ered_test_utils.hrl new file mode 100644 index 0000000..19adc39 --- /dev/null +++ b/test/ered_test_utils.hrl @@ -0,0 +1,20 @@ +%% Expect to receive a message within timeout. +-define(MSG(Pattern, Timeout), + (fun () -> + receive + Pattern = M -> M + after + Timeout -> error({timeout, ??Pattern, erlang:process_info(self(), messages)}) + end + end)()). + +%% Expect to receive a message within a second. +-define(MSG(Pattern), ?MSG(Pattern, 1000)). + +%% Check message queue for optional messages. +-define(OPTIONAL_MSG(Pattern), + receive + Pattern -> ok + after + 0 -> ok + end). diff --git a/test/ered_tls_SUITE.erl b/test/ered_tls_SUITE.erl new file mode 100644 index 0000000..1252714 --- /dev/null +++ b/test/ered_tls_SUITE.erl @@ -0,0 +1,192 @@ +-module(ered_tls_SUITE). + +-include("ered_test_utils.hrl"). + +-compile([export_all, nowarn_export_all]). + +all() -> + [t_command, + {group, require_faketime}]. + +groups() -> + %% Tests that require 'faketime' to manipulate the system time. + [{require_faketime, [sequence], + [t_expired_cert_tls_1_2, + t_expired_cert_tls_1_3]}]. + +-define(PORTS, [31001, 31002, 31003, 31004, 31005, 31006]). + +-define(DEFAULT_SERVER_DOCKER_IMAGE, "valkey/valkey:8.0.1"). + +-define(TLS_OPTS, [{cacertfile, "tls/ca.crt"}, + {certfile, "tls/client.crt"}, + {keyfile, "tls/client.key"}, + {verify, verify_peer}, + {server_name_indication, "Server"}]). + +-define(CLIENT_OPTS, [{connection_opts, [{tls_options, ?TLS_OPTS}]}]). + +init_per_suite(_Config) -> + stop_containers(), % just in case there is junk from previous runs + generate_tls_certs(), + start_containers(), + create_cluster(), + ered_test_utils:wait_for_consistent_cluster(?PORTS, ?CLIENT_OPTS), + []. + +end_per_suite(_Config) -> + stop_containers(). + +init_per_group(require_faketime, _Config) -> + case os:find_executable("faketime") of + false -> + {skip, "Executable faketime not found"}; + _ -> + ok + end. + +end_per_group(require_faketime, _Config) -> + ok. + +init_per_testcase(_Testcase, Config) -> + %% Make sure we have a valid client cert. + generate_client_cert(), + + %% Quick check that cluster is OK; otherwise restart everything. + case catch ered_test_utils:check_consistent_cluster(?PORTS, ?CLIENT_OPTS) of + ok -> + []; + _ -> + ct:pal("Re-initialize the cluster"), + init_per_suite(Config) + end. + +generate_tls_certs() -> + filelib:ensure_path("tls/"), + %% Generate CA. + cmd_log("openssl genrsa -out tls/ca.key 4096"), + cmd_log("openssl req -x509 -new -nodes -sha256 -key tls/ca.key -days 3650 -subj '/O=Test/CN=Certificate Authority' -out tls/ca.crt"), + %% Generate server certificate. + cmd_log("openssl genrsa -out tls/server.key 2048"), + cmd_log("openssl req -new -sha256 -key tls/server.key -subj '/O=Test/CN=Server' | " + "openssl x509 -req -sha256 -CA tls/ca.crt -CAkey tls/ca.key -CAserial tls/ca.txt -CAcreateserial -days 1 -out tls/server.crt"), + %% Generate client key and cert. + cmd_log("openssl genrsa -out tls/client.key 2048"), + generate_client_cert(), + %% Let the pods read the key files + cmd_log("chmod 644 tls/*.key"). + +generate_client_cert() -> + cmd_log("openssl req -new -sha256 -key tls/client.key -subj '/O=Test/CN=Client' | " + "openssl x509 -req -sha256 -CA tls/ca.crt -CAkey tls/ca.key -CAserial tls/ca.txt -CAcreateserial -days 1 -out tls/client.crt"), + %% Since OTP caches the content of client.crt we need to clear the cache. + ssl:clear_pem_cache(). + +generate_expired_client_cert() -> + cmd_log("openssl req -new -sha256 -key tls/client.key -subj '/O=Test/CN=Client' | " + "faketime '2020-01-01 10:00:00' " + "openssl x509 -req -sha256 -CA tls/ca.crt -CAkey tls/ca.key -CAserial tls/ca.txt -CAcreateserial -days 1 -out tls/client.crt"), + %% Since OTP caches the content of client.crt we need to clear the cache. + ssl:clear_pem_cache(). + +start_containers() -> + Image = os:getenv("SERVER_DOCKER_IMAGE", ?DEFAULT_SERVER_DOCKER_IMAGE), + EnableDebugCommand = case Image of + "redis:" ++ [N, $. | _] when N >= $1, N < $7 -> + ""; % Option does not exist. + _Redis7 -> + " --enable-debug-command yes" + end, + {ok, Path} = file:get_cwd(), + cmd_log([io_lib:format("docker run --name redis-tls-~p -d --net=host" + " -v ~s/tls:/tls:ro" + " --restart=on-failure ~s redis-server" + "~s" + " --cluster-enabled yes --tls-cluster yes" + " --tls-port ~p --port 0" + " --tls-replication yes" + " --tls-cert-file /tls/server.crt" + " --tls-key-file /tls/server.key" + " --tls-ca-cert-file /tls/ca.crt" + " --cluster-node-timeout 2000;", + [P, Path, Image, EnableDebugCommand, P]) + || P <- ?PORTS]), + + timer:sleep(3000), + lists:foreach(fun(Port) -> + {ok,Pid} = ered_client:start_link("127.0.0.1", Port, ?CLIENT_OPTS), + {ok, <<"PONG">>} = ered_client:command(Pid, [<<"ping">>]), + ered_client:stop(Pid) + end, ?PORTS). + +stop_containers() -> + cmd_log([io_lib:format("docker stop redis-tls-~p; docker rm redis-tls-~p;", [P, P]) + || P <- ?PORTS ++ [cli]]). + +create_cluster() -> + Image = os:getenv("SERVER_DOCKER_IMAGE", ?DEFAULT_SERVER_DOCKER_IMAGE), + Hosts = [io_lib:format("127.0.0.1:~p ", [P]) || P <- ?PORTS], + {ok, Path} = file:get_cwd(), + Cmd = io_lib:format("echo 'yes' | " + "docker run --name redis-tls-cli --rm --net=host -v ~s/tls:/tls:ro -i ~s " + "redis-cli --tls --cacert /tls/ca.crt --cert /tls/server.crt --key /tls/server.key" + " --cluster-replicas 1 --cluster create ~s", + [Path, Image, Hosts]), + cmd_log(Cmd). + +no_more_msgs() -> + {messages,Msgs} = erlang:process_info(self(), messages), + case Msgs of + [] -> + ok; + Msgs -> + error({unexpected,Msgs}) + end. + +cmd_log(Cmd) -> + R = os:cmd(Cmd), + ct:pal("~s\n~s\n", [Cmd, R]), + R. + + +%% Basic test of commands when using TLS. +t_command(_) -> + Opts = [{client_opts, ?CLIENT_OPTS}], + R = ered_test_utils:start_cluster(?PORTS, Opts), + + lists:foreach(fun(N) -> + {ok, <<"OK">>} = ered:command(R, [<<"SET">>, N, N], N) + end, + [integer_to_binary(N) || N <- lists:seq(1,100)]), + no_more_msgs(). + +%% Setup ered using an expired client certificate in TLSv1.2. +t_expired_cert_tls_1_2(_) -> + generate_expired_client_cert(), + + ClientOpts = [{connection_opts, [{tls_options, ?TLS_OPTS ++ [{versions, ['tlsv1.2']}]}]}], + + {ok, _R} = ered:start_link([{"127.0.0.1", 31001}], + [{info_pid, [self()]}, {client_opts, ClientOpts}]), + + ?MSG(#{msg_type := connect_error, addr := {"127.0.0.1", 31001}, + reason := {tls_alert, + {certificate_expired, _}}}), + ?MSG(#{msg_type := node_down_timeout, addr := {"127.0.0.1", 31001}}, 2500), + no_more_msgs(). + +%% Setup ered using an expired client certificate in TLSv1.3. +t_expired_cert_tls_1_3(_) -> + generate_expired_client_cert(), + + ClientOpts = [{connection_opts, [{tls_options, ?TLS_OPTS ++ [{versions, ['tlsv1.3']}]}]}], + + {ok, _R} = ered:start_link([{"127.0.0.1", 31001}], + [{info_pid, [self()]}, {client_opts, ClientOpts}]), + + ?MSG(#{msg_type := socket_closed, addr := {"127.0.0.1", 31001}, + reason := {recv_exit, + {tls_alert, + {certificate_expired, _}}}}), + ?MSG(#{msg_type := node_down_timeout, addr := {"127.0.0.1", 31001}}, 2500), + no_more_msgs().