Merge pull request #273 from aws-solutions/release/v4.0.5

Release v4.0.5
aws-solutions · Oct 29, 2024 · 885146e · 885146e
2 parents 28b94cf + 0294e3b
commit 885146e
Show file tree

Hide file tree

Showing 53 changed files with 7,100 additions and 654 deletions.
diff --git a/.gitignore b/.gitignore
@@ -22,13 +22,13 @@ source/**/.venv**
 source/**/test/__pycache__
 source/**/test/.pytest**
 
-
+# IDE specific config files
+.idea/
 
 
 
 # Unit test / coverage reports
 **/coverage
-**/package
 *coverage
 source/test/coverage-reports/
 **/.venv-test
@@ -55,3 +55,6 @@ urllib*
 # Ignore lib folder within each lambda folder. Only include lib folder at upper level
 /source/**/lib
 !/source/lib
+
+# Build script output from 'poetry export'
+requirements.txt
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,20 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [4.0.5] - 2024-10-24
+
+### Changed
+
+- Add poetry.lock to pin dependency versions for Python code
+- Adapt build scripts to use Poetry for dependency management
+- Replace native Python logger with aws_lambda_powertools logger
+
+## [4.0.4] - 2024-09-23
+
+### Fixed
+- Patched dependency version of `requests` to `2.32.3` to mitigate [CVE-2024-3651](https://nvd.nist.gov/vuln/detail/CVE-2024-3651)
+- Pinned all dependencies to specific versions for reproducible builds and to enable security scanning
+- Allow installing the latest version of `urllib3` as a transitive dependency
 
 ## [4.0.4] - 2024-09-23
 

diff --git a/deployment/build-s3-dist.sh b/deployment/build-s3-dist.sh
@@ -1,5 +1,10 @@
 #!/bin/bash
-# This assumes all of the OS-level configuration has been completed and git repo has already been cloned 
+#
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# This assumes all of the OS-level configuration has been completed and git repo has already been cloned
 # 
 # This script should be run from the repo's deployment directory 
 # cd deployment 
@@ -79,10 +84,21 @@ do
 done
 
 
+  # Check if poetry is available in the shell
+  if command -v poetry >/dev/null 2>&1; then
+    POETRY_COMMAND="poetry"
+  elif [ -n "$POETRY_HOME" ] && [ -x "$POETRY_HOME/bin/poetry" ]; then
+    POETRY_COMMAND="$POETRY_HOME/bin/poetry"
+  else
+    echo "Poetry is not available. Aborting script." >&2
+    exit 1
+  fi
+
 echo "------------------------------------------------------------------------------"
 echo "[Packing] Log Parser"
 echo "------------------------------------------------------------------------------"
 cd "$source_dir"/log_parser || exit 1
+"$POETRY_COMMAND" export --without dev -f requirements.txt --output requirements.txt --without-hashes
 pip3 install -r requirements.txt --target ./package
 cd "$source_dir"/log_parser/package || exit 1
 zip -q -r9 "$build_dist_dir"/log_parser.zip .
@@ -97,6 +113,7 @@ echo "--------------------------------------------------------------------------
 echo "[Packing] Access Handler"
 echo "------------------------------------------------------------------------------"
 cd "$source_dir"/access_handler || exit 1
+"$POETRY_COMMAND" export --without dev -f requirements.txt --output requirements.txt --without-hashes
 pip3 install -r requirements.txt --target ./package
 cd "$source_dir"/access_handler/package || exit 1
 zip -q -r9 "$build_dist_dir"/access_handler.zip .
@@ -111,6 +128,7 @@ echo "--------------------------------------------------------------------------
 echo "[Packing] IP Lists Parser"
 echo "------------------------------------------------------------------------------"
 cd "$source_dir"/reputation_lists_parser || exit 1
+"$POETRY_COMMAND" export --without dev -f requirements.txt --output requirements.txt --without-hashes
 pip3 install -r requirements.txt --target ./package
 cd "$source_dir"/reputation_lists_parser/package || exit 1
 zip -q -r9 "$build_dist_dir"/reputation_lists_parser.zip .
@@ -125,6 +143,7 @@ echo "--------------------------------------------------------------------------
 echo "[Packing] Custom Resource"
 echo "------------------------------------------------------------------------------"
 cd "$source_dir"/custom_resource || exit 1
+"$POETRY_COMMAND" export --without dev -f requirements.txt --output requirements.txt --without-hashes
 pip3 install -r requirements.txt --target ./package
 cd "$source_dir"/custom_resource/package || exit 1
 zip -q -r9 "$build_dist_dir"/custom_resource.zip .
@@ -139,6 +158,7 @@ echo "--------------------------------------------------------------------------
 echo "[Packing] Helper"
 echo "------------------------------------------------------------------------------"
 cd "$source_dir"/helper || exit 1
+"$POETRY_COMMAND" export --without dev -f requirements.txt --output requirements.txt --without-hashes
 pip3 install -r requirements.txt --target ./package
 cd "$source_dir"/helper/package || exit 1
 zip -q -r9 "$build_dist_dir"/helper.zip ./*
@@ -153,6 +173,7 @@ echo "--------------------------------------------------------------------------
 echo "[Packing] Timer"
 echo "------------------------------------------------------------------------------"
 cd "$source_dir"/timer || exit 1
+"$POETRY_COMMAND" export --without dev -f requirements.txt --output requirements.txt --without-hashes
 pip3 install -r requirements.txt --target ./package
 cd "$source_dir"/timer/package || exit 1
 zip -q -r9 "$build_dist_dir"/timer.zip ./*
@@ -169,6 +190,7 @@ echo "--------------------------------------------------------------------------
 echo "[Packing] IP Retention Handler"
 echo "------------------------------------------------------------------------------"
 cd "$source_dir"/ip_retention_handler || exit 1
+"$POETRY_COMMAND" export --without dev -f requirements.txt --output requirements.txt --without-hashes
 pip3 install -r requirements.txt --target ./package
 cd "$source_dir"/ip_retention_handler/package || exit 1
 zip -q -r9 "$build_dist_dir"/ip_retention_handler.zip ./*

diff --git a/deployment/run-unit-tests.sh b/deployment/run-unit-tests.sh
@@ -16,46 +16,41 @@ source_dir="$(cd $template_dir/../source; pwd -P)"
 echo "Current directory: $template_dir"
 echo "Source directory: $source_dir"
 
-setup_python_env() {
-	if [ -d "./.venv-test" ]; then
-		echo "Reusing already setup python venv in ./.venv-test. Delete ./.venv-test if you want a fresh one created."
-		return
-	fi
-	echo "Setting up python venv"
-	python3 -m venv .venv-test
-	echo "Initiating virtual environment"
-	source .venv-test/bin/activate
-	echo "Installing python packages"
-	pip3 install -r requirements.txt --target .
-	pip3 install -r requirements_dev.txt
-	echo "deactivate virtual environment"
-	deactivate
-}
-
 run_python_lambda_test() {
 	lambda_name=$1
 	lambda_description=$2
 	echo "------------------------------------------------------------------------------"
 	echo "[Test] Python Unit Test: $lambda_description"
 	echo "------------------------------------------------------------------------------"
 
-    cd $source_dir/$lambda_name
-    echo "run_python_lambda_test: Current directory: $source_dir/$lambda_name"
+  cd $source_dir/$lambda_name
+  echo "run_python_lambda_test: Current directory: $source_dir/$lambda_name"
 
-    [ "${CLEAN:-true}" = "true" ] && rm -fr .venv-test
+  echo "Installing python packages"
 
-	setup_python_env
+  # Check if poetry is available in the shell
+  if command -v poetry >/dev/null 2>&1; then
+    POETRY_COMMAND="poetry"
+  elif [ -n "$POETRY_HOME" ] && [ -x "$POETRY_HOME/bin/poetry" ]; then
+    POETRY_COMMAND="$POETRY_HOME/bin/poetry"
+  else
+    echo "Poetry is not available. Aborting script." >&2
+    exit 1
+  fi
 
-    echo "Initiating virtual environment"
-	source .venv-test/bin/activate
+  # This creates a virtual environment based on the project name in pyproject.toml.
+  "$POETRY_COMMAND" install
 
-    # Set coverage report path
+  # Activate the virtual environment.
+  source $("$POETRY_COMMAND" env info --path)/bin/activate
+
+  # Set coverage report path
 	mkdir -p $source_dir/test/coverage-reports
 	coverage_report_path=$source_dir/test/coverage-reports/$lambda_name.coverage.xml
 	echo "coverage report path set to $coverage_report_path"
 
-    # Run unit tests with coverage
-    python3 -m pytest --cov --cov-report=term-missing --cov-report "xml:$coverage_report_path"
+  # Run unit tests with coverage
+  python3 -m pytest --cov --cov-report=term-missing --cov-report "xml:$coverage_report_path"
 
 	if [ "$?" = "1" ]; then
 		echo "(deployment/run-unit-tests.sh) ERROR: there is likely output above." 1>&2
@@ -67,11 +62,10 @@ run_python_lambda_test() {
     # absolute paths for source directories, this substitution is used to convert each absolute source directory
     # path to the corresponding project relative path. The $source_dir holds the absolute path for source directory.
 	sed -i -e "s,<source>$source_dir,<source>source,g" $coverage_report_path
-	echo "deactivate virtual environment"
-	deactivate
+
+  deactivate
 
 	if [ "${CLEAN:-true}" = "true" ]; then
-		rm -fr .venv-test
 		# Note: leaving $source_dir/test/coverage-reports to allow further processing of coverage reports
 		rm -fr coverage
 		rm .coverage

diff --git a/source/access_handler/access_handler.py b/source/access_handler/access_handler.py
@@ -1,25 +1,21 @@
-######################################################################################################################
-#  Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.                                           #
-#                                                                                                                    #
-#  Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance    #
-#  with the License. A copy of the License is located at                                                             #
-#                                                                                                                    #
-#      http://www.apache.org/licenses/LICENSE-2.0                                                                    #
-#                                                                                                                    #
-#  or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES #
-#  OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions    #
-#  and limitations under the License.                                                                                #
-######################################################################################################################
+#  Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#  SPDX-License-Identifier: Apache-2.0
 
 import os
-from ipaddress import ip_address
 from ipaddress import IPv4Network
 from ipaddress import IPv6Network
+from ipaddress import ip_address
 from os import environ
-from lib.waflibv2 import WAFLIBv2
-from lib.solution_metrics import send_metrics
+
+from aws_lambda_powertools import Logger
+
 from lib.cw_metrics_util import WAFCloudWatchMetrics
-from lib.logging_util import set_log_level
+from lib.solution_metrics import send_metrics
+from lib.waflibv2 import WAFLIBv2
+
+logger = Logger(
+    level=os.getenv('LOG_LEVEL')
+)
 
 waflib = WAFLIBv2()
 CW_METRIC_PERIOD_SECONDS = 12 * 3600    # Twelve hours in seconds
@@ -38,13 +34,13 @@ def initialize_usage_data():
     return usage_data
 
 
-def get_bad_bot_usage_data(log, scope, cw, ipset_name_v4, ipset_arn_v4, ipset_name_v6, ipset_arn_v6, usage_data):
-    log.info("[get_bad_bot_usage_data] Get bad bot data")
+def get_bad_bot_usage_data(scope, cw, ipset_name_v4, ipset_arn_v4, ipset_name_v6, ipset_arn_v6, usage_data):
+    logger.info("[get_bad_bot_usage_data] Get bad bot data")
 
     if 'IP_SET_ID_BAD_BOTV4' in environ or 'IP_SET_ID_BAD_BOTV6' in environ:
         # Get the count of ipv4 and ipv6 in bad bot ip sets
-        ipv4_count = waflib.get_ip_address_count(log, scope, ipset_name_v4, ipset_arn_v4)
-        ipv6_count = waflib.get_ip_address_count(log, scope, ipset_name_v6, ipset_arn_v6)
+        ipv4_count = waflib.get_ip_address_count(logger, scope, ipset_name_v4, ipset_arn_v4)
+        ipv6_count = waflib.get_ip_address_count(logger, scope, ipset_name_v6, ipset_arn_v6)
         usage_data['bad_bot_ip_set_size'] = str(ipv4_count + ipv6_count)
 
         # Get the count of blocked requests for the bad bot rule from cloudwatch metrics
@@ -59,14 +55,14 @@ def get_bad_bot_usage_data(log, scope, cw, ipset_name_v4, ipset_arn_v4, ipset_na
     return usage_data
 
 
-def send_anonymized_usage_data(log, scope, ipset_name_v4, ipset_arn_v4, ipset_name_v6, ipset_arn_v6):
+def send_anonymized_usage_data(scope, ipset_name_v4, ipset_arn_v4, ipset_name_v6, ipset_arn_v6):
     try:
         if 'SEND_ANONYMIZED_USAGE_DATA' not in environ or os.getenv('SEND_ANONYMIZED_USAGE_DATA').lower() != 'yes':
             return
 
-        log.info("[send_anonymized_usage_data] Start")
+        logger.info("[send_anonymized_usage_data] Start")
 
-        cw = WAFCloudWatchMetrics(log)
+        cw = WAFCloudWatchMetrics(logger)
         usage_data = initialize_usage_data()
 
         # Get the count of allowed requests for all the waf rules from cloudwatch metrics
@@ -90,22 +86,22 @@ def send_anonymized_usage_data(log, scope, ipset_name_v4, ipset_arn_v4, ipset_na
         )
 
         # Get bad bot specific usage data
-        usage_data = get_bad_bot_usage_data(log, scope, cw, ipset_name_v4, ipset_arn_v4,
-            ipset_name_v6, ipset_arn_v6, usage_data)
+        usage_data = get_bad_bot_usage_data(scope, cw, ipset_name_v4, ipset_arn_v4, ipset_name_v6, ipset_arn_v6,
+                                            usage_data)
 
         # Send usage data
-        log.info('[send_anonymized_usage_data] Send usage data: \n{}'.format(usage_data))
+        logger.info('[send_anonymized_usage_data] Send usage data: \n{}'.format(usage_data))
         response = send_metrics(data=usage_data)
         response_code = response.status_code
-        log.info('[send_anonymized_usage_data] Response Code: {}'.format(response_code))
-        log.info("[send_anonymized_usage_data] End")
+        logger.info('[send_anonymized_usage_data] Response Code: {}'.format(response_code))
+        logger.info("[send_anonymized_usage_data] End")
 
     except Exception as error:
-        log.info("[send_anonymized_usage_data] Failed to Send Data")
-        log.error(str(error))
+        logger.info("[send_anonymized_usage_data] Failed to Send Data")
+        logger.error(str(error))
 
 
-def add_ip_to_ip_set(log, scope, ip_type, source_ip, ipset_name, ipset_arn):
+def add_ip_to_ip_set(scope, ip_type, source_ip, ipset_name, ipset_arn):
     new_address = []
     output = None
 
@@ -114,24 +110,24 @@ def add_ip_to_ip_set(log, scope, ip_type, source_ip, ipset_name, ipset_arn):
     elif ip_type == "IPV6":
         new_address.append(IPv6Network(source_ip).with_prefixlen)
 
-    ipset = waflib.get_ip_set(log, scope, ipset_name, ipset_arn)
+    ipset = waflib.get_ip_set(logger, scope, ipset_name, ipset_arn)
     # merge old addresses with this one
-    log.info(ipset)
+    logger.info(ipset)
     current_list = ipset["IPSet"]["Addresses"]
-    log.info(current_list)
+    logger.info(current_list)
     new_list = list(set(current_list) | set(new_address))
-    log.info(new_list)
-    output = waflib.update_ip_set(log, scope, ipset_name, ipset_arn, new_list)
+    logger.info(new_list)
+    output = waflib.update_ip_set(logger, scope, ipset_name, ipset_arn, new_list)
 
     return output
 
 
 # ======================================================================================================================
 # Lambda Entry Point
 # ======================================================================================================================
+@logger.inject_lambda_context
 def lambda_handler(event, _):
-    log = set_log_level()
-    log.info('[lambda_handler] Start')
+    logger.info('[lambda_handler] Start')
 
     # ----------------------------------------------------------
     # Read inputs parameters
@@ -144,30 +140,30 @@ def lambda_handler(event, _):
         ipset_arn_v6 = os.getenv('IP_SET_ID_BAD_BOTV6')
 
         # Fixed as old line had security exposure based on user supplied IP address
-        log.info("Event->%s<-", str(event))
+        logger.info("Event->%s<-", str(event))
         if event['requestContext']['identity']['userAgent'] == 'Amazon CloudFront':
             source_ip = str(event['headers']['X-Forwarded-For'].split(',')[0].strip())
         else:
             source_ip = str(event['requestContext']['identity']['sourceIp'])
 
-        log.info("scope = %s", scope)
-        log.info("ipset_name_v4 = %s", ipset_name_v4)
-        log.info("ipset_name_v6 = %s", ipset_name_v6)
-        log.info("IPARNV4 = %s", ipset_arn_v4)
-        log.info("IPARNV6 = %s", ipset_arn_v6)
-        log.info("source_ip = %s", source_ip)
+        logger.info("scope = %s", scope)
+        logger.info("ipset_name_v4 = %s", ipset_name_v4)
+        logger.info("ipset_name_v6 = %s", ipset_name_v6)
+        logger.info("IPARNV4 = %s", ipset_arn_v4)
+        logger.info("IPARNV6 = %s", ipset_arn_v6)
+        logger.info("source_ip = %s", source_ip)
 
         ip_type = "IPV%s" % ip_address(source_ip).version
         output = None
         if ip_type == "IPV4":
-            output = add_ip_to_ip_set(log, scope, ip_type, source_ip, ipset_name_v4, ipset_arn_v4)
+            output = add_ip_to_ip_set(scope, ip_type, source_ip, ipset_name_v4, ipset_arn_v4)
         elif ip_type == "IPV6":
-            output = add_ip_to_ip_set(log, scope, ip_type, source_ip, ipset_name_v6, ipset_arn_v6)
+            output = add_ip_to_ip_set(scope, ip_type, source_ip, ipset_name_v6, ipset_arn_v6)
     except Exception as e:
-        log.error(e)
+        logger.error(e)
         raise
     finally:
-        log.info("Output->%s<-", output)
+        logger.info("Output->%s<-", output)
         message = "message: [%s] Thanks for the visit." % source_ip
         response = {
             'statusCode': 200,
@@ -176,7 +172,7 @@ def lambda_handler(event, _):
         }
 
     if output is not None:
-        send_anonymized_usage_data(log, scope, ipset_name_v4, ipset_arn_v4, ipset_name_v6, ipset_arn_v6)
-    log.info('[lambda_handler] End')
+        send_anonymized_usage_data(scope, ipset_name_v4, ipset_arn_v4, ipset_name_v6, ipset_arn_v6)
+    logger.info('[lambda_handler] End')
 
     return response