diff --git a/.env.dist b/.env.dist index 715df3296..5f87655d1 100644 --- a/.env.dist +++ b/.env.dist @@ -136,7 +136,7 @@ RALPH_BACKENDS__DATA__CLICKHOUSE__TEST_TABLE_NAME=test_xapi_events_all # LRS HTTP backend -RALPH_BACKENDS__DATA__LRS__BASE_URL=http://ralph:secret@0.0.0.0:8100/ +RALPH_BACKENDS__DATA__LRS__BASE_URL=http://0.0.0.0:8100/ RALPH_BACKENDS__DATA__LRS__USERNAME=ralph RALPH_BACKENDS__DATA__LRS__PASSWORD=secret RALPH_BACKENDS__DATA__LRS__HEADERS__X_EXPERIENCE_API_VERSION=1.0.3 diff --git a/docs/commands.md b/docs/cli.md similarity index 86% rename from docs/commands.md rename to docs/cli.md index 12616600f..8aeff9510 100644 --- a/docs/commands.md +++ b/docs/cli.md @@ -1,4 +1,4 @@ -# Commands +# CLI ::: mkdocs-click :module: ralph.cli diff --git a/mkdocs.yml b/mkdocs.yml index 526600025..9dae9bedc 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -44,6 +44,7 @@ markdown_extensions: nav: - Ralph: index.md - Features: + - CLI: cli.md - LRS HTTP server: features/api.md - Backends for data storage: features/backends.md - Learning statements models: features/models.md diff --git a/pyproject.toml b/pyproject.toml index 5cc665a00..b904ea67c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,6 +84,7 @@ cli = [ "bcrypt>=4.0.0", "click>=8.1.0", "click-option-group>=0.5.0", + "docstring-parser>=0.15", "sentry-sdk[fastapi]>=1.9.0", ] dev = [ diff --git a/src/ralph/backends/data/async_ws.py b/src/ralph/backends/data/async_ws.py index 074f8c7f3..c41fd2964 100644 --- a/src/ralph/backends/data/async_ws.py +++ b/src/ralph/backends/data/async_ws.py @@ -4,7 +4,7 @@ from typing import AsyncIterator, Optional, Union import websockets -from pydantic import AnyUrl, PositiveInt +from pydantic import AnyUrl, PositiveInt, parse_obj_as from websockets.http import USER_AGENT from ralph.backends.data.base import ( @@ -74,7 +74,7 @@ class Config(BaseSettingsConfig): env_prefix = "RALPH_BACKENDS__DATA__WS__" CLIENT_OPTIONS: WSClientOptions = WSClientOptions() - URI: AnyUrl + URI: Optional[AnyUrl] = parse_obj_as(AnyUrl, "ws://localhost:8765") class AsyncWSDataBackend(BaseAsyncDataBackend[WSDataBackendSettings, str]): diff --git a/src/ralph/backends/data/base.py b/src/ralph/backends/data/base.py index 51e7a1bc6..d24351f9c 100644 --- a/src/ralph/backends/data/base.py +++ b/src/ralph/backends/data/base.py @@ -666,8 +666,11 @@ async def _queue_records( await queue.put(None) +DataBackend = TypeVar("DataBackend", BaseDataBackend, BaseAsyncDataBackend) + + def get_backend_generic_argument( - backend_class: Type[Union[BaseDataBackend, BaseAsyncDataBackend]], + backend_class: Type[DataBackend], position: DataBackendArgument, ) -> Optional[Type]: """Return the generic argument of `backend_class` at specified `position`.""" @@ -700,9 +703,7 @@ def get_backend_generic_argument( return None -def set_backend_settings_class( - backend_class: Type[Union[BaseDataBackend, BaseAsyncDataBackend]] -) -> None: +def set_backend_settings_class(backend_class: Type[DataBackend]) -> None: """Set `settings_class` attribute with `Config.env_prefix` for `backend_class`.""" settings_class = get_backend_generic_argument( backend_class, DataBackendArgument.SETTINGS @@ -711,9 +712,7 @@ def set_backend_settings_class( backend_class.settings_class = settings_class -def set_backend_query_class( - backend_class: Type[Union[BaseDataBackend, BaseAsyncDataBackend]] -) -> None: +def set_backend_query_class(backend_class: Type[DataBackend]) -> None: """Set `query_class` attribute for `backend_class`.""" query_class = 
get_backend_generic_argument(backend_class, DataBackendArgument.QUERY) if query_class: diff --git a/src/ralph/backends/data/mixins.py b/src/ralph/backends/data/mixins.py index 97c282d69..092dedb81 100644 --- a/src/ralph/backends/data/mixins.py +++ b/src/ralph/backends/data/mixins.py @@ -2,6 +2,7 @@ import json import logging +from typing import Callable from ralph.conf import settings @@ -16,7 +17,7 @@ class HistoryMixin: """ @property - def history(self): + def history(self) -> list: """Get backend history.""" logger.debug("Loading history file: %s", str(settings.HISTORY_FILE)) @@ -25,12 +26,12 @@ def history(self): with settings.HISTORY_FILE.open( encoding=settings.LOCALE_ENCODING ) as history_file: - self._history = json.load(history_file) + self._history: list = json.load(history_file) except FileNotFoundError: self._history = [] return self._history - def write_history(self, history): + def write_history(self, history: list) -> None: """Write given history as a JSON file.""" logger.debug("Writing history file: %s", str(settings.HISTORY_FILE)) @@ -45,7 +46,7 @@ def write_history(self, history): # Update history self._history = history - def clean_history(self, selector): + def clean_history(self, selector: Callable[[dict], bool]) -> None: """Clean selected events from the history. selector: a callable that selects events that need to be removed @@ -53,14 +54,14 @@ def clean_history(self, selector): self._history = list(filter(lambda event: not selector(event), self.history)) self.write_history(self._history) - def append_to_history(self, event): + def append_to_history(self, event: dict) -> None: """Append event to history.""" self.write_history(self.history + [event]) - def get_command_history(self, backend_name, command): + def get_command_history(self, backend_name: str, command: str) -> list: """Extract entry ids from the history for a given command and backend_name.""" - def filter_by_name_and_command(entry): + def filter_by_name_and_command(entry: dict) -> bool: """Check whether the history entry matches the backend_name and command.""" return entry.get("backend") == backend_name and ( command in [entry.get("command"), entry.get("action")] diff --git a/src/ralph/cli.py b/src/ralph/cli.py index 9a2aab95a..5c847d7c9 100644 --- a/src/ralph/cli.py +++ b/src/ralph/cli.py @@ -1,18 +1,21 @@ """Ralph CLI entrypoint.""" +import copy import json import logging import re import sys +from functools import partial, update_wrapper from inspect import isasyncgen, isclass from pathlib import Path from tempfile import NamedTemporaryFile -from typing import Any, Callable, Dict, Optional, Type, Union +from typing import Any, Callable, Optional, Type, Union import bcrypt try: import click + from docstring_parser import parse except ModuleNotFoundError as err: raise ModuleNotFoundError( "You need to install 'cli' optional dependencies to use the ralph " @@ -26,13 +29,17 @@ # dependencies are not installed. 
pass from click_option_group import optgroup +from pydantic import AnyUrl from ralph import __version__ as ralph_version from ralph.backends.data.base import ( AsyncWritable, BaseAsyncDataBackend, + BaseDataBackendSettings, BaseOperationType, BaseQuery, + DataBackend, + Writable, ) from ralph.backends.loader import ( get_cli_backends, @@ -47,7 +54,6 @@ from ralph.models.validator import Validator from ralph.utils import ( execute_async, - get_backend_class, get_backend_instance, get_root_logger, import_string, @@ -182,53 +188,175 @@ def convert(self, value, param, ctx): return options -class RalphCLI(click.Group): +class RalphBackendCLI(click.Group): """Ralph CLI entrypoint.""" - lazy_commands: Dict[str, Callable] = {} - - @classmethod - def lazy_backends_options( - cls, get_backends: Callable, name: Optional[str] = None - ) -> Callable: - """Lazy backend-related options decorator for Ralph commands.""" - - def wrapper(command): - command_name = name or command.__name__ - cls.lazy_commands[command_name] = lambda: backends_options( - get_backends(), command_name - )(command) - return command - - return wrapper - - def invoke(self, ctx: click.Context): - """Configure logging before click calls `list_commands` or `get_command`.""" - configure_logging() - verbosity = ctx.params.get("verbosity") - if verbosity is not None: - level = getattr(logging, verbosity) - get_root_logger().setLevel(level) - for handler in get_root_logger().handlers: - handler.setLevel(level) - return super().invoke(ctx) + def __init__( + self, *args, get_backends: Callable, method: Callable, **kwargs: Any + ) -> None: + """Initialize Ralph backend cli.""" + super().__init__(*args, **kwargs) + self.get_backends = get_backends + self.method = method + self.method_click_params = self.method.__click_params__ + self.method_doc = self.method.__doc__ def list_commands(self, ctx): - """Register all lazy commands before calling `list_commands`.""" - for command in self.lazy_commands.values(): - command() - self.lazy_commands = {} + """Register all sub-commands before calling `list_commands`.""" + for backend_class in self.get_backends().values(): + self.get_command(ctx, backend_class.name) return super().list_commands(ctx) def get_command(self, ctx, cmd_name) -> Union[click.Command, None]: - """Register lazy command (if it is requested) before calling `get_command`.""" - if cmd_name in self.lazy_commands: - self.lazy_commands[cmd_name]() - del self.lazy_commands[cmd_name] - return super().get_command(ctx, cmd_name) + """Register sub-command before calling `get_command`.""" + command = super().get_command(ctx, cmd_name) + if command: + return command + + backend_class = self.get_backends().get(cmd_name) + if backend_class and ctx.command.name: + self.add_backend_sub_command(backend_class, ctx.command.name) + return super().get_command(ctx, cmd_name) + + return command + + def add_backend_read_options( + self, backend_class: Type[DataBackend], method_params: dict + ) -> None: + """Add backend-related options for the `read` command.""" + prefetch = method_params.get("prefetch") + if prefetch: + self.method = click.option( + "-p", + "--prefetch", + type=int, + default=None, + help=prefetch, + )(self.method) + + query_help = method_params.get("query") + if query_help: + if issubclass(backend_class.query_class, BaseQuery): + query_doc = [ + f" {param.arg_name} ({param.type_name}): {param.description}\n" + for param in parse(str(backend_class.query_class.__doc__)).params + ] + query_doc = "\n".join(query_doc) + query_help = 
f"{query_help}\n\nQUERY Attributes:\n\n{query_doc}" + + if self.method.__doc__: + self.method.__doc__ += f"\n\nQUERY: {query_help}" + + def add_backend_write_options( + self, backend_class: Type[DataBackend], method_params: dict + ) -> None: + """Add backend-related options for the `write` command.""" + if not issubclass(backend_class, (Writable, AsyncWritable)): + return + + operation_type = method_params.get("operation_type") + if operation_type: + choices = [ + op_type.value + for op_type in BaseOperationType + if op_type.value not in backend_class.unsupported_operation_types + ] + self.method = click.option( + "-o", + "--operation-type", + type=click.Choice(choices), + metavar="OP_TYPE", + required=False, + show_default=True, + default=backend_class.default_operation_type.value, + help=operation_type, + )(self.method) + + if method_params.get("concurrency"): + self.method = click.option( + "-c", + "--concurrency", + type=int, + default=1, + help=method_params.get("concurrency"), + )(self.method) + + def add_backend_settings_options( + self, backend_settings: BaseDataBackendSettings + ) -> Callable: + """Add backend-related options from backend settings.""" + settings_params = { + param.arg_name.lower().replace("_", "-"): param.description + for param in parse(str(backend_settings.__doc__)).params + } + fields = backend_settings.__fields__.items() + for name, field in sorted(fields, key=lambda x: x[0], reverse=True): + if name in ["WRITE_CHUNK_SIZE", "READ_CHUNK_SIZE"]: + continue + field_name = name.lower().replace("_", "-") + field_type = field.type_ + option = f"--{field_name}" + option_kwargs = { + "show_default": True, + "default": getattr(backend_settings, name, None), + "required": field.required, + "help": settings_params.get(field_name), + } + if field.default: + option_kwargs["type"] = type(field.default) + # If the field is a boolean, convert it to a flag option + if field_type is bool: + option = f"{option}/--no-{field_name}" + option_kwargs["is_flag"] = True + elif field_type is dict: + option_kwargs["type"] = CommaSeparatedKeyValueParamType() + elif field_type is CommaSeparatedTuple: + option_kwargs["type"] = CommaSeparatedTupleParamType() + elif isclass(field_type): + if issubclass(field_type, ClientOptions): + option_kwargs["type"] = ClientOptionsParamType(field_type) + elif issubclass(field_type, HeadersParameters): + option_kwargs["type"] = HeadersParametersParamType(field_type) + elif issubclass(field_type, AnyUrl): + option_kwargs["type"] = str + elif field_type is Path: + option_kwargs["type"] = click.Path() + + self.method = click.option(option.lower(), **option_kwargs)(self.method) + + def add_backend_sub_command( + self, backend_class: Type[DataBackend], command_name: str + ) -> None: + """Backend-related options decorator for Ralph commands.""" + self.method.__doc__ = copy.copy(self.method_doc) + self.method.__click_params__ = copy.deepcopy(self.method_click_params) + method = getattr(backend_class, command_name, self.method) + method_docstring = parse(str(method.__doc__)) + method_params = { + param.arg_name: param.description for param in method_docstring.params + } + backend_settings = backend_class.settings_class() + for click_param in self.method.__click_params__: + click_param.help = method_params.get(click_param.name) + if click_param.name == "chunk_size": + if command_name == "write": + click_param.default = backend_settings.WRITE_CHUNK_SIZE + click_param.show_default = True + elif command_name == "read": + click_param.default = 
backend_settings.READ_CHUNK_SIZE + click_param.show_default = True + + self.method.__doc__ = ( + f"{backend_class.__doc__}\n\n{method_docstring.short_description}" + ) + self.add_backend_read_options(backend_class, method_params) + self.add_backend_write_options(backend_class, method_params) + self.add_backend_settings_options(backend_settings) + command = update_wrapper(partial(self.method, backend_class), self.method) + self.add_command(click.command(name=backend_class.name)(command)) -@click.group(name="ralph", cls=RalphCLI) +@click.group(name="ralph") @click.option( "-v", "--verbosity", @@ -238,7 +366,7 @@ def get_command(self, ctx, cmd_name) -> Union[click.Command, None]: help="Either CRITICAL, ERROR, WARNING, INFO (default) or DEBUG", ) @click.version_option(version=ralph_version) -def cli(verbosity=None): # noqa: ARG001 +def cli(verbosity: Optional[str] = None): """The cli is a stream-based tool to play with your logs. It offers functionalities to: @@ -246,55 +374,12 @@ def cli(verbosity=None): # noqa: ARG001 - Read and write learning data to various databases or servers - Manage an instance of a Ralph LRS server """ - - -# Once we have a base backend interface we could use Dict[str, Type[BaseBackend]] -def backends_options(backends: Dict[str, Type], name: Optional[str] = None): - """Backend-related options decorator for Ralph commands.""" - - def wrapper(command): - backend_names = [] - for backend_name, backend in backends.items(): - backend_names.append(backend_name) - fields = backend.settings_class.__fields__.items() - for field_name, field in sorted(fields, key=lambda x: x[0], reverse=True): - field_type = field.type_ - field_name = ( # noqa: PLW2901 - f"{backend_name}-{field_name.lower()}".replace("_", "-") - ) - option = f"--{field_name}" - option_kwargs = {"default": None} - if field.default: - option_kwargs["type"] = type(field.default) - # If the field is a boolean, convert it to a flag option - if field_type is bool: - option = f"{option}/--no-{field_name}" - option_kwargs["is_flag"] = True - elif field_type is dict: - option_kwargs["type"] = CommaSeparatedKeyValueParamType() - elif field_type is CommaSeparatedTuple: - option_kwargs["type"] = CommaSeparatedTupleParamType() - elif isclass(field_type) and issubclass(field_type, ClientOptions): - option_kwargs["type"] = ClientOptionsParamType(field_type) - elif isclass(field_type) and issubclass(field_type, HeadersParameters): - option_kwargs["type"] = HeadersParametersParamType(field_type) - elif field_type is Path: - option_kwargs["type"] = click.Path() - - command = optgroup.option(option.lower(), **option_kwargs)(command) - - command = (optgroup.group(f"{backend_name} backend"))(command) - - command = click.option( - "-b", - "--backend", - type=click.Choice(sorted(backend_names)), - required=True, - help="Backend", - )(command) - return cli.command(name=name or command.__name__)(command) - - return wrapper + configure_logging() + if verbosity is not None: + level = getattr(logging, verbosity) + get_root_logger().setLevel(level) + for handler in get_root_logger().handlers: + handler.setLevel(level) @cli.command() @@ -604,56 +689,38 @@ def convert(from_, to_, ignore_errors, fail_on_unknown, **conversion_set_kwargs) click.echo(event) -@RalphCLI.lazy_backends_options(get_cli_backends) @click.argument("query", required=False) -@click.option( - "-s", - "--chunk-size", - type=int, - default=None, - help="Get events by chunks of size #", -) -@click.option( - "-t", - "--target", - type=str, - default=None, - help="Endpoint from 
which to read events (e.g. `/statements`)", -) -@click.option( - "-i", - "--ignore_errors", - is_flag=False, - show_default=True, - default=False, - help="Ignore errors during the encoding operation.", -) -def read( - backend, - chunk_size, - target, - query, - ignore_errors, +@click.option("-m", "--max-statements", type=int, default=None) +@click.option("-I", "--ignore-errors", is_flag=True, default=False) +@click.option("-s", "--chunk-size", type=int, default=None) +@click.option("-t", "--target", type=str, default=None) +def _read( # noqa: PLR0913 + backend_class: Type[DataBackend], + query: str, + target: str, + chunk_size: int, + ignore_errors: bool, + max_statements: int, + prefetch: Optional[int] = None, **options, ): """Read records matching the QUERY (json or string) from a configured backend.""" - logger.info( - ( - "Fetching data from the configured %s backend " - "(chunk size: %s | target: %s | query: %s)" - ), - backend, - chunk_size, - target, - query, + logger.info("Reading data from %s backend", backend_class.name) + msg = ( + "(query: %s | target: %s | chunk size: %s | ignore errors: %s | " + "max statements: %s | prefetch: %s)" ) + logger.info(msg, query, target, chunk_size, ignore_errors, max_statements, prefetch) logger.debug("Backend parameters: %s", options) - backend_class = get_backend_class(get_cli_backends(), backend) backend = get_backend_instance(backend_class, options) if query and issubclass(backend.query_class, BaseQuery): - query = backend.query_class.from_string(query) + query: BaseQuery = backend.query_class.from_string(query) + + async_options = {} + if isinstance(backend, BaseAsyncDataBackend): + async_options["prefetch"] = prefetch statements = backend.read( query=query, @@ -661,6 +728,8 @@ def read( chunk_size=chunk_size, raw_output=True, ignore_errors=ignore_errors, + max_statements=max_statements, + **async_options, ) if isinstance(backend, BaseAsyncDataBackend): statements = iter_over_async(statements) @@ -669,57 +738,34 @@ def read( click.echo(statement, nl=False) -@RalphCLI.lazy_backends_options(get_cli_write_backends) -@click.option( - "-t", - "--target", - type=str, - default=None, - help="The target container to write into", -) -@click.option( - "-s", - "--chunk-size", - type=int, - default=None, - help="Get events by chunks of size #", +@cli.group( + cls=RalphBackendCLI, + get_backends=get_cli_backends, + method=_read, + options_metavar="", + subcommand_metavar="BACKEND [OPTIONS] [QUERY]", ) -@click.option( - "-I", - "--ignore-errors", - default=False, - is_flag=True, - help="Continue writing regardless of raised errors", -) -@click.option( - "-o", - "--operation-type", - type=click.Choice([op_type.value for op_type in BaseOperationType]), - metavar="OP_TYPE", - required=False, - help="Either index, create, delete, update or append", -) -@click.option( - "-c", - "--concurrency", - default=1, - help="Number of chunks to write concurrently. 
(async backends only)", -) -def write( # noqa: PLR0913 - backend, - target, - chunk_size, - ignore_errors, - operation_type, - concurrency, +def read(): + """Read records matching the QUERY (json or string) from a configured backend.""" + + +@click.option("-I", "--ignore-errors", is_flag=True, default=False) +@click.option("-s", "--chunk-size", type=int, default=None) +@click.option("-t", "--target", type=str, default=None) +def _write( # noqa: PLR0913 + backend_class: Type[DataBackend], + target: str, + chunk_size: int, + ignore_errors: bool, + operation_type: str, + concurrency: Optional[int] = None, **options, ): """Write an archive to a configured backend.""" - logger.info("Writing to target %s for the configured %s backend", target, backend) - + logger.info( + "Writing to target %s for the configured %s backend", target, backend_class.name + ) logger.debug("Backend parameters: %s", options) - - backend_class = get_backend_class(get_cli_write_backends(), backend) backend = get_backend_instance(backend_class, options) writer = backend.write @@ -738,36 +784,30 @@ def write( # noqa: PLR0913 ) -@RalphCLI.lazy_backends_options(get_cli_list_backends, name="list") -@click.option( - "-t", - "--target", - type=str, - default=None, - help="Container to list events from", -) -@click.option( - "-n/-a", - "--new/--all", - default=False, - help="List not fetched (or all) documents", +@cli.group( + cls=RalphBackendCLI, + get_backends=get_cli_write_backends, + method=_write, + options_metavar="", + subcommand_metavar="BACKEND [OPTIONS]", ) -@click.option( - "-D/-I", - "--details/--ids", - default=False, - help="Get documents detailed output (JSON)", -) -def list_(target, details, new, backend, **options): +def write(): + """Write data to a configured backend.""" + + +@click.option("-n/-a", "--new/--all", default=False) +@click.option("-D/-I", "--details/--ids", default=False) +@click.option("-t", "--target", type=str, default=None) +def _list( + backend_class: Type[DataBackend], target: str, details: bool, new: bool, **options +): """List available documents from a configured data backend.""" - logger.info("Listing documents for the configured %s backend", backend) + logger.info("Listing documents for the configured %s backend", backend_class.name) logger.debug("Target container: %s", target) logger.debug("Fetch details: %s", str(details)) logger.debug("Backend parameters: %s", options) - backend_class = get_backend_class(get_cli_list_backends(), backend) backend = get_backend_instance(backend_class, options) - documents = backend.list(target=target, details=details, new=new) documents = iter_over_async(documents) if isasyncgen(documents) else documents counter = 0 @@ -779,7 +819,18 @@ def list_(target, details, new, backend, **options): logger.warning("Configured %s backend contains no document", backend.name) -@RalphCLI.lazy_backends_options(get_lrs_backends, name="runserver") +@cli.group( + cls=RalphBackendCLI, + get_backends=get_cli_list_backends, + method=_list, + options_metavar="", + subcommand_metavar="BACKEND [OPTIONS]", + name="list", +) +def list_(): + """List available documents from a configured data backend.""" + + @click.option( "-h", "--host", @@ -796,12 +847,14 @@ def list_(target, details, new, backend, **options): default=settings.RUNSERVER_PORT, help="LRS server port", ) -def runserver(backend: str, host: str, port: int, **options): +def _runserver(backend_class: Type[DataBackend], host: str, port: int, **options): """Run the API server for the development environment. 
Starts uvicorn programmatically for convenience and documentation. """ - logger.info("Running API server on %s:%s with %s backend", host, port, backend) + logger.info( + "Running API server on %s:%s with %s backend", host, port, backend_class.name + ) logger.info( ( "Do not use runserver in production - start production servers " @@ -814,28 +867,31 @@ def runserver(backend: str, host: str, port: int, **options): # environment variables by creating a temporary environment file and passing it to # uvicorn. with NamedTemporaryFile(mode="w", encoding=settings.LOCALE_ENCODING) as env_file: - env_file.write(f"RALPH_RUNSERVER_BACKEND={backend}\n") - for key, value in options.items(): + env_file.write(f"RALPH_RUNSERVER_BACKEND={backend_class.name}\n") + for option, value in options.items(): if value is None: continue - backend_name, field_name = key.split(sep="_", maxsplit=1) - key = ( # noqa: PLW2901 - f"RALPH_BACKENDS__LRS__{backend_name}__{field_name}".upper() - ) + + key = f"RALPH_BACKENDS__LRS__{backend_class.name}__{option}".upper() if isinstance(value, tuple): value = ",".join(value) # noqa: PLW2901 - if issubclass(type(value), ClientOptions): + + if isinstance(value, ClientOptions): for key_dict, value_dict in value.dict().items(): if value_dict is None: continue + key_dict = f"{key}__{key_dict}" # noqa: PLW2901 logger.debug( "Setting environment variable %s to '%s'", key_dict, value_dict ) env_file.write(f"{key_dict}={value_dict}\n") + continue + logger.debug("Setting environment variable %s to '%s'", key, value) env_file.write(f"{key}={value}\n") + env_file.seek(0) try: uvicorn.run( @@ -853,3 +909,17 @@ def runserver(backend: str, host: str, port: int, **options): ) from error logger.info("Shutting down uvicorn server.") + + +@cli.group( + cls=RalphBackendCLI, + get_backends=get_lrs_backends, + method=_runserver, + options_metavar="", + subcommand_metavar="BACKEND [OPTIONS]", +) +def runserver(): + """Run the API server for the development environment. + + Start uvicorn programmatically for convenience and documentation. + """ diff --git a/src/ralph/utils.py b/src/ralph/utils.py index afcf51879..133a742dd 100644 --- a/src/ralph/utils.py +++ b/src/ralph/utils.py @@ -84,13 +84,10 @@ def get_backend_class(backends: Dict[str, Type], name: str) -> Any: def get_backend_instance(backend_class: Type, options: Dict) -> Any: """Return the instantiated backend given the backend class and options.""" - prefix = f"{backend_class.name}_" # Filter backend-related parameters. 
Parameter name is supposed to start # with the backend name options = { - name.replace(prefix, "").upper(): value - for name, value in options.items() - if name.startswith(prefix) and value is not None + name.upper(): value for name, value in options.items() if value is not None } return backend_class(backend_class.settings_class(**options)) diff --git a/tests/backends/data/test_async_ws.py b/tests/backends/data/test_async_ws.py index a90574706..c3198050c 100644 --- a/tests/backends/data/test_async_ws.py +++ b/tests/backends/data/test_async_ws.py @@ -6,10 +6,11 @@ import pytest import websockets +from websockets.http import USER_AGENT from ralph.backends.data.async_ws import AsyncWSDataBackend, WSDataBackendSettings from ralph.backends.data.base import DataBackendStatus -from ralph.exceptions import BackendException, BackendParameterException +from ralph.exceptions import BackendException from tests.fixtures.backends import WS_TEST_HOST, WS_TEST_PORT @@ -30,16 +31,21 @@ def test_backends_data_async_ws_default_instantiation(caplog, monkeypatch, fs): assert AsyncWSDataBackend.name == "async_ws" assert AsyncWSDataBackend.settings_class == WSDataBackendSettings - msg = ( - "Failed to instantiate default async data backend settings: " - "1 validation error for WSDataBackendSettings\nURI\n " - "field required (type=value_error.missing)" - ) - with pytest.raises(BackendParameterException, match=re.escape(msg)): - with caplog.at_level(logging.ERROR): - AsyncWSDataBackend() - - assert ("ralph.backends.data.base", logging.ERROR, msg) in caplog.record_tuples + backend = AsyncWSDataBackend() + assert backend.settings.URI == "ws://localhost:8765" + assert backend.settings.CLIENT_OPTIONS.dict() == { + "close_timeout": None, + "compression": "deflate", + "max_size": 2**20, + "max_queue": 2**5, + "open_timeout": 10, + "origin": None, + "ping_interval": 20, + "ping_timeout": 20, + "read_limit": 2**16, + "user_agent_header": USER_AGENT, + "write_limit": 2**16, + } def test_backends_data_async_ws_instantiation_with_settings(monkeypatch): diff --git a/tests/test_cli.py b/tests/test_cli.py index 82bae831a..4d98201ec 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -20,7 +20,7 @@ CommaSeparatedKeyValueParamType, CommaSeparatedTupleParamType, JSONStringParamType, - backends_options, + RalphBackendCLI, cli, ) from ralph.conf import settings @@ -541,14 +541,15 @@ def mock_read(*_, **__): monkeypatch.setattr(LDPDataBackend, "read", mock_read) runner = CliRunner() - command = "read -b ldp --ldp-endpoint ovh-eu a547d9b3-6f2f-4913-a872-cf4efe699a66" + command = "read ldp --endpoint ovh-eu a547d9b3-6f2f-4913-a872-cf4efe699a66" result = runner.invoke(cli, command.split()) assert result.exit_code == 0 assert '{"foo": "bar"}' in result.output + del cli.commands["read"].commands["ldp"] -def test_cli_read_command_with_fs_backend(fs, monkeypatch): +def test_cli_read_command_with_fs_backend(monkeypatch): """Test ralph read command using the FS backend.""" archive_content = {"foo": "bar"} @@ -560,13 +561,15 @@ def mock_read(*_, **__): monkeypatch.setattr(FSDataBackend, "read", mock_read) runner = CliRunner() - result = runner.invoke(cli, "read -b fs foo".split()) + with runner.isolated_filesystem(): + result = runner.invoke(cli, "read fs foo".split()) assert result.exit_code == 0 assert '{"foo": "bar"}' in result.output + del cli.commands["read"].commands["fs"] -def test_cli_read_command_with_chunk_size(fs, monkeypatch): +def test_cli_read_command_with_chunk_size(monkeypatch): """Test ralph `read` command with a 
`chunk_size` option.""" def get_mock_read_bytes(expected_chunk_size: int): @@ -581,27 +584,34 @@ def mock_read_bytes(self, query, target, chunk_size, *_): return mock_read_bytes - monkeypatch.delenv("RALPH_BACKENDS__DATA__FS__READ_CHUNK_SIZE", raising=False) + # monkeypatch.delenv("RALPH_BACKENDS__DATA__FS__READ_CHUNK_SIZE", raising=False) runner = CliRunner() - - # Given no chunk size, a default chunk size should be used. - monkeypatch.setattr(FSDataBackend, "_read_bytes", get_mock_read_bytes(4096)) - result = runner.invoke(cli, "read -b fs".split()) - assert result.exit_code == 0 - assert '{"foo": "bar"}' in result.output + with runner.isolated_filesystem(): + # Given no chunk size, a default chunk size should be used. + monkeypatch.setattr(FSDataBackend, "_read_bytes", get_mock_read_bytes(4096)) + result = runner.invoke(cli, "read fs".split()) + assert result.exit_code == 0 + assert '{"foo": "bar"}' in result.output # Given a chunk size set by the environment, it should overwrite the default. runner = CliRunner(env={"RALPH_BACKENDS__DATA__FS__READ_CHUNK_SIZE": "3"}) - monkeypatch.setattr(FSDataBackend, "_read_bytes", get_mock_read_bytes(3)) - result = runner.invoke(cli, "read -b fs".split()) - assert result.exit_code == 0 - assert '{"foo": "bar"}' in result.output - - # Given a chunk size set by the chunk-size option, it should overwrite the default. - monkeypatch.setattr(FSDataBackend, "_read_bytes", get_mock_read_bytes(1)) - result = runner.invoke(cli, "read -b fs --chunk-size 1".split()) - assert result.exit_code == 0 - assert '{"foo": "bar"}' in result.output + with runner.isolated_filesystem(): + monkeypatch.setattr(FSDataBackend, "_read_bytes", get_mock_read_bytes(3)) + # NB: Command option defaults are set once. + # To force the CLI to recreate the command with new option defaults + # from environment variables - we delete the previous command. + del cli.commands["read"].commands["fs"] + result = runner.invoke(cli, "read fs".split()) + assert result.exit_code == 0 + assert '{"foo": "bar"}' in result.output + + # Given a chunk size set by the chunk-size option, it should overwrite the + # default. 
+ monkeypatch.setattr(FSDataBackend, "_read_bytes", get_mock_read_bytes(1)) + result = runner.invoke(cli, "read fs --chunk-size 1".split()) + assert result.exit_code == 0 + assert '{"foo": "bar"}' in result.output + del cli.commands["read"].commands["fs"] def test_cli_read_command_with_es_backend(es): @@ -622,9 +632,9 @@ def test_cli_read_command_with_es_backend(es): runner = CliRunner() es_hosts = ",".join(ES_TEST_HOSTS) es_client_options = "verify_certs=True" - command = f"""-v ERROR read -b es --es-hosts {es_hosts} - --es-default-index {ES_TEST_INDEX} - --es-client-options {es_client_options}""" + command = f"""-v ERROR read es --hosts {es_hosts} + --default-index {ES_TEST_INDEX} + --client-options {es_client_options}""" result = runner.invoke(cli, command.split()) assert result.exit_code == 0 expected = ( @@ -646,6 +656,7 @@ def test_cli_read_command_with_es_backend(es): ) assert expected == result.output + del cli.commands["read"].commands["es"] def test_cli_read_command_client_options_with_es_backend(es): @@ -653,7 +664,7 @@ def test_cli_read_command_client_options_with_es_backend(es): runner = CliRunner() es_client_options = "ca_certs=/path/,verify_certs=True" - command = f"""-v ERROR read -b es --es-client-options {es_client_options}""" + command = f"""-v ERROR read es --client-options {es_client_options}""" result = runner.invoke(cli, command.split()) assert result.exit_code == 1 assert "TLS options require scheme to be 'https'" in str(result.exception) @@ -686,9 +697,9 @@ def test_cli_read_command_with_es_backend_query(es): command = ( "-v ERROR " "read " - "-b es " - f"--es-hosts {es_hosts} " - f"--es-default-index {ES_TEST_INDEX} " + "es " + f"--hosts {es_hosts} " + f"--default-index {ES_TEST_INDEX} " f"{query_str}" ) result = runner.invoke(cli, command.split()) @@ -712,11 +723,15 @@ def test_cli_read_command_with_es_backend_query(es): ) assert expected == result.output + del cli.commands["read"].commands["es"] + +def test_cli_read_command_with_mongo_backend_query(): + """Test ralph read command using the mongo backend and a query.""" # Test with an invalid json query string invalid_query_str = "wrong_query_string" - - command = f"-v ERROR read -b mongo {invalid_query_str}" + command = f"-v DEBUG read mongo {invalid_query_str}" + runner = CliRunner() result = runner.invoke(cli, command.split()) assert result.exit_code > 0 assert isinstance(result.exception, BackendParameterException) @@ -727,6 +742,7 @@ def test_cli_read_command_with_es_backend_query(es): "'doc': 'wrong_query_string', 'pos': 0, 'lineno': 1, 'colno': 1}}]" ) assert str(result.exception) == msg + del cli.commands["read"].commands["mongo"] def test_cli_read_command_with_ws_backend(events, ws): @@ -741,8 +757,9 @@ def websocket(): with websocket(): runner = CliRunner() uri = f"ws://{WS_TEST_HOST}:{WS_TEST_PORT}" - result = runner.invoke(cli, ["read", "-b", "async_ws", "--async-ws-uri", uri]) + result = runner.invoke(cli, ["read", "async_ws", "--uri", uri]) assert "\n".join([json.dumps(event) for event in events]) in result.output + del cli.commands["read"].commands["async_ws"] def test_cli_list_command_with_ldp_backend(monkeypatch): @@ -789,12 +806,12 @@ def mock_list(this, target=None, details=False, new=False): runner = CliRunner() # List documents with default options - result = runner.invoke(cli, ["list", "-b", "ldp", "--ldp-endpoint", "ovh-eu"]) + result = runner.invoke(cli, ["list", "ldp", "--endpoint", "ovh-eu"]) assert result.exit_code == 0 assert "\n".join(archive_list) in result.output # List documents 
with detailed output - result = runner.invoke(cli, ["list", "-b", "ldp", "--ldp-endpoint", "ovh-eu", "-D"]) + result = runner.invoke(cli, ["list", "ldp", "--endpoint", "ovh-eu", "-D"]) assert result.exit_code == 0 assert ( "\n".join(json.dumps(detail) for detail in archive_list_details) @@ -802,20 +819,21 @@ def mock_list(this, target=None, details=False, new=False): ) # List new documents only - result = runner.invoke(cli, ["list", "-b", "ldp", "--ldp-endpoint", "ovh-eu", "-n"]) + result = runner.invoke(cli, ["list", "ldp", "--endpoint", "ovh-eu", "-n"]) assert result.exit_code == 0 assert "997db3eb-b9ca-485d-810f-b530a6cef7c6" in result.output assert "5d5c4c93-04a4-42c5-9860-f51fa4044aa1" not in result.output # Edge case: stream contains no document monkeypatch.setattr(LDPDataBackend, "list", lambda this, target, details, new: ()) - result = runner.invoke(cli, ["list", "-b", "ldp", "--ldp-endpoint", "ovh-eu"]) + result = runner.invoke(cli, ["list", "ldp", "--endpoint", "ovh-eu"]) assert result.exit_code == 0 assert "Configured ldp backend contains no document" in result.output + del cli.commands["list"].commands["ldp"] -def test_cli_list_command_with_fs_backend(fs, monkeypatch): - """Test ralph list command using the LDP backend.""" +def test_cli_list_command_with_fs_backend(monkeypatch): + """Test ralph list command using the fs backend.""" archive_list = [ "file1", "file2", @@ -834,7 +852,7 @@ def test_cli_list_command_with_fs_backend(fs, monkeypatch): ] def mock_list(this, target=None, details=False, new=False): - """Mock LDP backend list method.""" + """Mock FS backend list method.""" response = archive_list if details: @@ -846,82 +864,72 @@ def mock_list(this, target=None, details=False, new=False): monkeypatch.setattr(FSDataBackend, "list", mock_list) runner = CliRunner() + with runner.isolated_filesystem(): + # List documents with default options + result = runner.invoke(cli, ["list", "fs"]) + assert result.exit_code == 0 + assert "\n".join(archive_list) in result.output + + # List documents with detailed output + result = runner.invoke(cli, ["list", "fs", "-D"]) + assert result.exit_code == 0 + assert ( + "\n".join(json.dumps(detail) for detail in archive_list_details) + in result.output + ) - # List documents with default options - result = runner.invoke(cli, ["list", "-b", "fs"]) - assert result.exit_code == 0 - assert "\n".join(archive_list) in result.output - - # List documents with detailed output - result = runner.invoke(cli, ["list", "-b", "fs", "-D"]) - assert result.exit_code == 0 - assert ( - "\n".join(json.dumps(detail) for detail in archive_list_details) - in result.output - ) - - # List new documents only - result = runner.invoke(cli, ["list", "-b", "fs", "-n"]) - assert result.exit_code == 0 - assert "file2" in result.output - assert "file1" not in result.output + # List new documents only + result = runner.invoke(cli, ["list", "fs", "-n"]) + assert result.exit_code == 0 + assert "file2" in result.output + assert "file1" not in result.output - # Edge case: stream contains no document - monkeypatch.setattr(FSDataBackend, "list", lambda this, target, details, new: ()) - result = runner.invoke(cli, ["list", "-b", "fs"]) - assert result.exit_code == 0 - assert "Configured fs backend contains no document" in result.output + # Edge case: stream contains no document + monkeypatch.setattr(FSDataBackend, "list", lambda *args, **kwargs: ()) + result = runner.invoke(cli, ["list", "fs"]) + assert result.exit_code == 0 + assert "Configured fs backend contains no document" in 
result.output + del cli.commands["list"].commands["fs"] -def test_cli_write_command_with_fs_backend(fs): +def test_cli_write_command_with_fs_backend(): """Test ralph write command using the FS backend.""" - fs.create_dir(str(settings.APP_DIR)) - fs.create_dir("foo") - - filename = Path("foo/file1") - + filename = Path("file1") # Create a file runner = CliRunner() - result = runner.invoke( - cli, - "write -b fs -t file1 --fs-default-directory-path foo".split(), - input=b"test content", - ) + with runner.isolated_filesystem(): + result = runner.invoke(cli, "write fs -t file1".split(), input=b"test content") - assert result.exit_code == 0 + assert result.exit_code == 0 - with filename.open("rb") as test_file: - content = test_file.read() + with filename.open("rb") as test_file: + content = test_file.read() - assert b"test content" in content + assert b"test content" in content - # Trying to create the same file without -f should raise an error - runner = CliRunner() - result = runner.invoke( - cli, - "write -b fs -t file1 --fs-default-directory-path foo".split(), - input=b"other content", - ) - assert result.exit_code == 1 - assert "file1 already exists and overwrite is not allowed" in result.output + # Trying to create the same file without -f should raise an error + result = runner.invoke(cli, "write fs -t file1".split(), input=b"other content") + assert result.exit_code == 1 + assert "file1 already exists and overwrite is not allowed" in result.output - # Try to create the same file with -o update - runner = CliRunner() - result = runner.invoke( - cli, - "write -b fs -t file1 -o update --fs-default-directory-path foo".split(), - input=b"other content", - ) + # Try to create the same file with -o update + runner = CliRunner() + result = runner.invoke( + cli, + "write fs -t file1 -o update".split(), + input=b"other content", + ) - assert result.exit_code == 0 + assert result.exit_code == 0 - with filename.open("rb") as test_file: - content = test_file.read() + with filename.open("rb") as test_file: + content = test_file.read() - assert b"other content" in content + assert b"other content" in content + del cli.commands["write"].commands["fs"] -def test_cli_write_command_with_chunk_size(fs, monkeypatch): +def test_cli_write_command_with_chunk_size(monkeypatch): """Test ralph `write` command with a `chunk_size` option.""" def get_mock_write_bytes(expected_chunk_size: int): @@ -936,22 +944,28 @@ def mock_write_bytes(self, query, target, chunk_size, *_): monkeypatch.delenv("RALPH_BACKENDS__DATA__FS__WRITE_CHUNK_SIZE", raising=False) runner = CliRunner() - - # Given no chunk size, a default chunk size should be used. - monkeypatch.setattr(FSDataBackend, "_write_bytes", get_mock_write_bytes(4096)) - result = runner.invoke(cli, "write -b fs".split()) - assert result.exit_code == 0 - - # Given a chunk size set by the environment, it should overwrite the default. - runner = CliRunner(env={"RALPH_BACKENDS__DATA__FS__WRITE_CHUNK_SIZE": "3"}) - monkeypatch.setattr(FSDataBackend, "_write_bytes", get_mock_write_bytes(3)) - result = runner.invoke(cli, "write -b fs".split()) - assert result.exit_code == 0 - - # Given a chunk size set by the chunk-size option, it should overwrite the default. - monkeypatch.setattr(FSDataBackend, "_write_bytes", get_mock_write_bytes(1)) - result = runner.invoke(cli, "write -b fs --chunk-size 1".split()) - assert result.exit_code == 0 + with runner.isolated_filesystem(): + # Given no chunk size, a default chunk size should be used. 
+ monkeypatch.setattr(FSDataBackend, "_write_bytes", get_mock_write_bytes(4096)) + result = runner.invoke(cli, "write fs".split()) + assert result.exit_code == 0 + + # Given a chunk size set by the environment, it should overwrite the default. + runner = CliRunner(env={"RALPH_BACKENDS__DATA__FS__WRITE_CHUNK_SIZE": "3"}) + monkeypatch.setattr(FSDataBackend, "_write_bytes", get_mock_write_bytes(3)) + # NB: Command option defaults are set once. + # To force the CLI to recreate the command with new option defaults + # from environment variables - we delete the previous command. + del cli.commands["write"].commands["fs"] + result = runner.invoke(cli, "write fs".split()) + assert result.exit_code == 0 + + # Given a chunk size set by the chunk-size option, it should overwrite the + # default. + monkeypatch.setattr(FSDataBackend, "_write_bytes", get_mock_write_bytes(1)) + result = runner.invoke(cli, "write fs --chunk-size 1".split()) + assert result.exit_code == 0 + del cli.commands["write"].commands["fs"] def test_cli_write_command_with_es_backend(es): @@ -964,7 +978,7 @@ def test_cli_write_command_with_es_backend(es): es_hosts = ",".join(ES_TEST_HOSTS) result = runner.invoke( cli, - f"write -b es --es-hosts {es_hosts} --es-default-index {ES_TEST_INDEX}".split(), + f"write es --hosts {es_hosts} --default-index {ES_TEST_INDEX}".split(), input="\n".join(json.dumps(record) for record in records), ) assert result.exit_code == 0 @@ -976,6 +990,7 @@ def test_cli_write_command_with_es_backend(es): assert len(documents) == 10 assert [document.get("_source") for document in documents] == records + del cli.commands["write"].commands["es"] @pytest.mark.parametrize("host_,port_", [("0.0.0.0", "8000"), ("127.0.0.1", "80")]) @@ -990,9 +1005,10 @@ def mock_uvicorn_run(_, host=None, port=None, **kwargs): monkeypatch.setattr("ralph.cli.uvicorn.run", mock_uvicorn_run) runner = CliRunner() - result = runner.invoke(cli, f"runserver -h {host_} -p {port_} -b es".split()) + result = runner.invoke(cli, f"runserver es -h {host_} -p {port_}".split()) assert result.exit_code == 0 assert f"Running API server on {host_}:{port_} with es backend" in result.output + del cli.commands["runserver"].commands["es"] def test_cli_runserver_command_environment_file_generation(monkeypatch): @@ -1001,21 +1017,27 @@ def test_cli_runserver_command_environment_file_generation(monkeypatch): def mock_uvicorn_run(_, env_file=None, **kwargs): """Mock uvicorn.run asserting environment file content.""" expected_env_lines = [ - f"RALPH_RUNSERVER_BACKEND={settings.RUNSERVER_BACKEND}\n", + "RALPH_RUNSERVER_BACKEND=es\n", "RALPH_BACKENDS__LRS__ES__DEFAULT_INDEX=foo\n", "RALPH_BACKENDS__LRS__ES__CLIENT_OPTIONS__verify_certs=True\n", + "RALPH_BACKENDS__LRS__ES__ALLOW_YELLOW_STATUS=False\n", + "RALPH_BACKENDS__LRS__ES__HOSTS=http://elasticsearch:9200\n", + "RALPH_BACKENDS__LRS__ES__LOCALE_ENCODING=utf8\n", + "RALPH_BACKENDS__LRS__ES__POINT_IN_TIME_KEEP_ALIVE=1m\n", ] with open(env_file, mode="r", encoding=settings.LOCALE_ENCODING) as file: assert file.readlines() == expected_env_lines monkeypatch.setattr("ralph.cli.uvicorn.run", mock_uvicorn_run) runner = CliRunner() - result = runner.invoke( - cli, - "runserver -b es --es-default-index foo " - "--es-client-options verify_certs=True".split(), - ) + with runner.isolated_filesystem(): + result = runner.invoke( + cli, + "runserver es --default-index foo " + "--client-options verify_certs=True".split(), + ) assert result.exit_code == 0 + del cli.commands["runserver"].commands["es"] def 
test_cli_ralph_cli_lazy_backend_options(monkeypatch): @@ -1024,21 +1046,31 @@ def test_cli_ralph_cli_lazy_backend_options(monkeypatch): reload(cli_module) call_counter = {"count": 0} - def mock_backends_options(backends, name=None): + def mock_add_backend_sub_command(self, *args): call_counter["count"] += 1 - return backends_options(backends, name) + return RalphBackendCLI.add_backend_sub_command(self, *args) - monkeypatch.setattr("ralph.cli.backends_options", mock_backends_options) + monkeypatch.setattr( + "ralph.cli.RalphBackendCLI.add_backend_sub_command", + mock_add_backend_sub_command, + ) runner = CliRunner() # Given a command that does not require backend options, the `backend_options` # function should not be called. runner.invoke(cli_module.cli, ["convert --help"]) assert not call_counter["count"] + runner.invoke(cli_module.cli, ["--help"]) + assert not call_counter["count"] # Given a command that requires backend options, the `backend_options` function - # should be called once. + # should be called once for each backend. runner.invoke(cli_module.cli, ["list", "--help"]) - assert call_counter["count"] == 1 - # Given a command that requires backend options of multiple commands, the - # `backend_options` function should be called once for each command. - runner.invoke(cli_module.cli, ["--help"]) - assert call_counter["count"] == 4 # list + (read, write, runserver) + assert call_counter["count"] == 9 + call_counter["count"] = 0 + runner.invoke(cli_module.cli, ["read", "--help"]) + assert call_counter["count"] == 12 + call_counter["count"] = 0 + runner.invoke(cli_module.cli, ["write", "--help"]) + assert call_counter["count"] == 10 + call_counter["count"] = 0 + runner.invoke(cli_module.cli, ["runserver", "--help"]) + assert call_counter["count"] == 6 diff --git a/tests/test_cli_list_usage.py b/tests/test_cli_list_usage.py new file mode 100644 index 000000000..ed07b36d4 --- /dev/null +++ b/tests/test_cli_list_usage.py @@ -0,0 +1,387 @@ +"""Tests for Ralph cli read usage strings.""" + +import re + +from click.testing import CliRunner + +from ralph.cli import cli + +from tests.fixtures.backends import ( + CLICKHOUSE_TEST_HOST, + ES_TEST_HOSTS, + MONGO_TEST_COLLECTION, + MONGO_TEST_CONNECTION_URI, +) + + +def test_cli_list_command_usage(): + """Test `ralph list` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "list --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph list BACKEND [OPTIONS] + + List available documents from a configured data backend. + + Options: + --help Show this message and exit. + + Commands: + async_es Asynchronous Elasticsearch data backend. + async_mongo Asynchronous MongoDB data backend. + clickhouse ClickHouse database backend. + es Elasticsearch data backend. + fs FileSystem data backend. + ldp OVH LDP (Log Data Platform) data backend. + mongo MongoDB data backend. + s3 S3 data backend. + swift SWIFT data backend. + """ + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) + + +def test_cli_list_async_es_command_usage(): + """Test the `ralph list async_es` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "list async_es --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph list async_es [OPTIONS] + + Asynchronous Elasticsearch data backend. + + List available Elasticsearch indices, data streams and aliases. + + Options: + --allow-yellow-status / --no-allow-yellow-status + Whether to consider Elasticsearch yellow + health status to be ok. 
[default: no-allow- + yellow-status] + --client-options KEY=VALUE,KEY=VALUE + A dictionary of valid options for the + Elasticsearch class initialization. [default: + ca_certs=None verify_certs=None] + --default-index TEXT The default index to use for querying + Elasticsearch. [default: statements] + --hosts VALUE1,VALUE2,VALUE3 The comma-separated list of Elasticsearch + nodes to connect to. [default: + ES_TEST_HOSTS] + --locale-encoding TEXT The encoding used for reading/writing + documents. [default: utf8] + --point-in-time-keep-alive TEXT + The duration for which Elasticsearch should + keep a point in time alive. [default: 1m] + --refresh-after-write TEXT Whether the Elasticsearch index should be + refreshed after the write operation. + -n, --new / -a, --all Ignored. + -D, --details / -I, --ids Get detailed information instead of just + names. + -t, --target TEXT The comma-separated list of data streams, + indices, and aliases to limit the request. + Supports wildcards (*). If target is `None`, + lists all available indices, data streams and + aliases. Equivalent to (`target` = "*"). + --help Show this message and exit. + """ + output = output.replace("ES_TEST_HOSTS", "".join(ES_TEST_HOSTS)) + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) + + +def test_cli_list_async_mongo_command_usage(): + """Test the `ralph list async_mongo` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "list async_mongo --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph list async_mongo [OPTIONS] + + Asynchronous MongoDB data backend. + + List collections in the target database. + + Options: + --client-options KEY=VALUE,KEY=VALUE + A dictionary of MongoDB client options. + [default: document_class=None tz_aware=None] + --connection-uri TEXT The MongoDB connection URI. [default: + MONGO_TEST_CONNECTION_URI] + --default-collection TEXT The MongoDB database collection to get objects + from. [default: MONGO_TEST_COLLECTION] + --default-database TEXT The MongoDB database to connect to. [default: + statements] + --locale-encoding TEXT The locale encoding to use when none is + provided. [default: utf8] + -n, --new / -a, --all Ignored. + -D, --details / -I, --ids Get detailed collection information instead of + just IDs. + -t, --target TEXT The MongoDB database name to list collections + from. If target is `None`, the + `DEFAULT_DATABASE` is used instead. + --help Show this message and exit. + """ + output = output.replace("MONGO_TEST_CONNECTION_URI", MONGO_TEST_CONNECTION_URI) + output = output.replace("MONGO_TEST_COLLECTION", MONGO_TEST_COLLECTION) + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) + + +def test_cli_list_clickhouse_command_usage(): + """Test the `ralph list clickhouse` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "list clickhouse --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph list clickhouse [OPTIONS] + + ClickHouse database backend. + + List tables for a given database. + + Options: + --client-options KEY=VALUE,KEY=VALUE + A dictionary of valid options for the + ClickHouse client connection. [default: + date_time_input_format='best_effort'] + --database TEXT ClickHouse database to connect to. [default: + xapi] + --event-table-name TEXT Table where events live. [default: + xapi_events_all] + --host TEXT ClickHouse server host to connect to. 
+ [default: CLICKHOUSE_TEST_HOST] + --locale-encoding TEXT The locale encoding to use when none is + provided. [default: utf8] + --password TEXT Password for the given ClickHouse username + (optional). + --port INTEGER ClickHouse server port to connect to. + [default: 8123] + --username TEXT ClickHouse username to connect as (optional). + -n, --new / -a, --all Ignored. + -D, --details / -I, --ids Get detailed table information instead of just + table names. + -t, --target TEXT The database name to list tables from. + --help Show this message and exit. + """ + output = output.replace("CLICKHOUSE_TEST_HOST", CLICKHOUSE_TEST_HOST) + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) + + +def test_cli_list_es_command_usage(): + """Test the `ralph list es` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "list es --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph list es [OPTIONS] + + Elasticsearch data backend. + + List available Elasticsearch indices, data streams and aliases. + + Options: + --allow-yellow-status / --no-allow-yellow-status + Whether to consider Elasticsearch yellow + health status to be ok. [default: no-allow- + yellow-status] + --client-options KEY=VALUE,KEY=VALUE + A dictionary of valid options for the + Elasticsearch class initialization. [default: + ca_certs=None verify_certs=None] + --default-index TEXT The default index to use for querying + Elasticsearch. [default: statements] + --hosts VALUE1,VALUE2,VALUE3 The comma-separated list of Elasticsearch + nodes to connect to. [default: + ES_TEST_HOSTS] + --locale-encoding TEXT The encoding used for reading/writing + documents. [default: utf8] + --point-in-time-keep-alive TEXT + The duration for which Elasticsearch should + keep a point in time alive. [default: 1m] + --refresh-after-write TEXT Whether the Elasticsearch index should be + refreshed after the write operation. + -n, --new / -a, --all Ignored. + -D, --details / -I, --ids Get detailed information instead of just + names. + -t, --target TEXT The comma-separated list of data streams, + indices, and aliases to limit the request. + Supports wildcards (*). If target is `None`, + lists all available indices, data streams and + aliases. Equivalent to (`target` = "*"). + --help Show this message and exit. + """ + output = output.replace("ES_TEST_HOSTS", "".join(ES_TEST_HOSTS)) + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) + + +def test_cli_list_fs_command_usage(): + """Test the `ralph list fs` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "list fs --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph list fs [OPTIONS] + + FileSystem data backend. + + List files and directories in the target directory. + + Options: + --default-directory-path PATH The default target directory path where to + perform list, read and write operations. + [default: .] + --default-query-string TEXT The default query string to match files for the + read operation. [default: *] + --locale-encoding TEXT The encoding used for writing dictionaries to + files. [default: utf8] + -n, --new / -a, --all Given the history, list only not already read + files. + -D, --details / -I, --ids Get detailed file information instead of just + file paths. + -t, --target TEXT The directory path where to list the files and + directories. If target is `None`, the + `default_directory` is used instead. 
If target + is a relative path, it is considered to be + relative to the `default_directory_path`. + --help Show this message and exit. + """ + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) + + +def test_cli_list_ldp_command_usage(): + """Test the `ralph list ldp` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "list ldp --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph list ldp [OPTIONS] + + OVH LDP (Log Data Platform) data backend. + + List archives for a given target stream_id. + + Options: + --application-key TEXT The OVH API application key (AK). + --application-secret TEXT The OVH API application secret (AS). + --consumer-key TEXT The OVH API consumer key (CK). + --default-stream-id TEXT The default stream identifier to query. + --endpoint TEXT The OVH API endpoint. [default: ovh-eu] + --locale-encoding TEXT [default: utf8] + --request-timeout TEXT HTTP request timeout in seconds. + --service-name TEXT The default LDP account name. + -n, --new / -a, --all Given the history, list only not already read + archives. + -D, --details / -I, --ids Get detailed archive information in addition to + archive IDs. + -t, --target TEXT The target stream_id where to list the archives. If + target is `None`, the `DEFAULT_STREAM_ID` is used + instead. + --help Show this message and exit. + """ + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) + + +def test_cli_list_mongo_command_usage(): + """Test the `ralph list mongo` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "list mongo --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph list mongo [OPTIONS] + + MongoDB data backend. + + List collections in the `target` database. + + Options: + --client-options KEY=VALUE,KEY=VALUE + A dictionary of MongoDB client options. + [default: document_class=None tz_aware=None] + --connection-uri TEXT The MongoDB connection URI. [default: + MONGO_TEST_CONNECTION_URI] + --default-collection TEXT The MongoDB database collection to get objects + from. [default: MONGO_TEST_COLLECTION] + --default-database TEXT The MongoDB database to connect to. [default: + statements] + --locale-encoding TEXT The locale encoding to use when none is + provided. [default: utf8] + -n, --new / -a, --all Ignored. + -D, --details / -I, --ids Get detailed collection information instead of + just IDs. + -t, --target TEXT The MongoDB database name to list collections + from. If target is `None`, the + `DEFAULT_DATABASE` is used instead. + --help Show this message and exit. + """ + output = output.replace("MONGO_TEST_CONNECTION_URI", MONGO_TEST_CONNECTION_URI) + output = output.replace("MONGO_TEST_COLLECTION", MONGO_TEST_COLLECTION) + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) + + +def test_cli_list_s3_command_usage(): + """Test the `ralph list s3` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "list s3 --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph list s3 [OPTIONS] + + S3 data backend. + + List objects for the target bucket. + + Options: + --access-key-id TEXT The access key id for the S3 account. + --default-bucket-name TEXT The default bucket name targeted. + --default-region TEXT The default region used in instantiating the + client. + --endpoint-url TEXT The endpoint URL of the S3. + --locale-encoding TEXT The encoding used for writing dictionaries to + objects. 
[default: utf8] + --secret-access-key TEXT The secret key for the S3 account. + --session-token TEXT The session token for the S3 account. + -n, --new / -a, --all Given the history, list only unread files. + -D, --details / -I, --ids Get detailed object information instead of just + object name. + -t, --target TEXT The target bucket to list from. If target is + `None`, the `default_bucket_name` is used instead. + --help Show this message and exit. + """ + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) + + +def test_cli_list_swift_command_usage(): + """Test the `ralph list swift` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "list swift --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph list swift [OPTIONS] + + SWIFT data backend. + + List files for the target container. + + Options: + --auth-url TEXT The authentication URL. [default: + https://auth.cloud.ovh.net/] + --default-container TEXT The default target container. + --identity-api-version TEXT The keystone API version to authenticate to. + [default: 3] + --locale-encoding TEXT The encoding used for reading/writing documents. + [default: utf8] + --object-storage-url TEXT The default storage URL. + --password TEXT The password of the openstack swift user. + --project-domain-name TEXT The project domain name. [default: Default] + --region-name TEXT The region where the container is. + --tenant-id TEXT The identifier of the tenant of the container. + --tenant-name TEXT The name of the tenant of the container. + --username TEXT The name of the openstack swift user. + --user-domain-name TEXT The user domain name. [default: Default] + -n, --new / -a, --all Given the history, list only not already read + objects. + -D, --details / -I, --ids Get detailed object information instead of just + names. + -t, --target TEXT The target container to list from. If `target` is + `None`, the `default_container` will be used. + --help Show this message and exit. + """ + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) diff --git a/tests/test_cli_read_usage.py b/tests/test_cli_read_usage.py new file mode 100644 index 000000000..33eee6905 --- /dev/null +++ b/tests/test_cli_read_usage.py @@ -0,0 +1,697 @@ +"""Tests for Ralph cli read usage strings.""" + +import re +import sys + +from click.testing import CliRunner + +from ralph.cli import cli + +from tests.fixtures.backends import ( + CLICKHOUSE_TEST_HOST, + ES_TEST_HOSTS, + MONGO_TEST_COLLECTION, + MONGO_TEST_CONNECTION_URI, +) + + +def test_cli_read_command_usage(): + """Test `ralph read` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "read --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph read BACKEND [OPTIONS] [QUERY] + + Read records matching the QUERY (json or string) from a configured backend. + + Options: + --help Show this message and exit. + + Commands: + async_es Asynchronous Elasticsearch data backend. + async_lrs Asynchronous LRS data backend. + async_mongo Asynchronous MongoDB data backend. + async_ws Websocket stream backend. + clickhouse ClickHouse database backend. + es Elasticsearch data backend. + fs FileSystem data backend. + ldp OVH LDP (Log Data Platform) data backend. + lrs LRS data backend. + mongo MongoDB data backend. + s3 S3 data backend. + swift SWIFT data backend. 
+ """ + assert re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) == result.output + + +def test_cli_read_async_es_command_usage(): + """Test `ralph read async_es` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "read async_es --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph read async_es [OPTIONS] [QUERY] + + Asynchronous Elasticsearch data backend. + + Read documents matching the query in the target index and yield them. + + QUERY: The Elasticsearch query to use when fetching documents. + + QUERY Attributes: + + q (str): The Elastisearch query in the Lucene query string syntax. See + Elasticsearch search reference for Lucene query syntax: + https://www.elastic.co/guide/en/elasticsearch/reference/8.9/search- + search.html#search-api-query-params-q + + query (dict): A search query definition using the Elasticsearch Query DSL. + See Elasticsearch search reference for query DSL syntax: + https://www.elastic.co/guide/en/elasticsearch/reference/8.9/search- + search.html#request-body-search-query + + pit (dict): Limit the search to a point in time (PIT). See ESQueryPit. + + size (int): The maximum number of documents to yield. + + sort (str or list): Specify how to sort search results. Set to `_doc` or + `_shard_doc` if order doesn't matter. See + https://www.elastic.co/guide/en/elasticsearch/reference/8.9/sort-search- + results.html + + search_after (list): Limit search query results to values after a document + matching the set of sort values in `search_after`. Used for pagination. + + track_total_hits (bool): Number of hits matching the query to count + accurately. Not used. Always set to `False`. + + Options: + --allow-yellow-status / --no-allow-yellow-status + Whether to consider Elasticsearch yellow + health status to be ok. [default: no-allow- + yellow-status] + --client-options KEY=VALUE,KEY=VALUE + A dictionary of valid options for the + Elasticsearch class initialization. [default: + ca_certs=None verify_certs=None] + --default-index TEXT The default index to use for querying + Elasticsearch. [default: statements] + --hosts VALUE1,VALUE2,VALUE3 The comma-separated list of Elasticsearch + nodes to connect to. [default: + ES_TEST_HOSTS] + --locale-encoding TEXT The encoding used for reading/writing + documents. [default: utf8] + --point-in-time-keep-alive TEXT + The duration for which Elasticsearch should + keep a point in time alive. [default: 1m] + --refresh-after-write TEXT Whether the Elasticsearch index should be + refreshed after the write operation. + -p, --prefetch INTEGER The number of records to prefetch (queue) + while yielding. If `prefetch` is `None` it + defaults to `1`, i.e. no records are + prefetched. + -m, --max-statements INTEGER The maximum number of statements to yield. If + `None` (default) or `0`, there is no maximum. + -I, --ignore-errors No impact as encoding errors are not expected + in Elasticsearch results. + -s, --chunk-size INTEGER The chunk size when reading documents by + batches. If `chunk_size` is `None` it defaults + to `READ_CHUNK_SIZE`. [default: 500] + -t, --target TEXT The target Elasticsearch index name to query. + If target is `None`, the `DEFAULT_INDEX` is + used instead. + --help Show this message and exit. 
+ """ + output = output.replace("ES_TEST_HOSTS", "".join(ES_TEST_HOSTS)) + assert re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) == result.output + + +def test_cli_read_async_lrs_command_usage(): + """Test `ralph read async_lrs` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "read async_lrs --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph read async_lrs [OPTIONS] [QUERY] + + Asynchronous LRS data backend. + + Get statements from LRS `target` endpoint. + + QUERY: The query to select records to read. + + QUERY Attributes: + + Options: + --base-url TEXT LRS server URL. [default: http://0.0.0.0:8100] + --headers KEY=VALUE,KEY=VALUE Headers defined for the LRS server connection. + [default: X_EXPERIENCE_API_VERSION='1.0.3' + CONTENT_TYPE='application/json'] + --locale-encoding TEXT The encoding used for reading statements. + [default: utf8] + --password TEXT Basic auth password for LRS authentication. + [default: secret] + --statements-endpoint TEXT Default endpoint for LRS statements resource. + [default: /xAPI/statements] + --status-endpoint TEXT Endpoint used to check server status. + [default: /__heartbeat__] + --username TEXT Basic auth username for LRS authentication. + [default: ralph] + -p, --prefetch INTEGER The number of records to prefetch (queue) while + yielding. If `prefetch` is `None` it defaults + to `1` - no records are prefetched. + -m, --max-statements INTEGER The maximum number of statements to yield. + -I, --ignore-errors If `True`, errors during the read operation are + ignored and logged. If `False` (default), a + `BackendException` is raised if an error + occurs. + -s, --chunk-size INTEGER The number of records or bytes to read in one + batch, depending on whether the records are + dictionaries or bytes. [default: 500] + -t, --target TEXT Endpoint from which to read data (e.g. + `/statements`). If target is `None`, + `/xAPI/statements` default endpoint is used. + --help Show this message and exit. + """ + assert re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) == result.output + + +def test_cli_read_async_mongo_command_usage(): + """Test `ralph read async_mongo` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "read async_mongo --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph read async_mongo [OPTIONS] [QUERY] + + Asynchronous MongoDB data backend. + + Read documents matching the `query` from `target` collection and yield them. + + QUERY: The MongoDB query to use when fetching documents. + + QUERY Attributes: + + filter (dict): A filter query to select which documents to include. + + limit (int): The maximum number of results to return. + + projection (dict): Dictionary specifying the fields to include or exclude. + + sort (list): A list of (key, direction) pairs specifying the sort order. + + Options: + --client-options KEY=VALUE,KEY=VALUE + A dictionary of MongoDB client options. + [default: document_class=None tz_aware=None] + --connection-uri TEXT The MongoDB connection URI. [default: + MONGO_TEST_CONNECTION_URI] + --default-collection TEXT The MongoDB database collection to get objects + from. [default: MONGO_TEST_COLLECTION] + --default-database TEXT The MongoDB database to connect to. [default: + statements] + --locale-encoding TEXT The locale encoding to use when none is + provided. [default: utf8] + -p, --prefetch INTEGER The number of records to prefetch (queue) + while yielding. If `prefetch` is `None` it + defaults to `1`, i.e. 
no records are + prefetched. + -m, --max-statements INTEGER The maximum number of statements to yield. If + `None` (default) or `0`, there is no maximum. + -I, --ignore-errors If `True`, encoding errors during the read + operation will be ignored and logged. If + `False` (default), a `BackendException` is + raised on any error. + -s, --chunk-size INTEGER The chunk size when reading documents by + batches. If `chunk_size` is `None` it defaults + to `READ_CHUNK_SIZE`. [default: 500] + -t, --target TEXT The MongoDB collection name to query. If + target is `None`, the `DEFAULT_COLLECTION` is + used instead. + --help Show this message and exit. + """ + output = output.replace("MONGO_TEST_CONNECTION_URI", MONGO_TEST_CONNECTION_URI) + output = output.replace("MONGO_TEST_COLLECTION", MONGO_TEST_COLLECTION) + assert re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) == result.output + + +def test_cli_read_async_ws_command_usage(): + """Test `ralph read async_ws` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "read async_ws --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph read async_ws [OPTIONS] [QUERY] + + Websocket stream backend. + + Read records matching the `query` in the `target` container and yield them. + + QUERY: Ignored. + + Options: + --client-options KEY=VALUE,KEY=VALUE + A dictionary of valid options for the + websocket client connection. See + `WSClientOptions`. [default: + close_timeout=None compression='deflate' + max_size=1048576 max_queue=32 open_timeout=10 + origin=None ping_interval=20 ping_timeout=20 + read_limit=65536 + user_agent_header='Python/PYTHON_VERSION + websockets/12.0' write_limit=65536] + --locale-encoding TEXT [default: utf8] + --uri TEXT The URI to connect to. [default: + ws://localhost:8765] + -p, --prefetch INTEGER The number of records to prefetch (queue) + while yielding. If `prefetch` is `None` it + defaults to `1` - no records are prefetched. + -m, --max-statements INTEGER The maximum number of statements to yield. + -I, --ignore-errors If `True`, encoding errors during the read + operation will be ignored and logged. If + `False` (default), a `BackendException` is + raised on any error. + -s, --chunk-size INTEGER Ignored. [default: 500] + -t, --target TEXT Ignored. + --help Show this message and exit. + """ + output = output.replace("PYTHON_VERSION", f"3.{sys.version_info[1]}") + assert re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) == result.output + + +def test_cli_read_clickhouse_command_usage(): + """Test `ralph read clickhouse` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "read clickhouse --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph read clickhouse [OPTIONS] [QUERY] + + ClickHouse database backend. + + Read documents matching the query in the target table and yield them. + + QUERY: The query to use when fetching documents. + + QUERY Attributes: + + select (str or list): Name of the table(s) to query. + + where (str or list): Where expression for filtering the data. + + parameters (dict): Dictionary of substitution values. + + limit (int): Maximum number of rows to return. + + sort (str): Order by expression determining the sorting direction. + + column_oriented (bool): Whether to return the results as a sequence of + columns rather than a sequence of rows. + + Options: + --client-options KEY=VALUE,KEY=VALUE + A dictionary of valid options for the + ClickHouse client connection. 
[default: + date_time_input_format='best_effort'] + --database TEXT ClickHouse database to connect to. [default: + xapi] + --event-table-name TEXT Table where events live. [default: + xapi_events_all] + --host TEXT ClickHouse server host to connect to. + [default: CLICKHOUSE_TEST_HOST] + --locale-encoding TEXT The locale encoding to use when none is + provided. [default: utf8] + --password TEXT Password for the given ClickHouse username + (optional). + --port INTEGER ClickHouse server port to connect to. + [default: 8123] + --username TEXT ClickHouse username to connect as (optional). + -m, --max-statements INTEGER The maximum number of statements to yield. If + `None` (default) or `0`, there is no maximum. + -I, --ignore-errors If `True`, encoding errors during the read + operation will be ignored and logged. If + `False` (default), a `BackendException` is + raised on any error. + -s, --chunk-size INTEGER The chunk size when reading documents by + batches. If `chunk_size` is `None` it defaults + to `READ_CHUNK_SIZE`. [default: 500] + -t, --target TEXT The target table name to query. If target is + `None`, the `EVENT_TABLE_NAME` is used + instead. + --help Show this message and exit. + """ + output = output.replace("CLICKHOUSE_TEST_HOST", CLICKHOUSE_TEST_HOST) + assert re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) == result.output + + +def test_cli_read_es_command_usage(): + """Test `ralph read es` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "read es --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph read es [OPTIONS] [QUERY] + + Elasticsearch data backend. + + Read documents matching the query in the target index and yield them. + + QUERY: The Elasticsearch query to use when fetching documents. + + QUERY Attributes: + + q (str): The Elastisearch query in the Lucene query string syntax. See + Elasticsearch search reference for Lucene query syntax: + https://www.elastic.co/guide/en/elasticsearch/reference/8.9/search- + search.html#search-api-query-params-q + + query (dict): A search query definition using the Elasticsearch Query DSL. + See Elasticsearch search reference for query DSL syntax: + https://www.elastic.co/guide/en/elasticsearch/reference/8.9/search- + search.html#request-body-search-query + + pit (dict): Limit the search to a point in time (PIT). See ESQueryPit. + + size (int): The maximum number of documents to yield. + + sort (str or list): Specify how to sort search results. Set to `_doc` or + `_shard_doc` if order doesn't matter. See + https://www.elastic.co/guide/en/elasticsearch/reference/8.9/sort-search- + results.html + + search_after (list): Limit search query results to values after a document + matching the set of sort values in `search_after`. Used for pagination. + + track_total_hits (bool): Number of hits matching the query to count + accurately. Not used. Always set to `False`. + + Options: + --allow-yellow-status / --no-allow-yellow-status + Whether to consider Elasticsearch yellow + health status to be ok. [default: no-allow- + yellow-status] + --client-options KEY=VALUE,KEY=VALUE + A dictionary of valid options for the + Elasticsearch class initialization. [default: + ca_certs=None verify_certs=None] + --default-index TEXT The default index to use for querying + Elasticsearch. [default: statements] + --hosts VALUE1,VALUE2,VALUE3 The comma-separated list of Elasticsearch + nodes to connect to. [default: + ES_TEST_HOSTS] + --locale-encoding TEXT The encoding used for reading/writing + documents. 
[default: utf8] + --point-in-time-keep-alive TEXT + The duration for which Elasticsearch should + keep a point in time alive. [default: 1m] + --refresh-after-write TEXT Whether the Elasticsearch index should be + refreshed after the write operation. + -m, --max-statements INTEGER The maximum number of statements to yield. If + `None` (default) or `0`, there is no maximum. + -I, --ignore-errors No impact as encoding errors are not expected + in Elasticsearch results. + -s, --chunk-size INTEGER The chunk size when reading documents by + batches. If `chunk_size` is `None` it defaults + to `READ_CHUNK_SIZE`. [default: 500] + -t, --target TEXT The target Elasticsearch index name to query. + If target is `None`, the `DEFAULT_INDEX` is + used instead. + --help Show this message and exit. + """ + output = output.replace("ES_TEST_HOSTS", "".join(ES_TEST_HOSTS)) + assert re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) == result.output + + +def test_cli_read_fs_command_usage(): + """Test `ralph read fs` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "read fs --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph read fs [OPTIONS] [QUERY] + + FileSystem data backend. + + Read files matching the query in the target folder and yield them. + + QUERY: The relative pattern for the files to read. + + Options: + --default-directory-path PATH The default target directory path where to + perform list, read and write operations. + [default: .] + --default-query-string TEXT The default query string to match files for the + read operation. [default: *] + --locale-encoding TEXT The encoding used for writing dictionaries to + files. [default: utf8] + -m, --max-statements INTEGER The maximum number of statements to yield. If + `None` (default) or `0`, there is no maximum. + -I, --ignore-errors If `True`, encoding errors during the read + operation will be ignored and logged. If + `False` (default), a `BackendException` is + raised on any error. + -s, --chunk-size INTEGER The chunk size when reading files. If + `chunk_size` is `None` it defaults to + `READ_CHUNK_SIZE`. If `raw_output` is set to + `False`, files are read line by line. + [default: 4096] + -t, --target TEXT The target directory path containing the files. + If target is `None`, the + `default_directory_path` is used instead. If + target is a relative path, it is considered to + be relative to the + `default_directory_path`. + --help Show this message and exit. + """ + assert re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) == result.output + + +def test_cli_read_ldp_command_usage(): + """Test `ralph read ldp` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "read ldp --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph read ldp [OPTIONS] [QUERY] + + OVH LDP (Log Data Platform) data backend. + + Read an archive matching the query in the target stream_id and yield it. + + QUERY: The ID of the archive to read. + + Options: + --application-key TEXT The OVH API application key (AK). + --application-secret TEXT The OVH API application secret (AS). + --consumer-key TEXT The OVH API consumer key (CK). + --default-stream-id TEXT The default stream identifier to query. + --endpoint TEXT The OVH API endpoint. [default: ovh-eu] + --locale-encoding TEXT [default: utf8] + --request-timeout TEXT HTTP request timeout in seconds. + --service-name TEXT The default LDP account name. + -m, --max-statements INTEGER The maximum number of statements to yield. 
If + `None` (default) or `0`, there is no maximum. + -I, --ignore-errors No impact as no encoding operation is performed. + -s, --chunk-size INTEGER The chunk size when reading archives by batch. + If `chunk_size` is `None` it defaults to + `READ_CHUNK_SIZE`. [default: 4096] + -t, --target TEXT The target stream_id containing the archives. If + target is `None`, the `DEFAULT_STREAM_ID` is + used instead. + --help Show this message and exit. + """ + assert re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) == result.output + + +def test_cli_read_lrs_command_usage(): + """Test `ralph read lrs` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "read lrs --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph read lrs [OPTIONS] [QUERY] + + LRS data backend. + + Get statements from LRS `target` endpoint. + + QUERY: The query to select records to read. + + QUERY Attributes: + + Options: + --base-url TEXT LRS server URL. [default: http://0.0.0.0:8100] + --headers KEY=VALUE,KEY=VALUE Headers defined for the LRS server connection. + [default: X_EXPERIENCE_API_VERSION='1.0.3' + CONTENT_TYPE='application/json'] + --locale-encoding TEXT The encoding used for reading statements. + [default: utf8] + --password TEXT Basic auth password for LRS authentication. + [default: secret] + --statements-endpoint TEXT Default endpoint for LRS statements resource. + [default: /xAPI/statements] + --status-endpoint TEXT Endpoint used to check server status. + [default: /__heartbeat__] + --username TEXT Basic auth username for LRS authentication. + [default: ralph] + -m, --max-statements INTEGER The maximum number of statements to yield. + -I, --ignore-errors If `True`, errors during the read operation are + ignored and logged. If `False` (default), a + `BackendException` is raised if an error + occurs. + -s, --chunk-size INTEGER The number of records or bytes to read in one + batch, depending on whether the records are + dictionaries or bytes. [default: 500] + -t, --target TEXT Endpoint from which to read data (e.g. + `/statements`). If target is `None`, + `/xAPI/statements` default endpoint is used. + --help Show this message and exit. + """ + assert re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) == result.output + + +def test_cli_read_mongo_command_usage(): + """Test `ralph read mongo` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "read mongo --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph read mongo [OPTIONS] [QUERY] + + MongoDB data backend. + + Read documents matching the `query` from `target` collection and yield them. + + QUERY: The MongoDB query to use when reading documents. + + QUERY Attributes: + + filter (dict): A filter query to select which documents to include. + + limit (int): The maximum number of results to return. + + projection (dict): Dictionary specifying the fields to include or exclude. + + sort (list): A list of (key, direction) pairs specifying the sort order. + + Options: + --client-options KEY=VALUE,KEY=VALUE + A dictionary of MongoDB client options. + [default: document_class=None tz_aware=None] + --connection-uri TEXT The MongoDB connection URI. [default: + MONGO_TEST_CONNECTION_URI] + --default-collection TEXT The MongoDB database collection to get objects + from. [default: MONGO_TEST_COLLECTION] + --default-database TEXT The MongoDB database to connect to. [default: + statements] + --locale-encoding TEXT The locale encoding to use when none is + provided. 
[default: utf8] + -m, --max-statements INTEGER The maximum number of statements to yield. If + `None` (default) or `0`, there is no maximum. + -I, --ignore-errors If `True`, encoding errors during the read + operation will be ignored and logged. If + `False` (default), a `BackendException` is + raised on any error. + -s, --chunk-size INTEGER The chunk size when reading archives by batch. + If `chunk_size` is `None` it defaults to + `READ_CHUNK_SIZE`. [default: 500] + -t, --target TEXT The MongoDB collection name to query. If + target is `None`, the `DEFAULT_COLLECTION` is + used instead. + --help Show this message and exit. + """ + output = output.replace("MONGO_TEST_CONNECTION_URI", MONGO_TEST_CONNECTION_URI) + output = output.replace("MONGO_TEST_COLLECTION", MONGO_TEST_COLLECTION) + assert re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) == result.output + + +def test_cli_read_s3_command_usage(): + """Test `ralph read s3` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "read s3 --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph read s3 [OPTIONS] [QUERY] + + S3 data backend. + + Read an object matching the `query` in the `target` bucket and yield it. + + QUERY: The ID of the object to read. + + Options: + --access-key-id TEXT The access key id for the S3 account. + --default-bucket-name TEXT The default bucket name targeted. + --default-region TEXT The default region used in instantiating the + client. + --endpoint-url TEXT The endpoint URL of the S3. + --locale-encoding TEXT The encoding used for writing dictionaries to + objects. [default: utf8] + --secret-access-key TEXT The secret key for the S3 account. + --session-token TEXT The session token for the S3 account. + -m, --max-statements INTEGER The maximum number of statements to yield. If + `None` (default) or `0`, there is no maximum. + -I, --ignore-errors If `True`, encoding errors during the read + operation will be ignored and logged. If `False` + (default), a `BackendException` is raised on any + error. + -s, --chunk-size INTEGER The number of records or bytes to read in one + batch, depending on whether the records are + dictionaries or bytes. If `chunk_size` is `None` + it defaults to `READ_CHUNK_SIZE`. [default: + 4096] + -t, --target TEXT The target bucket containing the object. If + target is `None`, the `default_bucket` is used + instead. + --help Show this message and exit. + """ + assert re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) == result.output + + +def test_cli_read_swift_command_usage(): + """Test `ralph read swift` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "read swift --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph read swift [OPTIONS] [QUERY] + + SWIFT data backend. + + Read objects matching the `query` in the `target` container and yield them. + + QUERY: The query to select objects to read. + + Options: + --auth-url TEXT The authentication URL. [default: + https://auth.cloud.ovh.net/] + --default-container TEXT The default target container. + --identity-api-version TEXT The keystone API version to authenticate to. + [default: 3] + --locale-encoding TEXT The encoding used for reading/writing documents. + [default: utf8] + --object-storage-url TEXT The default storage URL. + --password TEXT The password of the openstack swift user. + --project-domain-name TEXT The project domain name. [default: Default] + --region-name TEXT The region where the container is. 
+ --tenant-id TEXT The identifier of the tenant of the container. + --tenant-name TEXT The name of the tenant of the container. + --username TEXT The name of the openstack swift user. + --user-domain-name TEXT The user domain name. [default: Default] + -m, --max-statements INTEGER The maximum number of statements to yield. If + `None` (default) or `0`, there is no maximum. + -I, --ignore-errors If `True`, encoding errors during the read + operation will be ignored and logged. If `False` + (default), a `BackendException` is raised on any + error. + -s, --chunk-size INTEGER The number of records or bytes to read in one + batch, depending on whether the records are + dictionaries or bytes. If `chunk_size` is `None` + it defaults to `READ_CHUNK_SIZE`. [default: + 4096] + -t, --target TEXT The target container name. If `target` is + `None`, a default value is used instead. + --help Show this message and exit. + """ + assert re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) == result.output diff --git a/tests/test_cli_runserver_usage.py b/tests/test_cli_runserver_usage.py new file mode 100644 index 000000000..6a7fa1dbf --- /dev/null +++ b/tests/test_cli_runserver_usage.py @@ -0,0 +1,206 @@ +"""Tests for Ralph cli read usage strings.""" + +import re + +from click.testing import CliRunner + +from ralph.cli import cli + +from tests.fixtures.backends import ES_TEST_HOSTS + + +def test_cli_runserver_command_usage(): + """Test `ralph runserver` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "runserver --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph runserver BACKEND [OPTIONS] + + Run the API server for the development environment. + + Start uvicorn programmatically for convenience and documentation. + + Options: + --help Show this message and exit. + + Commands: + async_es Asynchronous Elasticsearch LRS backend implementation. + async_mongo Async MongoDB LRS backend implementation. + clickhouse ClickHouse LRS backend implementation. + es Elasticsearch LRS backend implementation. + fs FileSystem LRS Backend. + mongo MongoDB LRS backend. + """ + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) + + +def test_cli_runserver_async_es_command_usage(): + """Test the `ralph runserver async_es` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "runserver async_es --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph runserver async_es [OPTIONS] + + Asynchronous Elasticsearch LRS backend implementation. + + Run the API server for the development environment. + + Options: + --allow-yellow-status / --no-allow-yellow-status + [default: no-allow-yellow-status] + --client-options KEY=VALUE,KEY=VALUE + [default: ca_certs=None verify_certs=None] + --default-index TEXT [default: statements] + --hosts VALUE1,VALUE2,VALUE3 [default: ES_TEST_HOSTS] + --locale-encoding TEXT [default: utf8] + --point-in-time-keep-alive TEXT + [default: 1m] + --refresh-after-write TEXT + -h, --host TEXT + -p, --port INTEGER + --help Show this message and exit. 
+ """ + output = output.replace("ES_TEST_HOSTS", "".join(ES_TEST_HOSTS)) + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) + + +def test_cli_runserver_async_mongo_command_usage(): + """Test the `ralph runserver async_mongo` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "runserver async_mongo --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph runserver async_mongo [OPTIONS] + + Async MongoDB LRS backend implementation. + + Run the API server for the development environment. + + Options: + --client-options KEY=VALUE,KEY=VALUE + [default: document_class=None tz_aware=None] + --connection-uri TEXT [default: mongodb://localhost:27017/] + --default-collection TEXT [default: marsha] + --default-database TEXT [default: statements] + --locale-encoding TEXT [default: utf8] + -h, --host TEXT + -p, --port INTEGER + --help Show this message and exit. + """ + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) + + +def test_cli_runserver_clickhouse_command_usage(): + """Test the `ralph runserver clickhouse` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "runserver clickhouse --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph runserver clickhouse [OPTIONS] + + ClickHouse LRS backend implementation. + + Run the API server for the development environment. + + Options: + --client-options KEY=VALUE,KEY=VALUE + [default: + date_time_input_format='best_effort'] + --database TEXT [default: xapi] + --event-table-name TEXT [default: xapi_events_all] + --host TEXT [default: localhost] + --ids-chunk-size INTEGER The chunk size for querying by ids. [default: + 10000] + --locale-encoding TEXT [default: utf8] + --password TEXT + --port INTEGER [default: 8123] + --username TEXT + -h, --host TEXT + -p, --port INTEGER + --help Show this message and exit. + """ + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) + + +def test_cli_runserver_es_command_usage(): + """Test the `ralph runserver es` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "runserver es --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph runserver es [OPTIONS] + + Elasticsearch LRS backend implementation. + + Run the API server for the development environment. + + Options: + --allow-yellow-status / --no-allow-yellow-status + [default: no-allow-yellow-status] + --client-options KEY=VALUE,KEY=VALUE + [default: ca_certs=None verify_certs=None] + --default-index TEXT [default: statements] + --hosts VALUE1,VALUE2,VALUE3 [default: ES_TEST_HOSTS] + --locale-encoding TEXT [default: utf8] + --point-in-time-keep-alive TEXT + [default: 1m] + --refresh-after-write TEXT + -h, --host TEXT + -p, --port INTEGER + --help Show this message and exit. + """ + output = output.replace("ES_TEST_HOSTS", "".join(ES_TEST_HOSTS)) + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) + + +def test_cli_runserver_fs_command_usage(): + """Test the `ralph runserver fs` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "runserver fs --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph runserver fs [OPTIONS] + + FileSystem LRS Backend. + + Run the API server for the development environment. + + Options: + --default-directory-path PATH [default: .] + --default-lrs-file TEXT The default LRS filename to store statements. 
+ [default: fs_lrs.jsonl] + --default-query-string TEXT [default: *] + --locale-encoding TEXT [default: utf8] + -h, --host TEXT + -p, --port INTEGER + --help Show this message and exit. + """ + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) + + +def test_cli_runserver_mongo_command_usage(): + """Test the `ralph runserver mongo` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "runserver mongo --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph runserver mongo [OPTIONS] + + MongoDB LRS backend. + + Run the API server for the development environment. + + Options: + --client-options KEY=VALUE,KEY=VALUE + [default: document_class=None tz_aware=None] + --connection-uri TEXT [default: mongodb://localhost:27017/] + --default-collection TEXT [default: marsha] + --default-database TEXT [default: statements] + --locale-encoding TEXT [default: utf8] + -h, --host TEXT + -p, --port INTEGER + --help Show this message and exit. + """ + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) diff --git a/tests/test_cli_usage.py b/tests/test_cli_usage.py index dc4fd1524..02ddac8ca 100644 --- a/tests/test_cli_usage.py +++ b/tests/test_cli_usage.py @@ -1,12 +1,8 @@ """Tests for Ralph cli usage strings.""" -import logging - from click.testing import CliRunner from ralph.cli import cli -test_logger = logging.getLogger("ralph") - def test_cli_auth_command_usage(): """Test ralph auth command usage.""" @@ -99,483 +95,3 @@ def test_cli_convert_command_usage(): result = runner.invoke(cli, ["convert"]) assert result.exit_code > 0 assert "Error: Missing option '-p' / '--platform-url'" in result.output - - -def test_cli_read_command_usage(): - """Test ralph read command usage.""" - runner = CliRunner() - result = runner.invoke(cli, ["read", "--help"]) - - assert result.exit_code == 0 - assert ( - "Usage: ralph read [OPTIONS] [QUERY]\n\n" - " Read records matching the QUERY (json or string) from a configured backend." 
- "\n\n" - "Options:\n" - " -b, --backend " - "[async_es|async_lrs|async_mongo|async_ws|clickhouse|es|fs|ldp|lrs|mongo|s3|" - "swift]\n" - " Backend [required]\n" - " async_es backend: \n" - " --async-es-allow-yellow-status / --no-async-es-allow-yellow-status\n" - " --async-es-client-options KEY=VALUE,KEY=VALUE\n" - " --async-es-default-index TEXT\n" - " --async-es-hosts VALUE1,VALUE2,VALUE3\n" - " --async-es-locale-encoding TEXT\n" - " --async-es-point-in-time-keep-alive TEXT\n" - " --async-es-read-chunk-size INTEGER\n" - " --async-es-refresh-after-write TEXT\n" - " --async-es-write-chunk-size INTEGER\n" - " async_lrs backend: \n" - " --async-lrs-base-url TEXT\n" - " --async-lrs-headers KEY=VALUE,KEY=VALUE\n" - " --async-lrs-locale-encoding TEXT\n" - " --async-lrs-password TEXT\n" - " --async-lrs-read-chunk-size INTEGER\n" - " --async-lrs-statements-endpoint TEXT\n" - " --async-lrs-status-endpoint TEXT\n" - " --async-lrs-username TEXT\n" - " --async-lrs-write-chunk-size INTEGER\n" - " async_mongo backend: \n" - " --async-mongo-client-options KEY=VALUE,KEY=VALUE\n" - " --async-mongo-connection-uri MONGODSN\n" - " --async-mongo-default-collection TEXT\n" - " --async-mongo-default-database TEXT\n" - " --async-mongo-locale-encoding TEXT\n" - " --async-mongo-read-chunk-size INTEGER\n" - " --async-mongo-write-chunk-size INTEGER\n" - " async_ws backend: \n" - " --async-ws-client-options KEY=VALUE,KEY=VALUE\n" - " --async-ws-locale-encoding TEXT\n" - " --async-ws-read-chunk-size INTEGER\n" - " --async-ws-uri TEXT\n" - " --async-ws-write-chunk-size INTEGER\n" - " clickhouse backend: \n" - " --clickhouse-client-options KEY=VALUE,KEY=VALUE\n" - " --clickhouse-database TEXT\n" - " --clickhouse-event-table-name TEXT\n" - " --clickhouse-host TEXT\n" - " --clickhouse-locale-encoding TEXT\n" - " --clickhouse-password TEXT\n" - " --clickhouse-port INTEGER\n" - " --clickhouse-read-chunk-size INTEGER\n" - " --clickhouse-username TEXT\n" - " --clickhouse-write-chunk-size INTEGER\n" - " es backend: \n" - " --es-allow-yellow-status / --no-es-allow-yellow-status\n" - " --es-client-options KEY=VALUE,KEY=VALUE\n" - " --es-default-index TEXT\n" - " --es-hosts VALUE1,VALUE2,VALUE3\n" - " --es-locale-encoding TEXT\n" - " --es-point-in-time-keep-alive TEXT\n" - " --es-read-chunk-size INTEGER\n" - " --es-refresh-after-write TEXT\n" - " --es-write-chunk-size INTEGER\n" - " fs backend: \n" - " --fs-default-directory-path PATH\n" - " --fs-default-query-string TEXT\n" - " --fs-locale-encoding TEXT\n" - " --fs-read-chunk-size INTEGER\n" - " --fs-write-chunk-size INTEGER\n" - " ldp backend: \n" - " --ldp-application-key TEXT\n" - " --ldp-application-secret TEXT\n" - " --ldp-consumer-key TEXT\n" - " --ldp-default-stream-id TEXT\n" - " --ldp-endpoint TEXT\n" - " --ldp-locale-encoding TEXT\n" - " --ldp-read-chunk-size INTEGER\n" - " --ldp-request-timeout TEXT\n" - " --ldp-service-name TEXT\n" - " --ldp-write-chunk-size INTEGER\n" - " lrs backend: \n" - " --lrs-base-url TEXT\n" - " --lrs-headers KEY=VALUE,KEY=VALUE\n" - " --lrs-locale-encoding TEXT\n" - " --lrs-password TEXT\n" - " --lrs-read-chunk-size INTEGER\n" - " --lrs-statements-endpoint TEXT\n" - " --lrs-status-endpoint TEXT\n" - " --lrs-username TEXT\n" - " --lrs-write-chunk-size INTEGER\n" - " mongo backend: \n" - " --mongo-client-options KEY=VALUE,KEY=VALUE\n" - " --mongo-connection-uri MONGODSN\n" - " --mongo-default-collection TEXT\n" - " --mongo-default-database TEXT\n" - " --mongo-locale-encoding TEXT\n" - " --mongo-read-chunk-size INTEGER\n" - " 
--mongo-write-chunk-size INTEGER\n" - " s3 backend: \n" - " --s3-access-key-id TEXT\n" - " --s3-default-bucket-name TEXT\n" - " --s3-default-region TEXT\n" - " --s3-endpoint-url TEXT\n" - " --s3-locale-encoding TEXT\n" - " --s3-read-chunk-size INTEGER\n" - " --s3-secret-access-key TEXT\n" - " --s3-session-token TEXT\n" - " --s3-write-chunk-size INTEGER\n" - " swift backend: \n" - " --swift-auth-url TEXT\n" - " --swift-default-container TEXT\n" - " --swift-identity-api-version TEXT\n" - " --swift-locale-encoding TEXT\n" - " --swift-object-storage-url TEXT\n" - " --swift-password TEXT\n" - " --swift-project-domain-name TEXT\n" - " --swift-read-chunk-size INTEGER\n" - " --swift-region-name TEXT\n" - " --swift-tenant-id TEXT\n" - " --swift-tenant-name TEXT\n" - " --swift-username TEXT\n" - " --swift-user-domain-name TEXT\n" - " --swift-write-chunk-size INTEGER\n" - " -s, --chunk-size INTEGER Get events by chunks of size #\n" - " -t, --target TEXT Endpoint from which to read events (e.g.\n" - " `/statements`)\n" - " -i, --ignore_errors BOOLEAN Ignore errors during the encoding operation." - "\n" - " [default: False]\n" - " --help Show this message and exit.\n" - ) == result.output - logging.warning(result.output) - result = runner.invoke(cli, ["read"]) - assert result.exit_code > 0 - assert ( - "Error: Missing option '-b' / '--backend'. " - "Choose from:\n\tasync_es,\n\tasync_lrs,\n\tasync_mongo,\n\tasync_ws," - "\n\tclickhouse,\n\tes,\n\tfs,\n\tldp,\n\tlrs,\n\tmongo,\n\ts3,\n\tswift\n" - ) in result.output - - -def test_cli_list_command_usage(): - """Test ralph list command usage.""" - runner = CliRunner() - result = runner.invoke(cli, ["list", "--help"]) - - assert result.exit_code == 0 - assert ( - "Usage: ralph list [OPTIONS]\n\n" - " List available documents from a configured data backend.\n\n" - "Options:\n" - " -b, --backend [async_es|async_mongo|clickhouse|es|fs|ldp|mongo|s3|swift]\n" - " Backend [required]\n" - " async_es backend: \n" - " --async-es-allow-yellow-status / --no-async-es-allow-yellow-status\n" - " --async-es-client-options KEY=VALUE,KEY=VALUE\n" - " --async-es-default-index TEXT\n" - " --async-es-hosts VALUE1,VALUE2,VALUE3\n" - " --async-es-locale-encoding TEXT\n" - " --async-es-point-in-time-keep-alive TEXT\n" - " --async-es-read-chunk-size INTEGER\n" - " --async-es-refresh-after-write TEXT\n" - " --async-es-write-chunk-size INTEGER\n" - " async_mongo backend: \n" - " --async-mongo-client-options KEY=VALUE,KEY=VALUE\n" - " --async-mongo-connection-uri MONGODSN\n" - " --async-mongo-default-collection TEXT\n" - " --async-mongo-default-database TEXT\n" - " --async-mongo-locale-encoding TEXT\n" - " --async-mongo-read-chunk-size INTEGER\n" - " --async-mongo-write-chunk-size INTEGER\n" - " clickhouse backend: \n" - " --clickhouse-client-options KEY=VALUE,KEY=VALUE\n" - " --clickhouse-database TEXT\n" - " --clickhouse-event-table-name TEXT\n" - " --clickhouse-host TEXT\n" - " --clickhouse-locale-encoding TEXT\n" - " --clickhouse-password TEXT\n" - " --clickhouse-port INTEGER\n" - " --clickhouse-read-chunk-size INTEGER\n" - " --clickhouse-username TEXT\n" - " --clickhouse-write-chunk-size INTEGER\n" - " es backend: \n" - " --es-allow-yellow-status / --no-es-allow-yellow-status\n" - " --es-client-options KEY=VALUE,KEY=VALUE\n" - " --es-default-index TEXT\n" - " --es-hosts VALUE1,VALUE2,VALUE3\n" - " --es-locale-encoding TEXT\n" - " --es-point-in-time-keep-alive TEXT\n" - " --es-read-chunk-size INTEGER\n" - " --es-refresh-after-write TEXT\n" - " --es-write-chunk-size INTEGER\n" - 
" fs backend: \n" - " --fs-default-directory-path PATH\n" - " --fs-default-query-string TEXT\n" - " --fs-locale-encoding TEXT\n" - " --fs-read-chunk-size INTEGER\n" - " --fs-write-chunk-size INTEGER\n" - " ldp backend: \n" - " --ldp-application-key TEXT\n" - " --ldp-application-secret TEXT\n" - " --ldp-consumer-key TEXT\n" - " --ldp-default-stream-id TEXT\n" - " --ldp-endpoint TEXT\n" - " --ldp-locale-encoding TEXT\n" - " --ldp-read-chunk-size INTEGER\n" - " --ldp-request-timeout TEXT\n" - " --ldp-service-name TEXT\n" - " --ldp-write-chunk-size INTEGER\n" - " mongo backend: \n" - " --mongo-client-options KEY=VALUE,KEY=VALUE\n" - " --mongo-connection-uri MONGODSN\n" - " --mongo-default-collection TEXT\n" - " --mongo-default-database TEXT\n" - " --mongo-locale-encoding TEXT\n" - " --mongo-read-chunk-size INTEGER\n" - " --mongo-write-chunk-size INTEGER\n" - " s3 backend: \n" - " --s3-access-key-id TEXT\n" - " --s3-default-bucket-name TEXT\n" - " --s3-default-region TEXT\n" - " --s3-endpoint-url TEXT\n" - " --s3-locale-encoding TEXT\n" - " --s3-read-chunk-size INTEGER\n" - " --s3-secret-access-key TEXT\n" - " --s3-session-token TEXT\n" - " --s3-write-chunk-size INTEGER\n" - " swift backend: \n" - " --swift-auth-url TEXT\n" - " --swift-default-container TEXT\n" - " --swift-identity-api-version TEXT\n" - " --swift-locale-encoding TEXT\n" - " --swift-object-storage-url TEXT\n" - " --swift-password TEXT\n" - " --swift-project-domain-name TEXT\n" - " --swift-read-chunk-size INTEGER\n" - " --swift-region-name TEXT\n" - " --swift-tenant-id TEXT\n" - " --swift-tenant-name TEXT\n" - " --swift-username TEXT\n" - " --swift-user-domain-name TEXT\n" - " --swift-write-chunk-size INTEGER\n" - " -t, --target TEXT Container to list events from\n" - " -n, --new / -a, --all List not fetched (or all) documents\n" - " -D, --details / -I, --ids Get documents detailed output (JSON)\n" - " --help Show this message and exit.\n" - ) == result.output - - result = runner.invoke(cli, ["list"]) - assert result.exit_code > 0 - assert ( - "Error: Missing option '-b' / '--backend'. 
Choose from:\n\tasync_es,\n\t" - "async_mongo,\n\tclickhouse,\n\tes,\n\tfs,\n\tldp,\n\tmongo,\n\ts3," - "\n\tswift\n" - ) in result.output - - -def test_cli_write_command_usage(): - """Test ralph write command usage.""" - runner = CliRunner() - result = runner.invoke(cli, ["write", "--help"]) - - assert result.exit_code == 0 - - expected_output = ( - "Usage: ralph write [OPTIONS]\n\n" - " Write an archive to a configured backend.\n\n" - "Options:\n" - " -b, --backend " - "[async_es|async_lrs|async_mongo|clickhouse|es|fs|lrs|mongo|s3|swift]" - "\n" - " Backend [required]\n" - " async_es backend: \n" - " --async-es-allow-yellow-status / --no-async-es-allow-yellow-status\n" - " --async-es-client-options KEY=VALUE,KEY=VALUE\n" - " --async-es-default-index TEXT\n" - " --async-es-hosts VALUE1,VALUE2,VALUE3\n" - " --async-es-locale-encoding TEXT\n" - " --async-es-point-in-time-keep-alive TEXT\n" - " --async-es-read-chunk-size INTEGER\n" - " --async-es-refresh-after-write TEXT\n" - " --async-es-write-chunk-size INTEGER\n" - " async_lrs backend: \n" - " --async-lrs-base-url TEXT\n" - " --async-lrs-headers KEY=VALUE,KEY=VALUE\n" - " --async-lrs-locale-encoding TEXT\n" - " --async-lrs-password TEXT\n" - " --async-lrs-read-chunk-size INTEGER\n" - " --async-lrs-statements-endpoint TEXT\n" - " --async-lrs-status-endpoint TEXT\n" - " --async-lrs-username TEXT\n" - " --async-lrs-write-chunk-size INTEGER\n" - " async_mongo backend: \n" - " --async-mongo-client-options KEY=VALUE,KEY=VALUE\n" - " --async-mongo-connection-uri MONGODSN\n" - " --async-mongo-default-collection TEXT\n" - " --async-mongo-default-database TEXT\n" - " --async-mongo-locale-encoding TEXT\n" - " --async-mongo-read-chunk-size INTEGER\n" - " --async-mongo-write-chunk-size INTEGER\n" - " clickhouse backend: \n" - " --clickhouse-client-options KEY=VALUE,KEY=VALUE\n" - " --clickhouse-database TEXT\n" - " --clickhouse-event-table-name TEXT\n" - " --clickhouse-host TEXT\n" - " --clickhouse-locale-encoding TEXT\n" - " --clickhouse-password TEXT\n" - " --clickhouse-port INTEGER\n" - " --clickhouse-read-chunk-size INTEGER\n" - " --clickhouse-username TEXT\n" - " --clickhouse-write-chunk-size INTEGER\n" - " es backend: \n" - " --es-allow-yellow-status / --no-es-allow-yellow-status\n" - " --es-client-options KEY=VALUE,KEY=VALUE\n" - " --es-default-index TEXT\n" - " --es-hosts VALUE1,VALUE2,VALUE3\n" - " --es-locale-encoding TEXT\n" - " --es-point-in-time-keep-alive TEXT\n" - " --es-read-chunk-size INTEGER\n" - " --es-refresh-after-write TEXT\n" - " --es-write-chunk-size INTEGER\n" - " fs backend: \n" - " --fs-default-directory-path PATH\n" - " --fs-default-query-string TEXT\n" - " --fs-locale-encoding TEXT\n" - " --fs-read-chunk-size INTEGER\n" - " --fs-write-chunk-size INTEGER\n" - " lrs backend: \n" - " --lrs-base-url TEXT\n" - " --lrs-headers KEY=VALUE,KEY=VALUE\n" - " --lrs-locale-encoding TEXT\n" - " --lrs-password TEXT\n" - " --lrs-read-chunk-size INTEGER\n" - " --lrs-statements-endpoint TEXT\n" - " --lrs-status-endpoint TEXT\n" - " --lrs-username TEXT\n" - " --lrs-write-chunk-size INTEGER\n" - " mongo backend: \n" - " --mongo-client-options KEY=VALUE,KEY=VALUE\n" - " --mongo-connection-uri MONGODSN\n" - " --mongo-default-collection TEXT\n" - " --mongo-default-database TEXT\n" - " --mongo-locale-encoding TEXT\n" - " --mongo-read-chunk-size INTEGER\n" - " --mongo-write-chunk-size INTEGER\n" - " s3 backend: \n" - " --s3-access-key-id TEXT\n" - " --s3-default-bucket-name TEXT\n" - " --s3-default-region TEXT\n" - " --s3-endpoint-url TEXT\n" - " 
--s3-locale-encoding TEXT\n" - " --s3-read-chunk-size INTEGER\n" - " --s3-secret-access-key TEXT\n" - " --s3-session-token TEXT\n" - " --s3-write-chunk-size INTEGER\n" - " swift backend: \n" - " --swift-auth-url TEXT\n" - " --swift-default-container TEXT\n" - " --swift-identity-api-version TEXT\n" - " --swift-locale-encoding TEXT\n" - " --swift-object-storage-url TEXT\n" - " --swift-password TEXT\n" - " --swift-project-domain-name TEXT\n" - " --swift-read-chunk-size INTEGER\n" - " --swift-region-name TEXT\n" - " --swift-tenant-id TEXT\n" - " --swift-tenant-name TEXT\n" - " --swift-username TEXT\n" - " --swift-user-domain-name TEXT\n" - " --swift-write-chunk-size INTEGER\n" - " -t, --target TEXT The target container to write into\n" - " -s, --chunk-size INTEGER Get events by chunks of size #\n" - " -I, --ignore-errors Continue writing regardless of raised errors" - "\n" - " -o, --operation-type OP_TYPE Either index, create, delete, update or " - "append\n" - " -c, --concurrency INTEGER Number of chunks to write concurrently. (" - "async\n" - " backends only)\n" - " --help Show this message and exit.\n" - ) - assert expected_output == result.output - - result = runner.invoke(cli, ["write"]) - assert result.exit_code > 0 - assert ( - "Missing option '-b' / '--backend'. Choose from:\n\tasync_es,\n\tasync_lrs,\n\t" - "async_mongo,\n\tclickhouse,\n\tes,\n\tfs,\n\tlrs,\n\tmongo,\n\ts3,\n\tswift\n" - ) in result.output - - -def test_cli_runserver_command_usage(): - """Test ralph runserver command usage.""" - runner = CliRunner() - result = runner.invoke(cli, ["runserver", "--help"]) - - expected_output = ( - "Usage: ralph runserver [OPTIONS]\n\n" - " Run the API server for the development environment.\n\n" - " Starts uvicorn programmatically for convenience and documentation.\n\n" - "Options:\n" - " -b, --backend [async_es|async_mongo|clickhouse|es|fs|mongo]\n" - " Backend [required]\n" - " async_es backend: \n" - " --async-es-allow-yellow-status / --no-async-es-allow-yellow-status\n" - " --async-es-client-options KEY=VALUE,KEY=VALUE\n" - " --async-es-default-index TEXT\n" - " --async-es-hosts VALUE1,VALUE2,VALUE3\n" - " --async-es-locale-encoding TEXT\n" - " --async-es-point-in-time-keep-alive TEXT\n" - " --async-es-read-chunk-size INTEGER\n" - " --async-es-refresh-after-write TEXT\n" - " --async-es-write-chunk-size INTEGER\n" - " async_mongo backend: \n" - " --async-mongo-client-options KEY=VALUE,KEY=VALUE\n" - " --async-mongo-connection-uri MONGODSN\n" - " --async-mongo-default-collection TEXT\n" - " --async-mongo-default-database TEXT\n" - " --async-mongo-locale-encoding TEXT\n" - " --async-mongo-read-chunk-size INTEGER\n" - " --async-mongo-write-chunk-size INTEGER\n" - " clickhouse backend: \n" - " --clickhouse-client-options KEY=VALUE,KEY=VALUE\n" - " --clickhouse-database TEXT\n" - " --clickhouse-event-table-name TEXT\n" - " --clickhouse-host TEXT\n" - " --clickhouse-ids-chunk-size INTEGER\n" - " --clickhouse-locale-encoding TEXT\n" - " --clickhouse-password TEXT\n" - " --clickhouse-port INTEGER\n" - " --clickhouse-read-chunk-size INTEGER\n" - " --clickhouse-username TEXT\n" - " --clickhouse-write-chunk-size INTEGER\n" - " es backend: \n" - " --es-allow-yellow-status / --no-es-allow-yellow-status\n" - " --es-client-options KEY=VALUE,KEY=VALUE\n" - " --es-default-index TEXT\n" - " --es-hosts VALUE1,VALUE2,VALUE3\n" - " --es-locale-encoding TEXT\n" - " --es-point-in-time-keep-alive TEXT\n" - " --es-read-chunk-size INTEGER\n" - " --es-refresh-after-write TEXT\n" - " --es-write-chunk-size 
INTEGER\n" - " fs backend: \n" - " --fs-default-directory-path PATH\n" - " --fs-default-lrs-file TEXT\n" - " --fs-default-query-string TEXT\n" - " --fs-locale-encoding TEXT\n" - " --fs-read-chunk-size INTEGER\n" - " --fs-write-chunk-size INTEGER\n" - " mongo backend: \n" - " --mongo-client-options KEY=VALUE,KEY=VALUE\n" - " --mongo-connection-uri MONGODSN\n" - " --mongo-default-collection TEXT\n" - " --mongo-default-database TEXT\n" - " --mongo-locale-encoding TEXT\n" - " --mongo-read-chunk-size INTEGER\n" - " --mongo-write-chunk-size INTEGER\n" - " -h, --host TEXT LRS server host name\n" - " -p, --port INTEGER LRS server port\n" - " --help Show this message and exit.\n" - ) - assert result.exit_code == 0 - assert expected_output in result.output - - result = runner.invoke(cli, ["runserver"]) - assert result.exit_code > 0 - assert ( - "Missing option '-b' / '--backend'. Choose from:\n\tasync_es,\n\tasync_mongo,\n" - "\tclickhouse,\n\tes,\n\tfs,\n\tmongo\n" - ) in result.output diff --git a/tests/test_cli_write_usage.py b/tests/test_cli_write_usage.py new file mode 100644 index 000000000..2343a04ab --- /dev/null +++ b/tests/test_cli_write_usage.py @@ -0,0 +1,520 @@ +"""Tests for Ralph cli read usage strings.""" + +import re + +from click.testing import CliRunner + +from ralph.cli import cli + +from tests.fixtures.backends import ( + CLICKHOUSE_TEST_HOST, + ES_TEST_HOSTS, + MONGO_TEST_COLLECTION, + MONGO_TEST_CONNECTION_URI, +) + + +def test_cli_write_command_usage(): + """Test `ralph write` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "write --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph write BACKEND [OPTIONS] + + Write data to a configured backend. + + Options: + --help Show this message and exit. + + Commands: + async_es Asynchronous Elasticsearch data backend. + async_lrs Asynchronous LRS data backend. + async_mongo Asynchronous MongoDB data backend. + clickhouse ClickHouse database backend. + es Elasticsearch data backend. + fs FileSystem data backend. + lrs LRS data backend. + mongo MongoDB data backend. + s3 S3 data backend. + swift SWIFT data backend. + """ + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) + + +def test_cli_write_async_es_command_usage(): + """Test the `ralph write async_es` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "write async_es --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph write async_es [OPTIONS] + + Asynchronous Elasticsearch data backend. + + Write data documents to the target index and return their count. + + Options: + --allow-yellow-status / --no-allow-yellow-status + Whether to consider Elasticsearch yellow + health status to be ok. [default: no-allow- + yellow-status] + --client-options KEY=VALUE,KEY=VALUE + A dictionary of valid options for the + Elasticsearch class initialization. [default: + ca_certs=None verify_certs=None] + --default-index TEXT The default index to use for querying + Elasticsearch. [default: statements] + --hosts VALUE1,VALUE2,VALUE3 The comma-separated list of Elasticsearch + nodes to connect to. [default: + ES_TEST_HOSTS] + --locale-encoding TEXT The encoding used for reading/writing + documents. [default: utf8] + --point-in-time-keep-alive TEXT + The duration for which Elasticsearch should + keep a point in time alive. [default: 1m] + --refresh-after-write TEXT Whether the Elasticsearch index should be + refreshed after the write operation. 
+ -c, --concurrency INTEGER The number of chunks to write concurrently. If + `None` it defaults to `1`. + -o, --operation-type OP_TYPE The mode of the write operation. If + `operation_type` is `None`, the + `default_operation_type` is used instead. See + `BaseOperationType`. [default: index] + -I, --ignore-errors If `True`, errors during decoding, encoding + and sending batches of documents are ignored + and logged. If `False` (default), a + `BackendException` is raised on any error. + -s, --chunk-size INTEGER The number of documents to write in one batch. + If `chunk_size` is `None` it defaults to + `WRITE_CHUNK_SIZE`. [default: 500] + -t, --target TEXT The target Elasticsearch index name. If target + is `None`, the `DEFAULT_INDEX` is used + instead. + --help Show this message and exit. + """ + output = output.replace("ES_TEST_HOSTS", "".join(ES_TEST_HOSTS)) + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) + + +def test_cli_write_async_lrs_command_usage(): + """Test the `ralph write async_lrs` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "write async_lrs --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph write async_lrs [OPTIONS] + + Asynchronous LRS data backend. + + Write `data` records to the `target` endpoint and return their count. + + Options: + --base-url TEXT LRS server URL. [default: http://0.0.0.0:8100] + --headers KEY=VALUE,KEY=VALUE Headers defined for the LRS server connection. + [default: X_EXPERIENCE_API_VERSION='1.0.3' + CONTENT_TYPE='application/json'] + --locale-encoding TEXT The encoding used for reading statements. + [default: utf8] + --password TEXT Basic auth password for LRS authentication. + [default: secret] + --statements-endpoint TEXT Default endpoint for LRS statements resource. + [default: /xAPI/statements] + --status-endpoint TEXT Endpoint used to check server status. + [default: /__heartbeat__] + --username TEXT Basic auth username for LRS authentication. + [default: ralph] + -c, --concurrency INTEGER The number of chunks to write concurrently. If + `None` it defaults to `1`. + -o, --operation-type OP_TYPE The mode of the write operation. If + `operation_type` is `None`, the + `default_operation_type` is used instead. See + `BaseOperationType`. [default: create] + -I, --ignore-errors If `True`, errors during the write operation + are ignored and logged. If `False` (default), a + `BackendException` is raised if an error + occurs. + -s, --chunk-size INTEGER The number of records or bytes to write in one + batch, depending on whether `data` contains + dictionaries or bytes. If `chunk_size` is + `None`, a default value is used instead. + [default: 500] + -t, --target TEXT Endpoint in which to write data (e.g. + `/statements`). If `target` is `None`, + `/xAPI/statements` default endpoint is used. + --help Show this message and exit. + """ + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) + + +def test_cli_write_async_mongo_command_usage(): + """Test the `ralph write async_mongo` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "write async_mongo --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph write async_mongo [OPTIONS] + + Asynchronous MongoDB data backend. + + Write data documents to the target collection and return their count. + + Options: + --client-options KEY=VALUE,KEY=VALUE + A dictionary of MongoDB client options. 
+ [default: document_class=None tz_aware=None] + --connection-uri TEXT The MongoDB connection URI. [default: + MONGO_TEST_CONNECTION_URI] + --default-collection TEXT The MongoDB database collection to get objects + from. [default: MONGO_TEST_COLLECTION] + --default-database TEXT The MongoDB database to connect to. [default: + statements] + --locale-encoding TEXT The locale encoding to use when none is + provided. [default: utf8] + -c, --concurrency INTEGER The number of chunks to write concurrently. If + `None` it defaults to `1`. + -o, --operation-type OP_TYPE The mode of the write operation. If + `operation_type` is `None`, the + `default_operation_type` is used instead. See + `BaseOperationType`. [default: index] + -I, --ignore-errors If `True`, errors during decoding, encoding + and sending batches of documents are ignored + and logged. If `False` (default), a + `BackendException` is raised on any error. + -s, --chunk-size INTEGER The number of documents to write in one batch. + If `chunk_size` is `None` it defaults to + `WRITE_CHUNK_SIZE`. [default: 500] + -t, --target TEXT The target MongoDB collection name. + --help Show this message and exit. + """ + output = output.replace("MONGO_TEST_CONNECTION_URI", MONGO_TEST_CONNECTION_URI) + output = output.replace("MONGO_TEST_COLLECTION", MONGO_TEST_COLLECTION) + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) + + +def test_cli_write_clickhouse_command_usage(): + """Test the `ralph write clickhouse` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "write clickhouse --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph write clickhouse [OPTIONS] + + ClickHouse database backend. + + Write `data` documents to the `target` table and return their count. + + Options: + --client-options KEY=VALUE,KEY=VALUE + A dictionary of valid options for the + ClickHouse client connection. [default: + date_time_input_format='best_effort'] + --database TEXT ClickHouse database to connect to. [default: + xapi] + --event-table-name TEXT Table where events live. [default: + xapi_events_all] + --host TEXT ClickHouse server host to connect to. + [default: CLICKHOUSE_TEST_HOST] + --locale-encoding TEXT The locale encoding to use when none is + provided. [default: utf8] + --password TEXT Password for the given ClickHouse username + (optional). + --port INTEGER ClickHouse server port to connect to. + [default: 8123] + --username TEXT ClickHouse username to connect as (optional). + -o, --operation-type OP_TYPE The mode of the write operation. If + `operation_type` is `None`, the + `default_operation_type` is used instead. See + `BaseOperationType`. [default: create] + -I, --ignore-errors If `True`, errors during decoding, encoding + and sending batches of documents are ignored + and logged. If `False` (default), a + `BackendException` is raised on any error. + -s, --chunk-size INTEGER The number of documents to write in one batch. + If `chunk_size` is `None` it defaults to + `WRITE_CHUNK_SIZE`. [default: 500] + -t, --target TEXT The target table name. If target is `None`, + the `EVENT_TABLE_NAME` is used instead. + --help Show this message and exit. 
+ """ + output = output.replace("CLICKHOUSE_TEST_HOST", CLICKHOUSE_TEST_HOST) + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) + + +def test_cli_write_es_command_usage(): + """Test the `ralph write es` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "write es --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph write es [OPTIONS] + + Elasticsearch data backend. + + Write data documents to the target index and return their count. + + Options: + --allow-yellow-status / --no-allow-yellow-status + Whether to consider Elasticsearch yellow + health status to be ok. [default: no-allow- + yellow-status] + --client-options KEY=VALUE,KEY=VALUE + A dictionary of valid options for the + Elasticsearch class initialization. [default: + ca_certs=None verify_certs=None] + --default-index TEXT The default index to use for querying + Elasticsearch. [default: statements] + --hosts VALUE1,VALUE2,VALUE3 The comma-separated list of Elasticsearch + nodes to connect to. [default: + ES_TEST_HOSTS] + --locale-encoding TEXT The encoding used for reading/writing + documents. [default: utf8] + --point-in-time-keep-alive TEXT + The duration for which Elasticsearch should + keep a point in time alive. [default: 1m] + --refresh-after-write TEXT Whether the Elasticsearch index should be + refreshed after the write operation. + -o, --operation-type OP_TYPE The mode of the write operation. If + `operation_type` is `None`, the + `default_operation_type` is used instead. See + `BaseOperationType`. [default: index] + -I, --ignore-errors If `True`, errors during decoding, encoding + and sending batches of documents are ignored + and logged. If `False` (default), a + `BackendException` is raised on any error. + -s, --chunk-size INTEGER The number of documents to write in one batch. + If `chunk_size` is `None` it defaults to + `WRITE_CHUNK_SIZE`. [default: 500] + -t, --target TEXT The target Elasticsearch index name. If target + is `None`, the `DEFAULT_INDEX` is used + instead. + --help Show this message and exit. + """ + output = output.replace("ES_TEST_HOSTS", "".join(ES_TEST_HOSTS)) + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) + + +def test_cli_write_fs_command_usage(): + """Test the `ralph write fs` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "write fs --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph write fs [OPTIONS] + + FileSystem data backend. + + Write data records to the target file and return their count. + + Options: + --default-directory-path PATH The default target directory path where to + perform list, read and write operations. + [default: .] + --default-query-string TEXT The default query string to match files for the + read operation. [default: *] + --locale-encoding TEXT The encoding used for writing dictionaries to + files. [default: utf8] + -o, --operation-type OP_TYPE The mode of the write operation. If + operation_type is `CREATE` or `INDEX`, the + target file is expected to be absent. If + the target file exists a `FileExistsError` is + raised. If operation_type is `UPDATE`, the + target file is overwritten. If operation_type + is `APPEND`, the data is appended to the + end of the target file. [default: create] + -I, --ignore-errors If `True`, errors during decoding and encoding + of records are ignored and logged. If `False` + (default), a `BackendException` is raised on + any error. + -s, --chunk-size INTEGER Ignored. 
[default: 4096] + -t, --target TEXT The target file path. If target is a relative + path, it is considered to be relative to the + `default_directory_path`. If target is `None`, + a random (uuid4) file is created in the + `default_directory_path` and used as the target + instead. + --help Show this message and exit. + """ + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) + + +def test_cli_write_lrs_command_usage(): + """Test the `ralph write lrs` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "write lrs --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph write lrs [OPTIONS] + + LRS data backend. + + Write `data` records to the `target` endpoint and return their count. + + Options: + --base-url TEXT LRS server URL. [default: http://0.0.0.0:8100] + --headers KEY=VALUE,KEY=VALUE Headers defined for the LRS server connection. + [default: X_EXPERIENCE_API_VERSION='1.0.3' + CONTENT_TYPE='application/json'] + --locale-encoding TEXT The encoding used for reading statements. + [default: utf8] + --password TEXT Basic auth password for LRS authentication. + [default: secret] + --statements-endpoint TEXT Default endpoint for LRS statements resource. + [default: /xAPI/statements] + --status-endpoint TEXT Endpoint used to check server status. + [default: /__heartbeat__] + --username TEXT Basic auth username for LRS authentication. + [default: ralph] + -o, --operation-type OP_TYPE The mode of the write operation. If + `operation_type` is `None`, the + `default_operation_type` is used instead. See + `BaseOperationType`. [default: create] + -I, --ignore-errors If `True`, errors during the write operation + are ignored and logged. If `False` (default), a + `BackendException` is raised if an error + occurs. + -s, --chunk-size INTEGER The number of records or bytes to write in one + batch, depending on whether `data` contains + dictionaries or bytes. If `chunk_size` is + `None`, a default value is used instead. + [default: 500] + -t, --target TEXT Endpoint in which to write data (e.g. + `/statements`). If `target` is `None`, + `/xAPI/statements` default endpoint is used. + --help Show this message and exit. + """ + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) + + +def test_cli_write_mongo_command_usage(): + """Test the `ralph write mongo` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "write mongo --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph write mongo [OPTIONS] + + MongoDB data backend. + + Write `data` documents to the `target` collection and return their count. + + Options: + --client-options KEY=VALUE,KEY=VALUE + A dictionary of MongoDB client options. + [default: document_class=None tz_aware=None] + --connection-uri TEXT The MongoDB connection URI. [default: + MONGO_TEST_CONNECTION_URI] + --default-collection TEXT The MongoDB database collection to get objects + from. [default: MONGO_TEST_COLLECTION] + --default-database TEXT The MongoDB database to connect to. [default: + statements] + --locale-encoding TEXT The locale encoding to use when none is + provided. [default: utf8] + -o, --operation-type OP_TYPE The mode of the write operation. If + `operation_type` is `None`, the + `default_operation_type` is used instead. See + `BaseOperationType`. [default: index] + -I, --ignore-errors If `True`, errors during decoding, encoding + and sending batches of documents are ignored + and logged. 
If `False` (default), a + `BackendException` is raised on any error. + -s, --chunk-size INTEGER The number of documents to write in one batch. + If `chunk_size` is `None` it defaults to + `WRITE_CHUNK_SIZE`. [default: 500] + -t, --target TEXT The target MongoDB collection name. + --help Show this message and exit. + """ + output = output.replace("MONGO_TEST_CONNECTION_URI", MONGO_TEST_CONNECTION_URI) + output = output.replace("MONGO_TEST_COLLECTION", MONGO_TEST_COLLECTION) + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) + + +def test_cli_write_s3_command_usage(): + """Test the `ralph write s3` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "write s3 --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph write s3 [OPTIONS] + + S3 data backend. + + Write `data` records to the `target` bucket and return their count. + + Options: + --access-key-id TEXT The access key id for the S3 account. + --default-bucket-name TEXT The default bucket name targeted. + --default-region TEXT The default region used in instantiating the + client. + --endpoint-url TEXT The endpoint URL of the S3. + --locale-encoding TEXT The encoding used for writing dictionaries to + objects. [default: utf8] + --secret-access-key TEXT The secret key for the S3 account. + --session-token TEXT The session token for the S3 account. + -o, --operation-type OP_TYPE The mode of the write operation. If + operation_type is `CREATE` or `INDEX`, the + target object is expected to be absent. If the + target object exists a `BackendException` is + raised. [default: create] + -I, --ignore-errors If `True`, errors during decoding and encoding + of records are ignored and logged. If `False` + (default), a `BackendException` is raised on any + error. + -s, --chunk-size INTEGER The chunk size when writing objects by batch. If + `chunk_size` is `None` it defaults to + `WRITE_CHUNK_SIZE`. [default: 4096] + -t, --target TEXT The target bucket and the target object + separated by a `/`. If target is `None`, the + default bucket is used and a random (uuid4) + object is created. If target does not contain a + `/`, it is assumed to be the target object and + the default bucket is used. + --help Show this message and exit. + """ + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) + + +def test_cli_write_swift_command_usage(): + """Test the `ralph write swift` command usage.""" + runner = CliRunner() + result = runner.invoke(cli, "write swift --help".split()) + + assert result.exit_code == 0 + output = """Usage: ralph write swift [OPTIONS] + + SWIFT data backend. + + Write `data` records to the `target` container and returns their count. + + Options: + --auth-url TEXT The authentication URL. [default: + https://auth.cloud.ovh.net/] + --default-container TEXT The default target container. + --identity-api-version TEXT The keystone API version to authenticate to. + [default: 3] + --locale-encoding TEXT The encoding used for reading/writing documents. + [default: utf8] + --object-storage-url TEXT The default storage URL. + --password TEXT The password of the openstack swift user. + --project-domain-name TEXT The project domain name. [default: Default] + --region-name TEXT The region where the container is. + --tenant-id TEXT The identifier of the tenant of the container. + --tenant-name TEXT The name of the tenant of the container. + --username TEXT The name of the openstack swift user. + --user-domain-name TEXT The user domain name. 
[default: Default] + -o, --operation-type OP_TYPE The mode of the write operation. If + `operation_type` is `None`, the + `default_operation_type` is used instead. See + `BaseOperationType`. [default: create] + -I, --ignore-errors If `True`, errors during decoding and encoding + of records are ignored and logged. If `False` + (default), a `BackendException` is raised on any + error. + -s, --chunk-size INTEGER The chunk size when writing objects by batch. If + `chunk_size` is `None` it defaults to + `WRITE_CHUNK_SIZE`. [default: 4096] + -t, --target TEXT The target container name. If `target` is + `None`, a default value is used instead. + --help Show this message and exit. + """ + assert result.output == re.sub(re.compile(r"^ {4}", re.MULTILINE), "", output) diff --git a/tests/test_dependencies.py b/tests/test_dependencies.py index 1e2adb742..ab7d83574 100644 --- a/tests/test_dependencies.py +++ b/tests/test_dependencies.py @@ -35,7 +35,7 @@ def test_dependencies_runserver_subcommand_requires_uvicorn(monkeypatch): monkeypatch.delattr(cli, "uvicorn") monkeypatch.setattr(cli, "configure_logging", lambda: None) runner = CliRunner() - result = runner.invoke(cli.cli, "runserver -b es".split()) + result = runner.invoke(cli.cli, "runserver es".split()) assert isinstance(result.exception, ModuleNotFoundError) assert str(result.exception) == ( "You need to install 'lrs' optional dependencies to use the runserver " diff --git a/tests/test_logger.py b/tests/test_logger.py index 225b63c7e..d9c417520 100644 --- a/tests/test_logger.py +++ b/tests/test_logger.py @@ -5,11 +5,10 @@ import ralph.logger from ralph.cli import cli -from ralph.conf import settings from ralph.exceptions import ConfigurationException -def test_logger_exists(fs, monkeypatch): +def test_logger_exists(monkeypatch): """Test the logging system when a correct configuration is provided.""" mock_default_config = { "version": 1, @@ -35,44 +34,39 @@ def test_logger_exists(fs, monkeypatch): }, } - fs.create_dir(str(settings.APP_DIR)) - fs.create_dir("foo") - monkeypatch.setattr(ralph.logger.settings, "LOGGING", mock_default_config) runner = CliRunner() - result = runner.invoke( - cli, - ["write", "-b", "fs", "-t", "test_file", "--fs-default-directory-path", "foo"], - input="test input", - ) - - assert result.exit_code == 0 - assert "Writing to target test_file for the configured fs backend" in result.output - assert "Backend parameters:" in result.output + with runner.isolated_filesystem(): + result = runner.invoke(cli, "write fs -t test_file".split(), input="bar") + assert result.exit_code == 0 + assert ( + "Writing to target test_file for the configured fs backend" in result.output + ) + assert "Backend parameters:" in result.output -def test_logger_no_config(fs, monkeypatch): +def test_logger_no_config(monkeypatch): """Test that an error occurs when no logging configuration exists.""" mock_default_config = None monkeypatch.setattr(ralph.logger.settings, "LOGGING", mock_default_config) runner = CliRunner() + with runner.isolated_filesystem(): + with pytest.raises(ConfigurationException): + result = runner.invoke(cli, ["list", "fs"], catch_exceptions=False) + assert result.exit_code == 1 - with pytest.raises(ConfigurationException): - result = runner.invoke(cli, ["list", "-b", "fs"], catch_exceptions=False) - assert result.exit_code == 1 - -def test_logger_bad_config(fs, monkeypatch): +def test_logger_bad_config(monkeypatch): """Test that an error occurs when a logging is improperly configured.""" mock_default_config = "this is not a valid 
json" monkeypatch.setattr(ralph.logger.settings, "LOGGING", mock_default_config) runner = CliRunner() - - with pytest.raises(ConfigurationException): - result = runner.invoke(cli, ["list", "-b", "fs"], catch_exceptions=False) - assert result.exit_code == 1 + with runner.isolated_filesystem(): + with pytest.raises(ConfigurationException): + result = runner.invoke(cli, ["list", "fs"], catch_exceptions=False) + assert result.exit_code == 1 diff --git a/tests/test_utils.py b/tests/test_utils.py index 45e38414c..469037c12 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -41,10 +41,10 @@ def test_utils_get_backend_class(caplog): [ # Empty options should produce default result. ({}, {"FOO": "FOO"}),
-        # Options not matching the backend name are ignored.
-        ({"foo": "bar", "not_dummy_foo": "baz"}, {"FOO": "FOO"}),
-        # Options matching the backend name update the defaults.
-        ({"dummy_foo": "bar"}, {"FOO": "bar"}),
+        # Non-matching options are ignored.
+        ({"dummy_foo": "bar"}, {"FOO": "FOO"}),
+        # Matching options update the defaults.
+        ({"foo": "bar", "not_dummy_foo": "baz"}, {"FOO": "bar"}),
 ], ) def test_utils_get_backend_instance(options, expected):