From 79265c62506b224ba27e31815da098e821cfa0bc Mon Sep 17 00:00:00 2001 From: jake Date: Thu, 25 Apr 2024 11:26:28 -0700 Subject: [PATCH 1/9] Updated copyright year --- internetarchive/__init__.py | 6 +++--- internetarchive/api.py | 4 ++-- internetarchive/auth.py | 4 ++-- internetarchive/catalog.py | 4 ++-- internetarchive/config.py | 4 ++-- internetarchive/exceptions.py | 4 ++-- internetarchive/files.py | 4 ++-- internetarchive/iarequest.py | 4 ++-- internetarchive/item.py | 4 ++-- internetarchive/search.py | 4 ++-- internetarchive/session.py | 4 ++-- internetarchive/utils.py | 4 ++-- 12 files changed, 25 insertions(+), 25 deletions(-) diff --git a/internetarchive/__init__.py b/internetarchive/__init__.py index 473922e5..bbe10ca1 100644 --- a/internetarchive/__init__.py +++ b/internetarchive/__init__.py @@ -1,7 +1,7 @@ # # The internetarchive module is a Python/CLI interface to Archive.org. # -# Copyright (C) 2012-2019 Internet Archive +# Copyright (C) 2012-2024 Internet Archive # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -29,14 +29,14 @@ >>> item.exists True -:copyright: (C) 2012-2019 by Internet Archive. +:copyright: (C) 2012-2024 by Internet Archive. :license: AGPL 3, see LICENSE for more details. """ __title__ = 'internetarchive' __author__ = 'Jacob M. Johnson' __license__ = 'AGPL 3' -__copyright__ = 'Copyright (C) 2012-2019 Internet Archive' +__copyright__ = 'Copyright (C) 2012-2024 Internet Archive' from .__version__ import __version__ # isort:skip from internetarchive.api import ( diff --git a/internetarchive/api.py b/internetarchive/api.py index 8ebd295c..8dfebf57 100644 --- a/internetarchive/api.py +++ b/internetarchive/api.py @@ -1,7 +1,7 @@ # # The internetarchive module is a Python/CLI interface to Archive.org. # -# Copyright (C) 2012-2019 Internet Archive +# Copyright (C) 2012-2024 Internet Archive # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -22,7 +22,7 @@ This module implements the Internetarchive API. -:copyright: (C) 2012-2019 by Internet Archive. +:copyright: (C) 2012-2024 by Internet Archive. :license: AGPL 3, see LICENSE for more details. """ from __future__ import annotations diff --git a/internetarchive/auth.py b/internetarchive/auth.py index f6e7dc1e..25a94477 100644 --- a/internetarchive/auth.py +++ b/internetarchive/auth.py @@ -1,7 +1,7 @@ # # The internetarchive module is a Python/CLI interface to Archive.org. # -# Copyright (C) 2012-2019 Internet Archive +# Copyright (C) 2012-2024 Internet Archive # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -22,7 +22,7 @@ This module contains the Archive.org authentication handlers for Requests. -:copyright: (C) 2012-2019 by Internet Archive. +:copyright: (C) 2012-2024 by Internet Archive. :license: AGPL 3, see LICENSE for more details. """ from __future__ import annotations diff --git a/internetarchive/catalog.py b/internetarchive/catalog.py index d8ddd483..a5a7e42e 100644 --- a/internetarchive/catalog.py +++ b/internetarchive/catalog.py @@ -1,7 +1,7 @@ # # The internetarchive module is a Python/CLI interface to Archive.org. 
# -# Copyright (C) 2012-2019 Internet Archive +# Copyright (C) 2012-2024 Internet Archive # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -22,7 +22,7 @@ This module contains objects for interacting with the Archive.org catalog. -:copyright: (C) 2012-2019 by Internet Archive. +:copyright: (C) 2012-2024 by Internet Archive. :license: AGPL 3, see LICENSE for more details. """ from __future__ import annotations diff --git a/internetarchive/config.py b/internetarchive/config.py index c267d83e..cd6b0741 100644 --- a/internetarchive/config.py +++ b/internetarchive/config.py @@ -1,7 +1,7 @@ # # The internetarchive module is a Python/CLI interface to Archive.org. # -# Copyright (C) 2012-2019 Internet Archive +# Copyright (C) 2012-2024 Internet Archive # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -20,7 +20,7 @@ internetarchive.config ~~~~~~~~~~~~~~~~~~~~~~ -:copyright: (C) 2012-2019 by Internet Archive. +:copyright: (C) 2012-2024 by Internet Archive. :license: AGPL 3, see LICENSE for more details. """ from __future__ import annotations diff --git a/internetarchive/exceptions.py b/internetarchive/exceptions.py index f21a9a8f..512ecf61 100644 --- a/internetarchive/exceptions.py +++ b/internetarchive/exceptions.py @@ -1,7 +1,7 @@ # # The internetarchive module is a Python/CLI interface to Archive.org. # -# Copyright (C) 2012-2019 Internet Archive +# Copyright (C) 2012-2024 Internet Archive # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -20,7 +20,7 @@ internetarchive.exceptions ~~~~~~~~~~~~~~~~~~~~~~~~~~ -:copyright: (C) 2012-2019 by Internet Archive. +:copyright: (C) 2012-2024 by Internet Archive. :license: AGPL 3, see LICENSE for more details. """ diff --git a/internetarchive/files.py b/internetarchive/files.py index 4981651e..df470c23 100644 --- a/internetarchive/files.py +++ b/internetarchive/files.py @@ -1,7 +1,7 @@ # # The internetarchive module is a Python/CLI interface to Archive.org. # -# Copyright (C) 2012-2021 Internet Archive +# Copyright (C) 2012-2024 Internet Archive # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -20,7 +20,7 @@ internetarchive.files ~~~~~~~~~~~~~~~~~~~~~ -:copyright: (C) 2012-2019 by Internet Archive. +:copyright: (C) 2012-2024 by Internet Archive. :license: AGPL 3, see LICENSE for more details. """ import logging diff --git a/internetarchive/iarequest.py b/internetarchive/iarequest.py index 475785c9..65ffde29 100644 --- a/internetarchive/iarequest.py +++ b/internetarchive/iarequest.py @@ -1,7 +1,7 @@ # # The internetarchive module is a Python/CLI interface to Archive.org. # -# Copyright (C) 2012-2021 Internet Archive +# Copyright (C) 2012-2024 Internet Archive # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -20,7 +20,7 @@ internetarchive.iarequest ~~~~~~~~~~~~~~~~~~~~~~~~~ -:copyright: (C) 2012-2021 by Internet Archive. +:copyright: (C) 2012-2024 by Internet Archive. :license: AGPL 3, see LICENSE for more details. 
""" import copy diff --git a/internetarchive/item.py b/internetarchive/item.py index 7202976b..34feeeb3 100644 --- a/internetarchive/item.py +++ b/internetarchive/item.py @@ -1,7 +1,7 @@ # # The internetarchive module is a Python/CLI interface to Archive.org. # -# Copyright (C) 2012-2021 Internet Archive +# Copyright (C) 2012-2024 Internet Archive # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -20,7 +20,7 @@ internetarchive.item ~~~~~~~~~~~~~~~~~~~~ -:copyright: (C) 2012-2021 by Internet Archive. +:copyright: (C) 2012-2024 by Internet Archive. :license: AGPL 3, see LICENSE for more details. """ from __future__ import annotations diff --git a/internetarchive/search.py b/internetarchive/search.py index 975a261d..c84c93f7 100644 --- a/internetarchive/search.py +++ b/internetarchive/search.py @@ -1,7 +1,7 @@ # # The internetarchive module is a Python/CLI interface to Archive.org. # -# Copyright (C) 2012-2019 Internet Archive +# Copyright (C) 2012-2024 Internet Archive # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -23,7 +23,7 @@ This module provides objects for interacting with the Archive.org search engine. -:copyright: (C) 2012-2019 by Internet Archive. +:copyright: (C) 2012-2024 by Internet Archive. :license: AGPL 3, see LICENSE for more details. """ import itertools diff --git a/internetarchive/session.py b/internetarchive/session.py index 11634aec..2362a089 100644 --- a/internetarchive/session.py +++ b/internetarchive/session.py @@ -1,7 +1,7 @@ # # The internetarchive module is a Python/CLI interface to Archive.org. # -# Copyright (C) 2012-2021 Internet Archive +# Copyright (C) 2012-2024 Internet Archive # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -23,7 +23,7 @@ This module provides an ArchiveSession object to manage and persist settings across the internetarchive package. -:copyright: (C) 2012-2021 by Internet Archive. +:copyright: (C) 2012-2024 by Internet Archive. :license: AGPL 3, see LICENSE for more details. """ from __future__ import annotations diff --git a/internetarchive/utils.py b/internetarchive/utils.py index e9d17206..9d57bd61 100644 --- a/internetarchive/utils.py +++ b/internetarchive/utils.py @@ -1,7 +1,7 @@ # # The internetarchive module is a Python/CLI interface to Archive.org. # -# Copyright (C) 2012-2022 Internet Archive +# Copyright (C) 2012-2024 Internet Archive # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -22,7 +22,7 @@ This module provides utility functions for the internetarchive library. -:copyright: (C) 2012-2022 by Internet Archive. +:copyright: (C) 2012-2024 by Internet Archive. :license: AGPL 3, see LICENSE for more details. 
""" from __future__ import annotations From b8a7fc08bde4aaf50f07db7ba51b08155539cfbf Mon Sep 17 00:00:00 2001 From: jake Date: Thu, 2 May 2024 15:10:34 -0700 Subject: [PATCH 2/9] First rough pass at refactoring CLI argument parsing from docopt to argparse --- .gitignore | 2 + .pre-commit-config.yaml | 1 - HISTORY.rst | 9 + internetarchive/__version__.py | 2 +- internetarchive/cli/__init__.py | 32 +- .../cli/{argparser.py => cli_utils.py} | 79 +++- internetarchive/cli/ia.py | 273 +++++------ internetarchive/cli/ia_configure.py | 130 +++--- internetarchive/cli/ia_copy.py | 187 ++++---- internetarchive/cli/ia_delete.py | 235 ++++++---- internetarchive/cli/ia_download.py | 293 ++++++------ internetarchive/cli/ia_list.py | 155 +++++-- internetarchive/cli/ia_metadata.py | 287 ++++++------ internetarchive/cli/ia_move.py | 122 ++--- internetarchive/cli/ia_reviews.py | 118 +++-- internetarchive/cli/ia_search.py | 259 +++++++---- internetarchive/cli/ia_tasks.py | 231 +++++----- internetarchive/cli/ia_upload.py | 434 ++++++++++-------- .../{test_argparser.py => test_cli_utils.py} | 2 +- tests/cli/test_ia.py | 10 +- tests/cli/test_ia_download.py | 4 +- tests/cli/test_ia_list.py | 28 +- tests/cli/test_ia_upload.py | 44 +- 23 files changed, 1635 insertions(+), 1302 deletions(-) rename internetarchive/cli/{argparser.py => cli_utils.py} (51%) rename tests/cli/{test_argparser.py => test_cli_utils.py} (94%) diff --git a/.gitignore b/.gitignore index 179579d7..5ada9ff4 100644 --- a/.gitignore +++ b/.gitignore @@ -28,3 +28,5 @@ v3.7/ v3.8/ v3.9/ wheelhouse +ia.dist/ +ia.bin diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cfe8f628..415f3c52 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -47,7 +47,6 @@ repos: - id: mypy additional_dependencies: - tqdm-stubs - - types-docopt - types-jsonpatch - types-requests - types-setuptools diff --git a/HISTORY.rst b/HISTORY.rst index 9ff2ca95..44ac504d 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,15 @@ Release History --------------- +5.0.0 (?) ++++++++++ + +**Features and Improvements** + +- Updated the CLI's command-line argument parsing by replacing the obsolete ``docopt`` + with the native ``argparse`` library, ensuring continued functionality + and future compatibility. + 4.0.1 (2024-04-15) ++++++++++++++++++ diff --git a/internetarchive/__version__.py b/internetarchive/__version__.py index 1a3bef53..c15240a5 100644 --- a/internetarchive/__version__.py +++ b/internetarchive/__version__.py @@ -1 +1 @@ -__version__ = '4.0.1' +__version__ = '5.0.0.dev1' diff --git a/internetarchive/cli/__init__.py b/internetarchive/cli/__init__.py index e1ff02a2..e227074c 100644 --- a/internetarchive/cli/__init__.py +++ b/internetarchive/cli/__init__.py @@ -1,7 +1,7 @@ # # The internetarchive module is a Python/CLI interface to Archive.org. # -# Copyright (C) 2012-2016, 2021 Internet Archive +# Copyright (C) 2012-2024 Internet Archive # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -20,31 +20,37 @@ internetarchive.cli ~~~~~~~~~~~~~~~~~~~ -:copyright: (C) 2012-2016, 2021 by Internet Archive. +:copyright: (C) 2012-2024 by Internet Archive. :license: AGPL 3, see LICENSE for more details. 
""" from internetarchive.cli import ( - argparser, + cli_utils, ia, ia_configure, + ia_copy, ia_delete, ia_download, ia_list, ia_metadata, + ia_move, + ia_reviews, ia_search, ia_tasks, ia_upload, ) __all__ = [ - 'ia', - 'ia_configure', - 'ia_delete', - 'ia_download', - 'ia_list', - 'ia_metadata', - 'ia_search', - 'ia_tasks', - 'ia_upload', - 'argparser', + "ia", + "cli_utils", + "ia_configure", + "ia_copy", + "ia_delete", + "ia_download", + "ia_list", + "ia_metadata", + "ia_move", + "ia_reviews", + "ia_search", + "ia_tasks", + "ia_upload", ] diff --git a/internetarchive/cli/argparser.py b/internetarchive/cli/cli_utils.py similarity index 51% rename from internetarchive/cli/argparser.py rename to internetarchive/cli/cli_utils.py index 1466c553..24388583 100644 --- a/internetarchive/cli/argparser.py +++ b/internetarchive/cli/cli_utils.py @@ -1,7 +1,9 @@ -# -# The internetarchive module is a Python/CLI interface to Archive.org. -# -# Copyright (C) 2012-2019 Internet Archive +""" +interneratchive.cli.cli_utils + +""" + +# Copyright (C) 2012-2024 Internet Archive # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -16,33 +18,34 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -""" -internetarchive.cli.argparser -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -:copyright: (C) 2012-2019 by Internet Archive. -:license: AGPL 3, see LICENSE for more details. -""" from __future__ import annotations +import argparse +import os import sys from collections import defaultdict from typing import Mapping from urllib.parse import parse_qsl +from internetarchive.utils import InvalidIdentifierException, validate_s3_identifier + -def get_args_dict(args: list[str], query_string: bool = False, header: bool = False) -> dict: +def get_args_dict(args: list[str], + query_string: bool = False, + header: bool = False) -> dict: args = args or [] + if not isinstance(args, list): + args = [args] metadata: dict[str, list | str] = defaultdict(list) for md in args: if query_string: - if (':' in md) and ('=' not in md): - md = md.replace(':', '=').replace(';', '&') + if (":" in md) and ("=" not in md): + md = md.replace(":", "=").replace(";", "&") for key, value in parse_qsl(md): assert value metadata[key] = value else: - key, value = md.split(':', 1) + key, value = md.split(":", 1) assert value if value not in metadata[key]: metadata[key].append(value) # type: ignore @@ -63,8 +66,8 @@ def get_args_header_dict(args: list[str]) -> dict: def get_args_dict_many_write(metadata: Mapping): changes: dict[str, dict] = defaultdict(dict) for key, value in metadata.items(): - target = '/'.join(key.split('/')[:-1]) - field = key.split('/')[-1] + target = "/".join(key.split("/")[:-1]) + field = key.split("/")[-1] if not changes[target]: changes[target] = {field: value} else: @@ -75,3 +78,45 @@ def get_args_dict_many_write(metadata: Mapping): def convert_str_list_to_unicode(str_list: list[bytes]): encoding = sys.getfilesystemencoding() return [b.decode(encoding) for b in str_list] + + +def validate_identifier(identifier): + try: + validate_s3_identifier(identifier) + except InvalidIdentifierException as e: + raise argparse.ArgumentTypeError(str(e)) + return identifier + + +def prepare_args_dict(args, parser, arg_type="metadata", many=False): + if not args: + return {} + try: + if many: + return get_args_dict_many_write([item for sublist in args for item in sublist]) + else: + if 
isinstance(args[0], list): + return get_args_dict([item for sublist in args for item in sublist]) + else: + return get_args_dict(args) + except ValueError as e: + parser.error(f"--{arg_type} must be formatted as --{arg_type}='key:value'") + + +def validate_dir_path(path): + """ + Check if the given path is a directory that exists. + + Args: + path (str): The path to check. + + Returns: + str: The validated directory path. + + Raises: + argparse.ArgumentTypeError: If the path is not a valid directory. + """ + if os.path.isdir(path): + return path + else: + raise argparse.ArgumentTypeError(f"'{path}' is not a valid directory") diff --git a/internetarchive/cli/ia.py b/internetarchive/cli/ia.py index e00e5b6b..83cd4f71 100755 --- a/internetarchive/cli/ia.py +++ b/internetarchive/cli/ia.py @@ -1,8 +1,11 @@ #!/usr/bin/env python -# -# The internetarchive module is a Python/CLI interface to Archive.org. -# -# Copyright (C) 2012-2019 Internet Archive +""" +ia.py + +The internetarchive module is a Python/CLI interface to Archive.org. +""" + +# Copyright (C) 2012-2024 Internet Archive # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -17,171 +20,117 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -"""A command line interface to Archive.org. - -usage: - ia [--help | --version] - ia [--config-file FILE] [--log | --debug] - [--insecure] [--host HOST] []... - -options: - -h, --help - -v, --version - -c, --config-file FILE Use FILE as config file. (Can also be set with the - IA_CONFIG_FILE environment variable. The option takes - precedence when both are used.) - -l, --log Turn on logging [default: False]. - -d, --debug Turn on verbose logging [default: False]. - -i, --insecure Use HTTP for all requests instead of HTTPS [default: false] - -H, --host HOST Host to use for requests (doesn't work for requests made to - s3.us.archive.org) [default: archive.org] - -commands: - help Retrieve help for subcommands. - configure Configure `ia`. - metadata Retrieve and modify metadata for items on Archive.org. - upload Upload items to Archive.org. - download Download files from Archive.org. - delete Delete files from Archive.org. - search Search Archive.org. - tasks Retrieve information about your Archive.org catalog tasks. - list List files in a given item. - copy Copy files in archive.org items. - move Move/rename files in archive.org items. - reviews Submit/modify reviews for archive.org items. - -Documentation for 'ia' is available at: - - https://archive.org/services/docs/api/internetarchive/cli.html - -See 'ia help ' for help on a specific command. 
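For reference, the get_args_dict helper retained in cli_utils above folds repeated key:value arguments into lists and routes query-string input through parse_qsl. A quick sketch of the expected behavior; the single-value flattening step falls outside the hunk shown above, so the exact return shape for single values is an assumption:

    from internetarchive.cli.cli_utils import get_args_dict

    get_args_dict(["subject:math", "subject:python", "title:Foo"])
    # -> {'subject': ['math', 'python'], 'title': 'Foo'}

    get_args_dict(["rows=10&page=2"], query_string=True)
    # -> {'rows': '10', 'page': '2'}
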
-""" -from __future__ import annotations - -import difflib -import errno -import os +import argparse import sys -from docopt import docopt, printable_usage - -if sys.version_info < (3, 10): - from importlib_metadata import entry_points # type: ignore[import] -else: - from importlib.metadata import entry_points -from schema import Or, Schema, SchemaError # type: ignore[import] - -from internetarchive import __version__ -from internetarchive.api import get_session -from internetarchive.utils import suppress_keyboard_interrupt_message - -suppress_keyboard_interrupt_message() - - -cmd_aliases = { - 'co': 'configure', - 'md': 'metadata', - 'up': 'upload', - 'do': 'download', - 'rm': 'delete', - 'se': 'search', - 'ta': 'tasks', - 'ls': 'list', - 'cp': 'copy', - 'mv': 'move', - 're': 'reviews', -} - - -def load_ia_module(cmd: str): - """Dynamically import ia module.""" - try: - if cmd in list(cmd_aliases.keys()) + list(cmd_aliases.values()): - _module = f'internetarchive.cli.ia_{cmd}' - return __import__(_module, fromlist=['internetarchive.cli']) - else: - _module = f'ia_{cmd}' - for ep in entry_points(group='internetarchive.cli.plugins'): - if ep.name == _module: - return ep.load() - raise ImportError - except (ImportError): - print(f"error: '{cmd}' is not an ia command! See 'ia help'", - file=sys.stderr) - matches = '\t'.join(difflib.get_close_matches(cmd, cmd_aliases.values())) - if matches: - print(f'\nDid you mean one of these?\n\t{matches}', file=sys.stderr) - sys.exit(127) - - -def main() -> None: - """This is the CLI driver for ia-wrapper.""" - args = docopt(__doc__, version=__version__, options_first=True) - - # Validate args. - s = Schema({ - str: bool, - '--config-file': Or(None, str), - '--host': Or(None, str), - '': list, - '': Or(str, lambda _: 'help'), - }) - try: - args = s.validate(args) - except SchemaError as exc: - print(f'{exc}\n{printable_usage(__doc__)}', file=sys.stderr) - sys.exit(1) +from internetarchive import __version__, get_session +from internetarchive.cli import ( + ia_configure, + ia_copy, + ia_delete, + ia_download, + ia_list, + ia_metadata, + ia_move, + ia_reviews, + ia_search, + ia_tasks, + ia_upload, +) + + +def validate_config_path(path): + """ + Validate the path to the configuration file. + + Returns: + str: Validated path to the configuration file. + """ + if "configure" not in sys.argv: # Support for adding config to specific file + file_check = argparse.FileType("r") + file_check(path) + return path + + +def main(): + """ + Main entry point for the CLI. 
+ """ + parser = argparse.ArgumentParser( + description="A command line interface to Archive.org.", + epilog=("Documentation for 'ia' is available at:\n\n\t" + "https://archive.org/developers/internetarchive/cli.html\n\n" + "See 'ia {subcommand} --help' for help on a specific subcommand."), + formatter_class=argparse.RawTextHelpFormatter) # support for \n in epilog + + parser.add_argument("-v", "--version", + action="version", + version=__version__) + parser.add_argument("-c", "--config-file", + action="store", + type=validate_config_path, + metavar="FILE", + help="path to configuration file") + parser.add_argument("-l", "--log", + action="store_true", + default=False, + help="enable logging") + parser.add_argument("-d", "--debug", + action="store_true", + help="enable debugging") + parser.add_argument("-i", "--insecure", + action="store_true", + help="allow insecure connections") + parser.add_argument("-H", "--host", + action="store", + help=("host to connect to " + "(doesn't work for requests made to s3.us.archive.org)")) + + subparsers = parser.add_subparsers(title="subcommands", + dest="subcommand", + metavar="{subcommand}") + + # Add subcommand parsers + ia_configure.setup(subparsers) + ia_copy.setup(subparsers) + ia_delete.setup(subparsers) + ia_download.setup(subparsers) + ia_list.setup(subparsers) + ia_metadata.setup(subparsers) + ia_move.setup(subparsers) + ia_reviews.setup(subparsers) + ia_search.setup(subparsers) + ia_tasks.setup(subparsers) + ia_upload.setup(subparsers) + + # Suppress help for alias subcommands + args = parser.parse_args() - # Get subcommand. - cmd = args[''] - if cmd in cmd_aliases: - cmd = cmd_aliases[cmd] - - if (cmd == 'help') or (not cmd): - if not args['']: - sys.exit(print(__doc__.strip(), file=sys.stderr)) + config: dict[str, dict] = {} + if args.log: + config["logging"] = {"level": "INFO"} + elif args.debug: + config["logging"] = {"level": "DEBUG"} + + if args.insecure: + config["general"] = {"secure": False} + if args.host: + if config.get("general"): + config["general"]["host"] = args["--host"] else: - ia_module = load_ia_module(args[''][0]) - sys.exit(print(ia_module.__doc__.strip(), file=sys.stderr)) + config["general"] = {"host": args["--host"]} - if cmd != 'configure' and args['--config-file']: - if not os.path.isfile(args['--config-file']): - print(f'--config-file should be a readable file.\n{printable_usage(__doc__)}', - file=sys.stderr) - sys.exit(1) + args.session = get_session(config_file=args.config_file, + config=config, + debug=args.debug) - argv = [cmd] + args[''] + # Check if any arguments were provided + if len(sys.argv) == 1: + parser.print_help(sys.stderr) + sys.exit(1) - config: dict[str, dict] = {} - if args['--log']: - config['logging'] = {'level': 'INFO'} - elif args['--debug']: - config['logging'] = {'level': 'DEBUG'} - - if args['--insecure']: - config['general'] = {'secure': False} - if args['--host']: - if config.get('general'): - config['general']['host'] = args['--host'] - else: - config['general'] = {'host': args['--host']} - - session = get_session(config_file=args['--config-file'], - config=config, - debug=args['--debug']) - - ia_module = load_ia_module(cmd) - try: - sys.exit(ia_module.main(argv, session)) - except OSError as e: - # Handle Broken Pipe errors. 
- if e.errno == errno.EPIPE: - sys.stderr.close() - sys.stdout.close() - sys.exit(0) - else: - raise + args.func(args) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/internetarchive/cli/ia_configure.py b/internetarchive/cli/ia_configure.py index cb313087..a90fe00b 100644 --- a/internetarchive/cli/ia_configure.py +++ b/internetarchive/cli/ia_configure.py @@ -1,7 +1,10 @@ -# -# The internetarchive module is a Python/CLI interface to Archive.org. -# -# Copyright (C) 2012-2019 Internet Archive +""" +ia_configure.py + +'ia' subcommand for configuring 'ia' with your archive.org credentials. +""" + +# Copyright (C) 2012-2024 Internet Archive # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -16,84 +19,95 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -"""Configure 'ia' with your Archive.org credentials. - -usage: - ia configure - ia configure --username= --password= - ia configure --print-cookies - ia configure --netrc - ia configure [--help] - -options: - -h, --help - -u, --username= Provide username as an option rather than - providing it interactively. - -p, --password= Provide password as an option rather than - providing it interactively. - -n, --netrc Use netrc file for login. - -c, --print-cookies Print archive.org logged-in-* cookies. -""" from __future__ import annotations +import argparse import netrc import sys -from docopt import docopt - -from internetarchive import ArchiveSession, configure +from internetarchive import configure from internetarchive.exceptions import AuthenticationError -def main(argv: list[str], session: ArchiveSession) -> None: - args = docopt(__doc__, argv=argv) - if args['--print-cookies']: - user = session.config.get('cookies', {}).get('logged-in-user') - sig = session.config.get('cookies', {}).get('logged-in-sig') +def setup(subparsers): + """ + Setup args for configure command. + + Args: + subparsers: subparser object passed from ia.py + """ + parser = subparsers.add_parser("configure", + aliases=["co"], + help=("configure 'ia' with your " + "archive.org credentials")) + parser.add_argument("--username", "-u", + help=("provide username as an option rather than " + "providing it interactively")) + parser.add_argument("--password", "-p", + help=("provide password as an option rather than " + "providing it interactively")) + parser.add_argument("--netrc", "-n", + action="store_true", + help="use netrc file for login") + parser.add_argument("--print-cookies", "-c", + action="store_true", + help="print archive.org logged-in-* cookies") + + parser.set_defaults(func=main) + + +def main(args: argparse.Namespace) -> None: + """ + Main entrypoint for 'ia configure'. 
+ """ + if args.print_cookies: + user = args.session.config.get("cookies", {}).get("logged-in-user") + sig = args.session.config.get("cookies", {}).get("logged-in-sig") if not user or not sig: if not user and not sig: - print('error: "logged-in-user" and "logged-in-sig" cookies ' - 'not found in config file, try reconfiguring.', file=sys.stderr) + print("error: 'logged-in-user' and 'logged-in-sig' cookies " + "not found in config file, try reconfiguring.", file=sys.stderr) elif not user: - print('error: "logged-in-user" cookie not found in config file, ' - 'try reconfiguring.', file=sys.stderr) + print("error: 'logged-in-user' cookie not found in config file, " + "try reconfiguring.", file=sys.stderr) elif not sig: - print('error: "logged-in-sig" cookie not found in config file, ' - 'try reconfiguring.', file=sys.stderr) + print("error: 'logged-in-sig' cookie not found in config file, " + "try reconfiguring.", file=sys.stderr) sys.exit(1) - print(f'logged-in-user={user}; logged-in-sig={sig}') + print(f"logged-in-user={user}; logged-in-sig={sig}") sys.exit() try: - # CLI params. - if args['--username'] and args['--password']: - config_file_path = configure(args['--username'], - args['--password'], - config_file=session.config_file, - host=session.host) - print(f'Config saved to: {config_file_path}', file=sys.stderr) - # Netrc - elif args['--netrc']: + if args.netrc: print("Configuring 'ia' with netrc file...", file=sys.stderr) try: n = netrc.netrc() - except netrc.NetrcParseError as exc: - print('error: netrc.netrc() cannot parse your .netrc file.', file=sys.stderr) + except netrc.NetrcParseError: + print("error: netrc.netrc() cannot parse your .netrc file.", + file=sys.stderr) sys.exit(1) - username, _, password = n.hosts['archive.org'] + except FileNotFoundError: + print("error: .netrc file not found.", file=sys.stderr) + sys.exit(1) + username, _, password = n.hosts["archive.org"] config_file_path = configure(username, password or "", - config_file=session.config_file, - host=session.host) - print(f'Config saved to: {config_file_path}', file=sys.stderr) - + config_file=args.session.config_file, + host=args.session.host) + print(f"Config saved to: {config_file_path}", file=sys.stderr) # Interactive input. else: - print("Enter your Archive.org credentials below to configure 'ia'.\n") - config_file_path = configure(config_file=session.config_file, - host=session.host) - print(f'\nConfig saved to: {config_file_path}') + if not (args.username and args.password): + print("Enter your Archive.org credentials below to configure 'ia'.\n") + config_file_path = configure(args.username, + args.password, + config_file=args.session.config_file, + host=args.session.host) + saved_msg = f"Config saved to: {config_file_path}" + if not all([args.username, args.password]): + saved_msg = f"\n{saved_msg}" + print(saved_msg) except AuthenticationError as exc: - print(f'\nerror: {exc}', file=sys.stderr) + print(f"\nerror: {exc}", file=sys.stderr) sys.exit(1) diff --git a/internetarchive/cli/ia_copy.py b/internetarchive/cli/ia_copy.py index 8b3ee11a..cd978476 100644 --- a/internetarchive/cli/ia_copy.py +++ b/internetarchive/cli/ia_copy.py @@ -1,7 +1,10 @@ -# -# The internetarchive module is a Python/CLI interface to Archive.org. 
-# -# Copyright (C) 2012-2021 Internet Archive +""" +ia_copy.py + +'ia' subcommand for copying files on archive.org +""" + +# Copyright (C) 2012-2024 Internet Archive # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -16,132 +19,144 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -"""Copy files in archive.org items. - -usage: - ia copy / / [options]... - ia copy --help - -options: - -h, --help - -m, --metadata=... Metadata to add to your new item, if you are moving - the file to a new item. - --replace-metadata Only use metadata specified as argument, - do not copy any from the source item. - -H, --header=... S3 HTTP headers to send with your request. - --ignore-file-metadata Do not copy file metadata. - -n, --no-derive Do not derive uploaded files. - --no-backup Turn off archive.org backups. Clobbered files - will not be saved to history/files/$key.~N~ - [default: True]. -""" from __future__ import annotations +import argparse import sys +from typing import Optional from urllib.parse import quote -from docopt import docopt, printable_usage from requests import Response -from schema import And, Or, Schema, SchemaError, Use # type: ignore[import] import internetarchive as ia -from internetarchive.cli.argparser import get_args_dict +from internetarchive.cli.cli_utils import prepare_args_dict from internetarchive.utils import get_s3_xml_text, merge_dictionaries +def setup(subparsers): + """ + Setup args for copy command. + + Args: + subparsers: subparser object passed from ia.py + """ + parser = subparsers.add_parser("copy", + aliases=["cp"], + help="Copy files from archive.org items") + # Positional arguments + parser.add_argument("source", + metavar="SOURCE", + help="Source file formatted as: identifier/file") + parser.add_argument("destination", + metavar="DESTINATION", + help="Destination file formatted as: identifier/file") + + # Options + parser.add_argument("-m", "--metadata", + metavar="KEY:VALUE", + action="append", + help=("Metadata to add to your new item, if you are moving the " + "file to a new item")) + parser.add_argument("--replace-metadata", + action="store_true", + help=("Only use metadata specified as argument, do not copy any " + "from the source item")) + parser.add_argument("-H", "--header", + metavar="KEY:VALUE", + action="append", + help="S3 HTTP headers to send with your request") + parser.add_argument("--ignore-file-metadata", + action="store_true", + help="Do not copy file metadata") + parser.add_argument("-n", "--no-derive", + action="store_true", + help="Do not derive uploaded files") + parser.add_argument("--no-backup", + action="store_true", + help=("Turn off archive.org backups, " + "clobbered files will not be saved to " + "'history/files/$key.~N~'")) + + parser.set_defaults(func=lambda args: main(args, "copy", parser)) + + def assert_src_file_exists(src_location: str) -> bool: + """ + Assert that the source file exists on archive.org. 
+ """ assert SRC_ITEM.exists # type: ignore global SRC_FILE - src_filename = src_location.split('/', 1)[-1] + src_filename = src_location.split("/", 1)[-1] SRC_FILE = SRC_ITEM.get_file(src_filename) # type: ignore assert SRC_FILE.exists # type: ignore return True -def main( - argv: list[str] | None, session: ia.session.ArchiveSession, cmd: str = 'copy' -) -> tuple[Response, ia.files.File]: - args = docopt(__doc__, argv=argv) - src_path = args['/'] - dest_path = args['/'] +def main(args: argparse.Namespace, + cmd: str, + parser: argparse.ArgumentParser) -> tuple[Response, ia.files.File | None]: + """ + Main entry point for 'ia copy'. + """ + SRC_FILE = None - # If src == dest, file gets deleted! - try: - assert src_path != dest_path - except AssertionError: - print('error: The source and destination files cannot be the same!', - file=sys.stderr) - sys.exit(1) + args.header = prepare_args_dict(args.header, parser=parser, arg_type='header') + args.metadata = prepare_args_dict(args.metadata, parser=parser, arg_type='metadata') + + if args.source == args.destination: + parser.error("error: The source and destination files cannot be the same!") global SRC_ITEM - SRC_ITEM = session.get_item(src_path.split('/')[0]) # type: ignore - - # Validate args. - s = Schema({ - str: Use(bool), - '/': And(str, And(And(str, lambda x: '/' in x, - error='Destination not formatted correctly. See usage example.'), - assert_src_file_exists, error=( - f'https://{session.host}/download/{src_path} does not exist. ' - 'Please check the identifier and filepath and retry.'))), - '/': And(str, lambda x: '/' in x, - error='Destination not formatted correctly. See usage example.'), - '--metadata': Or(None, And(Use(get_args_dict), dict), - error='--metadata must be formatted as --metadata="key:value"'), - '--replace-metadata': Use(bool), - '--header': Or(None, And(Use(get_args_dict), dict), - error='--header must be formatted as --header="key:value"'), - '--ignore-file-metadata': Use(bool), - }) + SRC_ITEM = args.session.get_item(args.source.split("/")[0]) # type: ignore try: - args = s.validate(args) - except SchemaError as exc: - # This module is sometimes called by other modules. - # Replace references to 'ia copy' in ___doc__ to 'ia {cmd}' for clarity. - usage = printable_usage(__doc__.replace('ia copy', f'ia {cmd}')) - print(f'{exc}\n{usage}', file=sys.stderr) - sys.exit(1) + assert_src_file_exists(args.source) + except AssertionError: + parser.error(f"error: https://{args.session.host}/download/{args.source} " + "does not exist. Please check the " + "identifier and filepath and retry.") - args['--header']['x-amz-copy-source'] = f'/{quote(src_path)}' + args.header['x-amz-copy-source'] = f'/{quote(args.source)}' # Copy the old metadata verbatim if no additional metadata is supplied, # else combine the old and the new metadata in a sensible manner. - if args['--metadata'] or args['--replace-metadata']: - args['--header']['x-amz-metadata-directive'] = 'REPLACE' + if args.metadata or args.replace_metadata: + args.header['x-amz-metadata-directive'] = 'REPLACE' else: - args['--header']['x-amz-metadata-directive'] = 'COPY' + args.header['x-amz-metadata-directive'] = 'COPY' # New metadata takes precedence over old metadata. 
- if not args['--replace-metadata']: - args['--metadata'] = merge_dictionaries(SRC_ITEM.metadata, # type: ignore - args['--metadata']) + if not args.replace_metadata: + args.metadata = merge_dictionaries(SRC_ITEM.metadata, # type: ignore + args.metadata) # File metadata is copied by default but can be dropped. - file_metadata = None if args['--ignore-file-metadata'] else SRC_FILE.metadata # type: ignore + file_metadata = None if args.ignore_file_metadata else SRC_FILE.metadata # type: ignore # Add keep-old-version by default. - if not args['--header'].get('x-archive-keep-old-version') and not args['--no-backup']: - args['--header']['x-archive-keep-old-version'] = '1' + if not args.header.get('x-archive-keep-old-version') and not args.no_backup: + args.header['x-archive-keep-old-version'] = '1' - url = f'{session.protocol}//s3.us.archive.org/{quote(dest_path)}' - queue_derive = True if args['--no-derive'] is False else False + url = f'{args.session.protocol}//s3.us.archive.org/{quote(args.destination)}' + queue_derive = not args.no_derive req = ia.iarequest.S3Request(url=url, method='PUT', - metadata=args['--metadata'], + metadata=args.metadata, file_metadata=file_metadata, - headers=args['--header'], + headers=args.header, queue_derive=queue_derive, - access_key=session.access_key, - secret_key=session.secret_key) + access_key=args.session.access_key, + secret_key=args.session.secret_key) p = req.prepare() - r = session.send(p) + r = args.session.send(p) if r.status_code != 200: try: msg = get_s3_xml_text(r.text) except Exception as e: msg = r.text - print(f'error: failed to {cmd} "{src_path}" to "{dest_path}" - {msg}', file=sys.stderr) + print(f'error: failed to {cmd} "{args.source}" to "{args.destination}" - {msg}', + file=sys.stderr) sys.exit(1) elif cmd == 'copy': - print(f'success: copied "{src_path}" to "{dest_path}".', file=sys.stderr) - return (r, SRC_FILE) # type: ignore + print(f'success: copied "{args.source}" to "{args.destination}".', + file=sys.stderr) + return (r, SRC_FILE) diff --git a/internetarchive/cli/ia_delete.py b/internetarchive/cli/ia_delete.py index cd2d81d9..1704b0ff 100644 --- a/internetarchive/cli/ia_delete.py +++ b/internetarchive/cli/ia_delete.py @@ -1,7 +1,10 @@ -# -# The internetarchive module is a Python/CLI interface to Archive.org. -# -# Copyright (C) 2012-2019 Internet Archive +""" +ia_delete.py + +'ia' subcommand for deleting files from archive.org items. +""" + +# Copyright (C) 2012-2024 Internet Archive # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -16,132 +19,162 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -"""Delete files from Archive.org. - -usage: - ia delete ... [options]... - ia delete [options]... - ia delete --help - -options: - -h, --help - -q, --quiet Print status to stdout. - -c, --cascade Delete all files associated with the specified file, - including upstream derivatives and the original. - file. - -H, --header=... S3 HTTP headers to send with your request. - -a, --all Delete all files in the given item (Note: Some files, - such as _meta.xml and _files.xml, - cannot be deleted) - -d, --dry-run Output files to be deleted to stdout, but don't actually - delete. - -g, --glob= Only delete files matching the given pattern. - -f, --format=... Only only delete files matching the specified format(s). 
- -R, --retries= Number of times to retry if S3 returns a 503 SlowDown - error [default: 2]. - --no-backup Turn off archive.org backups. Clobbered files - will not be saved to history/files/$key.~N~ - [default: True]. -""" +import argparse import sys import requests.exceptions -from docopt import docopt, printable_usage -from schema import And, Or, Schema, SchemaError, Use # type: ignore[import] -from internetarchive import ArchiveSession -from internetarchive.cli.argparser import convert_str_list_to_unicode, get_args_dict +from internetarchive.cli.cli_utils import ( + prepare_args_dict, + validate_identifier, +) from internetarchive.utils import get_s3_xml_text -def main(argv, session: ArchiveSession) -> None: - args = docopt(__doc__, argv=argv) - - # Validation error messages. - invalid_id_msg = (' should be between 3 and 80 characters in length, and ' - 'can only contain alphanumeric characters, underscores ( _ ), or ' - 'dashes ( - )') - - # Validate args. - s = Schema({ - str: Use(bool), - '': list, - '--format': list, - '--header': Or(None, And(Use(get_args_dict), dict), - error='--header must be formatted as --header="key:value"'), - '--glob': list, - 'delete': bool, - '--retries': Use(lambda i: int(i[0])), - '': str, - }) - try: - args = s.validate(args) - except SchemaError as exc: - print(f'{exc}\n{printable_usage(__doc__)}', file=sys.stderr) - sys.exit(1) - - verbose = True if not args['--quiet'] else False - item = session.get_item(args['']) - if not item.exists: - print('{0}: skipping, item does\'t exist.', file=sys.stderr) - - # Files that cannot be deleted via S3. - no_delete = ['_meta.xml', '_files.xml', '_meta.sqlite'] - - # Add keep-old-version by default. - if not args['--header'].get('x-archive-keep-old-version') and not args['--no-backup']: - args['--header']['x-archive-keep-old-version'] = '1' - - if verbose: - print(f'Deleting files from {item.identifier}', file=sys.stderr) - - if args['--all']: +def setup(subparsers): + """ + Setup args for delete command. + + Args: + subparsers: subparser object passed from ia.py + """ + parser = subparsers.add_parser("delete", + aliases=["rm"], + help="Delete files from archive.org items") + # Positional arguments + parser.add_argument("identifier", + type=validate_identifier, + help="Identifier for the item from which files are to be deleted.") + parser.add_argument("file", + type=str, + nargs="+", + help="Specific file(s) to delete.") + + # Optional arguments + parser.add_argument("-q", "--quiet", + action="store_true", + help="Print status to stdout.") + parser.add_argument("-c", "--cascade", + action="store_true", + help="Delete all associated files including derivatives and the original.") + parser.add_argument("-H", "--header", + nargs="+", + metavar="KEY:VALUE", + action="append", + help="S3 HTTP headers to send with your request.") + parser.add_argument("-a", "--all", + action="store_true", + help="Delete all files in the given item. 
Some files cannot be deleted.") + parser.add_argument("-d", "--dry-run", + action="store_true", + help=("Output files to be deleted to stdout, " + "but don't actually delete them.")) + parser.add_argument("-g", "--glob", + type=str, + help="Only delete files matching the given pattern.") + parser.add_argument("-f", "--format", + type=str, + nargs="*", + help="Only delete files matching the specified formats.") + parser.add_argument("-R", "--retries", + type=int, + default=2, + help="Number of retries on S3 503 SlowDown error.") + parser.add_argument("--no-backup", + action="store_false", + help="Turn off archive.org backups. Clobbered files will not be saved.") + + parser.set_defaults(func=lambda args: main(args, parser)) + + +def get_files_to_delete(args: argparse.Namespace, item) -> list: + """Get files to delete based on command-line arguments.""" + if args.all: files = list(item.get_files()) - args['--cascade'] = True - elif args['--glob']: - files = item.get_files(glob_pattern=args['--glob']) - elif args['--format']: - files = item.get_files(formats=args['--format']) + args.cascade = True + elif args.glob: + files = item.get_files(glob_pattern=args.glob) + elif args.format: + files = item.get_files(formats=args.format) else: - fnames = [] - if args[''] == ['-']: - fnames = [f.strip() for f in sys.stdin] - else: - fnames = [f.strip() for f in args['']] - + fnames = [f.strip() for f in (sys.stdin if args.file == ["-"] else args.file)] files = list(item.get_files(fnames)) + return files - if not files: - print(' warning: no files found, nothing deleted.', file=sys.stderr) - sys.exit(1) +def delete_files(files, args, item, verbose): + """ + Deletes files from an item. + + Args: + files (list): A list of files to delete. + args (argparse.Namespace): Parsed command-line arguments. + item: The item from which files are being deleted. + verbose (bool): If True, verbose output is enabled. + + Returns: + bool: True if errors occurred during deletion, False otherwise. + """ errors = False + # Files that cannot be deleted via S3. + no_delete = ["_meta.xml", "_files.xml", "_meta.sqlite"] + for f in files: if not f: if verbose: - print(f' error: "{f.name}" does not exist', file=sys.stderr) + print(f" error: '{f.name}' does not exist", file=sys.stderr) errors = True + continue if any(f.name.endswith(s) for s in no_delete): continue - if args['--dry-run']: - print(f' will delete: {item.identifier}/{f.name}', file=sys.stderr) + if args.dry_run: + print(f" will delete: {item.identifier}/{f.name}", file=sys.stderr) continue try: resp = f.delete(verbose=verbose, - cascade_delete=args['--cascade'], - headers=args['--header'], - retries=args['--retries']) - except requests.exceptions.RetryError as e: - print(f' error: max retries exceeded for {f.name}', file=sys.stderr) + cascade_delete=args.cascade, + headers=args.header, + retries=args.retries) + except requests.exceptions.RetryError: + print(f" error: max retries exceeded for {f.name}", file=sys.stderr) errors = True continue if resp.status_code != 204: errors = True msg = get_s3_xml_text(resp.content) - print(f' error: {msg} ({resp.status_code})', file=sys.stderr) + print(f" error: {msg} ({resp.status_code})", file=sys.stderr) continue + return errors + + +def main(args: argparse.Namespace, parser: argparse.ArgumentParser): + """ + Main entry point for 'ia delete'. 
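Under the hood, each deletion goes through File.delete, as in delete_files above. The roughly equivalent library call, with a hypothetical identifier and filename:

    from internetarchive import get_item

    item = get_item("my-test-item")               # hypothetical identifier
    f = item.get_file("old-draft.txt")            # hypothetical filename
    resp = f.delete(cascade_delete=True,
                    headers={"x-archive-keep-old-version": "1"},
                    retries=2)
    # success is a 204 No Content response, as checked in delete_files() above
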
+ """ + args.header = prepare_args_dict(args.header, parser, arg_type="header") + + verbose = not args.quiet + item = args.session.get_item(args.identifier) + if not item.exists: + print(f"{item.identifier}: skipping, item doesn't exist.", file=sys.stderr) + return + + # Add keep-old-version by default. + if "x-archive-keep-old-version" not in args.header and not args.no_backup: + args.header["x-archive-keep-old-version"] = "1" + + if verbose: + print(f"Deleting files from {item.identifier}", file=sys.stderr) + + files = get_files_to_delete(args, item) + + if not files: + print(" warning: no files found, nothing deleted.", file=sys.stderr) + sys.exit(1) + + errors = delete_files(files, args, item, verbose) - if errors is True: + if errors: sys.exit(1) diff --git a/internetarchive/cli/ia_download.py b/internetarchive/cli/ia_download.py index 335bb89b..95434ae6 100644 --- a/internetarchive/cli/ia_download.py +++ b/internetarchive/cli/ia_download.py @@ -1,7 +1,10 @@ -# -# The internetarchive module is a Python/CLI interface to Archive.org. -# -# Copyright (C) 2012-2021 Internet Archive +""" +ia_download.py + +'ia' subcommand for downloading files from archive.org. +""" + +# Copyright (C) 2012-2024 Internet Archive # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -16,132 +19,143 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -"""Download files from Archive.org. - -usage: - ia download []... [options]... - ia download --itemlist= [options]... - ia download --search= [options]... - ia download --help - -options: - -h, --help - -q, --quiet Turn off ia's output [default: False]. - -d, --dry-run Print URLs to stdout and exit. - -i, --ignore-existing Clobber files already downloaded. - -C, --checksum Skip files based on checksum [default: False]. - -R, --retries= Set number of retries to [default: 5]. - -I, --itemlist= Download items from a specified file. Itemlists should - be a plain text file with one identifier per line. - -S, --search= Download items returned from a specified search query. - -P, --search-parameters=... Download items returned from a specified search query. - -g, --glob= Only download files whose filename matches the - given glob pattern. - -e, --exclude= Exclude files whose filename matches the given - glob pattern. - -f, --format=... Only download files of the specified format. - Use this option multiple times to download multiple - formats. - You can use the following command to retrieve - a list of file formats contained within a given - item: - - ia metadata --formats - - --on-the-fly Download on-the-fly files, as well as other matching - files. on-the-fly files include derivative EPUB, MOBI - and DAISY files [default: False]. - --no-directories Download files into working directory. Do not - create item directories. - --destdir= The destination directory to download files - and item directories to. - -s, --stdout Write file contents to stdout. - --no-change-timestamp Don't change the timestamp of downloaded files to reflect - the source material. - -p, --parameters=... Parameters to send with your query (e.g. `cnt=0`). - -a, --download-history Also download files from the history directory. - --source=... Filter files based on their source value in files.xml - (i.e. `original`, `derivative`, `metadata`). - --exclude-source=... Filter files based on their source value in files.xml - (i.e. 
`original`, `derivative`, `metadata`). - -t, --timeout= Set a timeout for download requests. - This sets both connect and read timeout. -""" from __future__ import annotations -import ast -import os +import argparse import sys -from os.path import exists as dir_exists from typing import TextIO -from docopt import docopt, printable_usage -from schema import And, Or, Schema, SchemaError, Use # type: ignore[import] - -from internetarchive import ArchiveSession -from internetarchive.cli.argparser import get_args_dict +from internetarchive.cli.cli_utils import prepare_args_dict, validate_dir_path, validate_identifier from internetarchive.files import File from internetarchive.search import Search -def main(argv, session: ArchiveSession) -> None: - args = docopt(__doc__, argv=argv) - - # Validation error messages. - destdir_msg = '--destdir must be a valid path to a directory.' - itemlist_msg = '--itemlist must be a valid path to an existing file.' - timeout_msg = '--timeout must be an int or float.' - - # Validate args. - s = Schema({ - str: Use(bool), - '--destdir': Or([], And(Use(lambda d: d[0]), dir_exists), error=destdir_msg), - '--format': list, - '--glob': Use(lambda item: item[0] if item else None), - '--exclude': Use(lambda item: item[0] if item else None), - '': list, - '--search': Or(str, None), - '--itemlist': Or(None, And(lambda f: os.path.isfile(f)), error=itemlist_msg), - '': Or(str, None), - '--retries': Use(lambda x: x[0]), - '--search-parameters': Use(lambda x: get_args_dict(x, query_string=True)), - '--on-the-fly': Use(bool), - '--no-change-timestamp': Use(bool), - '--download-history': Use(bool), - '--parameters': Use(lambda x: get_args_dict(x, query_string=True)), - '--source': list, - '--exclude-source': list, - '--timeout': Or([], And(Use(lambda t: ast.literal_eval(t[0])), Or(int, float), - error=timeout_msg)) - }) - - try: - args = s.validate(args) - if args['--glob'] and args['--format']: - raise SchemaError(None, '--glob and --format cannot be used together.') - elif args['--exclude'] and args['--format']: - raise SchemaError(None, '--exclude and --format cannot be used together.') - elif args['--exclude'] and not args['--glob']: - raise SchemaError(None, '--exclude should only be used in conjunction with --glob.') - - except SchemaError as exc: - print(f'{exc}\n{printable_usage(__doc__)}', file=sys.stderr) - sys.exit(1) +def setup(subparsers): + """ + Setup args for download command. + + Args: + subparsers: subparser object passed from ia.py + """ + parser = subparsers.add_parser("download", + aliases=["do"], + help="Retrieve and modify archive.org item metadata") + + # Main options + parser.add_argument("identifier", + nargs="?", + type=validate_identifier, + help="Identifier for the upload") + parser.add_argument("file", + nargs="*", + help="Files to download") + + # Additional options + parser.add_argument("-q", "--quiet", + action="store_true", + help="Turn off ia's output [default: False]") + parser.add_argument("-d", "--dry-run", + action="store_true", + help="Print URLs to stdout and exit") + parser.add_argument("-i", "--ignore-existing", + action="store_true", + help="Clobber files already downloaded") + parser.add_argument("-C", "--checksum", + action="store_true", + help="Skip files based on checksum [default: False]") + parser.add_argument("-R", "--retries", + type=int, + default=5, + help="Set number of retries to [default: 5]") + parser.add_argument("-I", "--itemlist", + type=argparse.FileType("r"), + help=("Download items from a specified file. 
" + "Itemlists should be a plain text file with one " + "identifier per line")) + parser.add_argument("-S", "--search", + help="Download items returned from a specified search query") + parser.add_argument("-P", "--search-parameters", + nargs="+", + help="Download items returned from a specified search query") + parser.add_argument("-g", "--glob", + help=("Only download files whose filename matches " + "the given glob pattern")) + parser.add_argument("-e", "--exclude", + help=("Exclude files whose filename matches " + "the given glob pattern")) + parser.add_argument("-f", "--format", + nargs="+", + help=("Only download files of the specified format. " + "Use this option multiple times to download " + "multiple formats. You can use the following command to " + "retrieve a list of file formats contained within a " + "given item: ia metadata --formats ")) + parser.add_argument("--on-the-fly", + action="store_true", + help=("Download on-the-fly files, as well as other " + "matching files. on-the-fly files include derivative " + "EPUB, MOBI and DAISY files [default: False]")) + parser.add_argument("--no-directories", + action="store_true", + help=("Download files into working directory. " + "Do not create item directories")) + parser.add_argument("--destdir", + type=validate_dir_path, + help=("The destination directory to download files " + "and item directories to")) + parser.add_argument("-s", "--stdout", + action="store_true", + help="Write file contents to stdout") + parser.add_argument("--no-change-timestamp", + action="store_true", + help=("Don't change the timestamp of downloaded files to reflect " + "the source material")) + parser.add_argument("-p", "--parameters", + nargs="+", + help="Parameters to send with your query (e.g. `cnt=0`)") + parser.add_argument("-a", "--download-history", + action="store_true", + help="Also download files from the history directory") + parser.add_argument("--source", + nargs="+", + help=("Filter files based on their source value in files.xml " + "(i.e. `original`, `derivative`, `metadata`)")) + parser.add_argument("--exclude-source", + nargs="+", + help=("Filter files based on their source value in files.xml " + "(i.e. `original`, `derivative`, `metadata`)")) + parser.add_argument("-t", "--timeout", + type=float, + help=("Set a timeout for download requests. " + "This sets both connect and read timeout")) + + parser.set_defaults(func=lambda args: main(args, parser)) + + +def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None: + """ + Main entry point for 'ia download'. 
+    """
+    args.parameters = prepare_args_dict(args.parameters,
+                                        parser=parser,
+                                        arg_type="parameters")
+    args.search_parameters = prepare_args_dict(args.search_parameters,
+                                               parser=parser,
+                                               arg_type="search-parameters")

-    retries = int(args['--retries'])
     ids: list[File | str] | Search | TextIO
-    if args['--itemlist']:
-        with open(args['--itemlist']) as fp:
+    if args.itemlist:
+        with open(args.itemlist) as fp:
             ids = [x.strip() for x in fp]
         total_ids = len(ids)
-    elif args['--search']:
+    elif args.search:
         try:
-            _search = session.search_items(args['--search'],
-                                           params=args['--search-parameters'])
+            _search = args.session.search_items(args.search,
+                                                params=args.search_parameters)
             total_ids = _search.num_found
             if total_ids == 0:
-                print(f'error: the query "{args["--search"]}" returned no results', file=sys.stderr)
+                print(f'error: the query "{args.search}" returned no results', file=sys.stderr)
                 sys.exit(1)
             ids = _search
         except ValueError as e:
@@ -149,16 +163,16 @@ def main(argv, session: ArchiveSession) -> None:
             sys.exit(1)

     # Download specific files.
-    if args['<identifier>'] and args['<identifier>'] != '-':
-        if '/' in args['<identifier>']:
-            identifier = args['<identifier>'].split('/')[0]
-            files = ['/'.join(args['<identifier>'].split('/')[1:])]
+    if args.identifier and args.identifier != '-':
+        if '/' in args.identifier:
+            identifier = args.identifier.split('/')[0]
+            files = ['/'.join(args.identifier.split('/')[1:])]
         else:
-            identifier = args['<identifier>']
-            files = args['<file>']
+            identifier = args.identifier
+            files = args.file
         total_ids = 1
         ids = [identifier]
-    elif args['<identifier>'] == '-':
+    elif args.identifier == '-':
         total_ids = 1
         ids = sys.stdin
         files = None
@@ -177,10 +191,9 @@ def main(argv, session: ArchiveSession) -> None:
         item_index = None

         try:
-            item = session.get_item(identifier)
+            item = args.session.get_item(identifier)
         except Exception as exc:
             print(f'{identifier}: failed to retrieve item metadata - errors', file=sys.stderr)
-            raise
             if 'You are attempting to make an HTTPS' in str(exc):
                 print(f'\n{exc}', file=sys.stderr)
                 sys.exit(1)
@@ -188,29 +201,29 @@ def main(argv, session: ArchiveSession) -> None:
             continue

         # Otherwise, download the entire item.
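+        # item.download() skips files under an item's history/ directory
+        # unless --download-history was passed.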
-        ignore_history_dir = True if not args['--download-history'] else False
+        ignore_history_dir = not args.download_history
         _errors = item.download(
             files=files,
-            formats=args['--format'],
-            glob_pattern=args['--glob'],
-            exclude_pattern=args['--exclude'],
-            dry_run=args['--dry-run'],
-            verbose=not args['--quiet'],
-            ignore_existing=args['--ignore-existing'],
-            checksum=args['--checksum'],
-            destdir=args['--destdir'],
-            no_directory=args['--no-directories'],
-            retries=retries,
+            formats=args.format,
+            glob_pattern=args.glob,
+            exclude_pattern=args.exclude,
+            dry_run=args.dry_run,
+            verbose=not args.quiet,
+            ignore_existing=args.ignore_existing,
+            checksum=args.checksum,
+            destdir=args.destdir,
+            no_directory=args.no_directories,
+            retries=args.retries,
             item_index=item_index,
             ignore_errors=True,
-            on_the_fly=args['--on-the-fly'],
-            no_change_timestamp=args['--no-change-timestamp'],
-            params=args['--parameters'],
+            on_the_fly=args.on_the_fly,
+            no_change_timestamp=args.no_change_timestamp,
+            params=args.parameters,
             ignore_history_dir=ignore_history_dir,
-            source=args['--source'],
-            exclude_source=args['--exclude-source'],
-            stdout=args['--stdout'],
-            timeout=args['--timeout'],
+            source=args.source,
+            exclude_source=args.exclude_source,
+            stdout=args.stdout,
+            timeout=args.timeout,
         )
         if _errors:
             errors.append(_errors)
diff --git a/internetarchive/cli/ia_list.py b/internetarchive/cli/ia_list.py
index 334c493e..ae328cd4 100644
--- a/internetarchive/cli/ia_list.py
+++ b/internetarchive/cli/ia_list.py
@@ -1,7 +1,10 @@
-#
-# The internetarchive module is a Python/CLI interface to Archive.org.
-#
-# Copyright (C) 2012-2019 Internet Archive
+"""
+ia_list.py
+
+'ia' subcommand for listing files from archive.org items.
+"""
+
+# Copyright (C) 2012-2024 Internet Archive
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as
@@ -16,69 +19,133 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.

-"""List files in a given item.
-
-usage:
-    ia list <identifier> [-v] [--glob=<pattern>] [--location] [--format=<format>...]
-            [--columns=<column1,column2> | --all]
-
-options:
- -h, --help
- -v, --verbose                    Print column headers. [default: False]
- -a, --all                        List all information available for files.
- -l, --location                   Print full URL for each file.
- -c, --columns=<column1,column2>  List specified file information. [default: name]
- -g, --glob=<pattern>             Only return files matching the given pattern.
- -f, --format=<format>            Return files matching <format>.
-"""
+import argparse
 import csv
 import sys
 from fnmatch import fnmatch
 from itertools import chain

-from docopt import docopt
+from internetarchive.cli.cli_utils import validate_identifier

-from internetarchive import ArchiveSession

+def setup(subparsers):
+    """
+    Setup args for list command.
-def main(argv, session: ArchiveSession) -> None:
-    args = docopt(__doc__, argv=argv)
-    item = session.get_item(args['<identifier>'])
+
+    Args:
+        subparsers: subparser object passed from ia.py
+    """
+    parser = subparsers.add_parser("list",
+                                   aliases=["ls"],
+                                   help="list files from archive.org items")

-    files = item.files
-    if args.get('--all'):
-        columns = list(set(chain.from_iterable(k for k in files)))
+    # Positional arguments
+    parser.add_argument("identifier",
+                        type=validate_identifier,
+                        help="identifier of the item")
+
+    # Options
+    parser.add_argument("-v", "--verbose",
+                        action="store_true",
+                        help="print column headers")
+    parser.add_argument("-a", "--all",
+                        action="store_true",
+                        help="list all information available for files")
+    parser.add_argument("-l", "--location",
+                        action="store_true",
+                        help="print full URL for each file")
+    parser.add_argument("-c", "--columns",
+                        action="append",
+                        type=prepare_columns,
+                        help="list specified file information")
+    parser.add_argument("-g", "--glob",
+                        help="only return files matching the given pattern")
+    parser.add_argument("-f", "--format",
+                        action="append",
+                        help="return files matching FORMAT")
+
+    parser.set_defaults(func=main)
+
+
+def prepare_columns(columns):
+    """
+    Split comma-separated column values into a flat list.
+
+    Returns:
+        list: List of column names, or None if no columns were given.
+    """
+    if columns:
+        if not isinstance(columns, list):
+            columns = [columns]
+        return list(chain.from_iterable([c.split(",") for c in columns]))
+    return None
+
+
+def setup_columns(args, files):
+    """
+    Setup and adjust columns for output based on args.
+    """
+    if not args.columns:
+        args.columns = ["name"]
     else:
-        columns = args['--columns'].split(',')
+        args.columns = list(chain.from_iterable(args.columns))
+
+    if args.all:
+        args.columns = list(set(chain.from_iterable(k for k in files)))

     # Make "name" the first column always.
-    if 'name' in columns:
-        columns.remove('name')
-        columns.insert(0, 'name')
+    if "name" in args.columns:
+        args.columns.remove("name")
+        args.columns.insert(0, "name")
+

-    dict_writer = csv.DictWriter(sys.stdout, columns, delimiter='\t', lineterminator='\n')
+def filter_files(args, files, item):
+    """
+    Filter files based on glob patterns or formats.
+    """
+    if args.glob:
+        patterns = args.glob.split("|")
+        return [f for f in files if any(fnmatch(f["name"], p) for p in patterns)]
+    if args.format:
+        return [f.__dict__ for f in item.get_files(formats=args.format)]
+    return files

-    if args.get('--glob'):
-        patterns = args['--glob'].split('|')
-        files = [f for f in files if any(fnmatch(f['name'], p) for p in patterns)]
-    elif args.get('--format'):
-        files = [f.__dict__ for f in item.get_files(formats=args['--format'])]

+def generate_output(files, args, dict_writer, item):
+    """
+    Generate and write output based on filtered files and columns.
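+
+    Rows are written to stdout as tab-separated values via csv.DictWriter.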
+    """
     output = []
     for f in files:
         file_dict = {}
         for key, val in f.items():
-            if key in columns:
+            if key in args.columns:
                 if isinstance(val, (list, tuple, set)):
-                    val = ';'.join(val)
-                if key == 'name' and args.get('--location'):
-                    file_dict[key] = f'https://{session.host}/download/{item.identifier}/{val}'
+                    val = ";".join(val)
+                if key == "name" and args.location:
+                    file_dict[key] = (f"https://{args.session.host}"
+                                      f"/download/{item.identifier}/{val}")
                 else:
                     file_dict[key] = val
         output.append(file_dict)
-
-    if args['--verbose']:
-        dict_writer.writer.writerow(columns)
+    if args.verbose:
+        dict_writer.writer.writerow(args.columns)
     if all(x == {} for x in output):
         sys.exit(1)
     dict_writer.writerows(output)
+
+
+def main(args: argparse.Namespace) -> None:
+    """
+    Main entry point for 'ia list'.
+    """
+    item = args.session.get_item(args.identifier)
+    files = item.files
+
+    setup_columns(args, files)
+    files = filter_files(args, files, item)
+
+    dict_writer = csv.DictWriter(sys.stdout, args.columns,
+                                 delimiter="\t",
+                                 lineterminator="\n")
+    generate_output(files, args, dict_writer, item)
diff --git a/internetarchive/cli/ia_metadata.py b/internetarchive/cli/ia_metadata.py
index b4833695..419812f4 100644
--- a/internetarchive/cli/ia_metadata.py
+++ b/internetarchive/cli/ia_metadata.py
@@ -1,7 +1,10 @@
-#
-# The internetarchive module is a Python/CLI interface to Archive.org.
-#
-# Copyright (C) 2012-2021 Internet Archive
+"""
+ia_metadata.py
+
+'ia' subcommand for modifying and retrieving metadata from archive.org items.
+"""
+
+# Copyright (C) 2012-2024 Internet Archive
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as
@@ -16,82 +19,115 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.

-"""Retrieve and modify Archive.org metadata.
-
-usage:
-    ia metadata <identifier>... [--exists | --formats] [--header=<key:value>...]
-    ia metadata <identifier>... --modify=<key:value>... [--target=<target>]
-                [--priority=<priority>] [--header=<key:value>...]
-                [--timeout=<value>] [--expect=<key:value>...]
-    ia metadata <identifier>... --remove=<key:value>... [--priority=<priority>]
-                [--header=<key:value>...] [--timeout=<value>]
-                [--expect=<key:value>...]
-    ia metadata <identifier>... [--append=<key:value>... | --append-list=<key:value>...]
-                [--priority=<priority>] [--target=<target>]
-                [--header=<key:value>...] [--timeout=<value>]
-                [--expect=<key:value>...]
-    ia metadata <identifier>... --insert=<key:value>... [--priority=<priority>]
-                [--target=<target>] [--header=<key:value>...]
-                [--timeout=<value>] [--expect=<key:value>...]
-    ia metadata --spreadsheet=<metadata.csv> [--priority=<priority>]
-                [--modify=<key:value>...] [--header=<key:value>...] [--timeout=<value>]
-                [--expect=<key:value>...]
-    ia metadata --help
-
-options:
- -h, --help
- -m, --modify=<key:value>...       Modify the metadata of an item.
- -H, --header=<key:value>...       S3 HTTP headers to send with your request.
- -t, --target=<target>             The metadata target to modify.
- -a, --append=<key:value>...       Append a string to a metadata element.
- -A, --append-list=<key:value>...  Append a field to a metadata element.
- -i, --insert=<key:value>...       Insert a value into a multi-value field given
-                                   an index (e.g. `--insert=collection[0]:foo`).
- -E, --expect=<key:value>...       Test an expectation server-side before applying
-                                   patch to item metadata.
- -s, --spreadsheet=<metadata.csv>  Modify metadata in bulk using a spreadsheet as
-                                   input.
- -e, --exists                      Check if an item exists
- -F, --formats                     Return the file-formats the given item contains.
- -p, --priority=<priority>         Set the task priority.
- -r, --remove=<key:value>...       Remove <key:value> from a metadata element.
-                                   Works on both single and multi-field metadata
-                                   elements.
- --timeout=<value>                 Set a timeout for metadata writes.
-""" from __future__ import annotations +import argparse import csv -import os import sys from collections import defaultdict from copy import copy from typing import Mapping -from docopt import docopt, printable_usage from requests import Response -from schema import And, Or, Schema, SchemaError, Use # type: ignore[import] -from internetarchive import item, session -from internetarchive.cli.argparser import ( - get_args_dict, +from internetarchive import item +from internetarchive.cli.cli_utils import ( get_args_dict_many_write, - get_args_header_dict, + prepare_args_dict, + validate_identifier, ) from internetarchive.exceptions import ItemLocateError from internetarchive.utils import json -def modify_metadata(item: item.Item, metadata: Mapping, args: Mapping) -> Response: - append = bool(args['--append']) - expect = get_args_dict(args['--expect']) - append_list = bool(args['--append-list']) - insert = bool(args['--insert']) +def setup(subparsers): + """ + Setup args for metadata command. + + Args: + subparsers: subparser object passed from ia.py + """ + parser = subparsers.add_parser("metadata", + aliases=["md"], + help="Retrieve and modify archive.org item metadata") + + parser.add_argument("identifier", + nargs="+", + type=validate_identifier, + help="Identifier for the upload") + + # Mutually exclusive group for metadata modification options + modify_group = parser.add_mutually_exclusive_group() + modify_group.add_argument("-m", "--modify", + action="append", + metavar="key:value", + help="Modify the metadata of an item") + modify_group.add_argument("-r", "--remove", + action="append", + metavar="key:value", + help="Remove key:value from a metadata element") + modify_group.add_argument("-a", "--append", + action="append", + metavar="key:value", + help="Append a string to a metadata element") + modify_group.add_argument("-A", "--append-list", + action="append", + metavar="key:value", + help="Append a field to a metadata element") + modify_group.add_argument("-i", "--insert", + action="append", + metavar="key:value", + help=("Insert a value into a multi-value field given " + "an index (e.g. `--insert=collection[0]:foo`)")) + + # Additional options + parser.add_argument("-E", "--expect", + action="append", + metavar="key:value", + help=("Test an expectation server-side before applying patch " + "to item metadata")) + parser.add_argument("-H", "--header", + action="append", + metavar="key:value", + help="S3 HTTP headers to send with your request") + parser.add_argument("-t", "--target", + metavar="target", + help="The metadata target to modify") + parser.add_argument("-s", "--spreadsheet", + metavar="metadata.csv", + help="Modify metadata in bulk using a spreadsheet as input") + parser.add_argument("-e", "--exists", + action="store_true", + help="Check if an item exists") + parser.add_argument("-F", "--formats", + action="store_true", + help="Return the file-formats the given item contains") + parser.add_argument("-p", "--priority", + metavar="priority", + help="Set the task priority") + parser.add_argument("--timeout", + metavar="value", + help="Set a timeout for metadata writes") + + parser.set_defaults(func=lambda args: main(args, parser)) + + +def modify_metadata(item: item.Item, + metadata: Mapping, + args: argparse.Namespace, + parser: argparse.ArgumentParser) -> Response: + """ + Modify metadata helper function. 
+    """
+    append = bool(args.append)
+    expect = prepare_args_dict(args.expect, parser=parser, arg_type="expect")
+    append_list = bool(args.append_list)
+    insert = bool(args.insert)
     try:
-        r = item.modify_metadata(metadata, target=args['--target'], append=append,
-                                 expect=expect, priority=args['--priority'],
-                                 append_list=append_list, headers=args['--header'],
-                                 insert=insert, timeout=args['--timeout'])
+        r = item.modify_metadata(metadata, target=args.target, append=append,
+                                 expect=expect, priority=args.priority,
+                                 append_list=append_list, headers=args.header,
+                                 insert=insert, timeout=args.timeout)
         assert isinstance(r, Response)  # mypy: modify_metadata() -> Request | Response
     except ItemLocateError as exc:
         print(f'{item.identifier} - error: {exc}', file=sys.stderr)
@@ -105,12 +141,17 @@ def modify_metadata(item: item.Item, metadata: Mapping, args: Mapping) -> Respon
     return r


-def remove_metadata(item: item.Item, metadata: Mapping, args: Mapping) -> Response:
+def remove_metadata(item: item.Item,
+                    metadata: Mapping,
+                    args: argparse.Namespace,
+                    parser: argparse.ArgumentParser) -> Response:
+    """
+    Remove metadata helper function.
+    """
     md: dict[str, list | str] = defaultdict(list)
     for key in metadata:
         src_md = copy(item.metadata.get(key))
         if not src_md:
-            print(f'{item.identifier}/metadata/{key} does not exist, skipping.', file=sys.stderr)
             continue

         if key == 'collection':
@@ -159,11 +200,6 @@ def remove_metadata(item: item.Item, metadata: Mapping, args: Mapping) -> Respon
         if len(md[key]) == len(src_md):
             del md[key]

-        # Workaround to avoid empty lists or strings as values.
-        # TODO: Shouldn't the metadata api handle this?
-        if len(src_md) == 1 and metadata[key] in src_md:
-            md[key] = 'REMOVE_TAG'
-
     if md.get('collection') == []:
         print(f'{item.identifier} - error: all collections would be removed, not submitting task.',
               file=sys.stderr)
@@ -172,86 +208,65 @@ def remove_metadata(item: item.Item, metadata: Mapping, args: Mapping) -> Respon
         print(f'{item.identifier} - warning: nothing needed to be removed.', file=sys.stderr)
         sys.exit(0)

-    r = modify_metadata(item, md, args)
+    r = modify_metadata(item, md, args, parser)
     return r


-def main(argv: dict, session: session.ArchiveSession) -> None:
-    args = docopt(__doc__, argv=argv)
-
-    # Validate args.
-    s = Schema({
-        str: bool,
-        '<identifier>': list,
-        '--modify': list,
-        '--expect': list,
-        '--header': Or(None, And(Use(get_args_header_dict), dict),
-                       error='--header must be formatted as --header="key:value"'),
-        '--append': list,
-        '--append-list': list,
-        '--insert': list,
-        '--remove': list,
-        '--spreadsheet': Or(None, And(lambda f: os.path.exists(f),
-                            error='<spreadsheet> should be a readable file or directory.')),
-        '--target': Or(None, str),
-        '--priority': Or(None, Use(int, error='<priority> should be an integer.')),
-        '--timeout': Or(None, str),
-    })
-    try:
-        args = s.validate(args)
-    except SchemaError as exc:
-        print(f'{exc}\n{printable_usage(__doc__)}', file=sys.stderr)
-        sys.exit(1)
-
+def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None:
+    """
+    Main entry point for 'ia metadata'.
+    """
     formats = set()
     responses: list[bool | Response] = []

-    for i, identifier in enumerate(args['<identifier>']):
-        item = session.get_item(identifier)
+    for i, identifier in enumerate(args.identifier):
+        item = args.session.get_item(identifier)

         # Check existence of item.
-        if args['--exists']:
+        if args.exists:
             if item.exists:
                 responses.append(True)
                 print(f'{identifier} exists', file=sys.stderr)
             else:
                 responses.append(False)
                 print(f'{identifier} does not exist', file=sys.stderr)
-            if (i + 1) == len(args['<identifier>']):
+            if (i + 1) == len(args.identifier):
                 if all(r is True for r in responses):
                     sys.exit(0)
                 else:
                     sys.exit(1)

         # Modify metadata.
-        elif (args['--modify'] or args['--append'] or args['--append-list']
-              or args['--remove'] or args['--insert']):
-            if args['--modify']:
-                metadata_args = args['--modify']
-            elif args['--append']:
-                metadata_args = args['--append']
-            elif args['--append-list']:
-                metadata_args = args['--append-list']
-            elif args['--insert']:
-                metadata_args = args['--insert']
-            if args['--remove']:
-                metadata_args = args['--remove']
-            try:
-                metadata = get_args_dict(metadata_args)
-                if any('/' in k for k in metadata):
-                    metadata = get_args_dict_many_write(metadata)
-            except ValueError:
-                print('error: The value of --modify, --remove, --append, --append-list '
-                      'or --insert is invalid. It must be formatted as: '
-                      '--modify=key:value',
-                      file=sys.stderr)
-                sys.exit(1)
+        elif (args.modify or args.append or args.append_list
+              or args.remove or args.insert):
+            if args.modify:
+                metadata = prepare_args_dict(args.modify,
+                                             parser=parser,
+                                             arg_type="modify")
+            elif args.append:
+                metadata = prepare_args_dict(args.append,
+                                             parser=parser,
+                                             arg_type="append")
+            elif args.append_list:
+                metadata = prepare_args_dict(args.append_list,
+                                             parser=parser,
+                                             arg_type="append-list")
+            elif args.insert:
+                metadata = prepare_args_dict(args.insert,
+                                             parser=parser,
+                                             arg_type="insert")
+            if args.remove:
+                metadata = prepare_args_dict(args.remove,
+                                             parser=parser,
+                                             arg_type="remove")
+            if any('/' in k for k in metadata):
+                metadata = get_args_dict_many_write(metadata)

-            if args['--remove']:
-                responses.append(remove_metadata(item, metadata, args))
+            if args.remove:
+                responses.append(remove_metadata(item, metadata, args, parser))
             else:
-                responses.append(modify_metadata(item, metadata, args))
-            if (i + 1) == len(args['<identifier>']):
+                responses.append(modify_metadata(item, metadata, args, parser))
+            if (i + 1) == len(args.identifier):
                 if all(r.status_code == 200 for r in responses):  # type: ignore
                     sys.exit(0)
                 else:
@@ -267,10 +282,10 @@ def main(argv: dict, session: session.ArchiveSession) -> None:
             sys.exit(1)

         # Get metadata.
-        elif args['--formats']:
+        elif args.formats:
             for f in item.get_files():
                 formats.add(f.format)
-            if (i + 1) == len(args['<identifier>']):
+            if (i + 1) == len(args.identifier):
                 print('\n'.join(formats))

         # Dump JSON to stdout.
@@ -279,20 +294,20 @@ def main(argv: dict, session: session.ArchiveSession) -> None:
             print(metadata_str)

         # Edit metadata for items in bulk, using a spreadsheet as input.
-    if args['--spreadsheet']:
-        if not args['--priority']:
-            args['--priority'] = -5
-        with open(args['--spreadsheet'], newline='', encoding='utf-8') as csvfp:
+    if args.spreadsheet:
+        if not args.priority:
+            args.priority = -5
+        with open(args.spreadsheet, newline='', encoding='utf-8') as csvfp:
             spreadsheet = csv.DictReader(csvfp)
             responses = []
             for row in spreadsheet:
                 if not row['identifier']:
                     continue
-                item = session.get_item(row['identifier'])
+                item = args.session.get_item(row['identifier'])
                 if row.get('file'):
                     del row['file']
                 metadata = {k.lower(): v for k, v in row.items() if v}
-                responses.append(modify_metadata(item, metadata, args))
+                responses.append(modify_metadata(item, metadata, args, parser))

         if all(r.status_code == 200 for r in responses):  # type: ignore
             sys.exit(0)
diff --git a/internetarchive/cli/ia_move.py b/internetarchive/cli/ia_move.py
index b2c97972..7095446a 100644
--- a/internetarchive/cli/ia_move.py
+++ b/internetarchive/cli/ia_move.py
@@ -1,7 +1,10 @@
-#
-# The internetarchive module is a Python/CLI interface to Archive.org.
-#
-# Copyright (C) 2012-2019 Internet Archive
+"""
+ia_move.py
+
+'ia' subcommand for moving files on archive.org
+"""
+
+# Copyright (C) 2012-2024 Internet Archive
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as
@@ -16,66 +19,79 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.

-"""Move and rename files in archive.org items.
+import argparse
+import sys
+
+from internetarchive.cli import ia_copy
+from internetarchive.cli.cli_utils import prepare_args_dict

-usage:
-    ia move <src-identifier>/<src-file> <dest-identifier>/<dest-file> [options]...
-    ia move --help

-options:
- -h, --help
- -m, --metadata=<key:value>...  Metadata to add to your new item, if you are moving
-                                the file to a new item.
- -H, --header=<key:value>...    S3 HTTP headers to send with your request.
- -n, --no-derive                Do not derive uploaded files.
- --no-backup                    Turn off archive.org backups. Clobbered files
-                                will not be saved to history/files/$key.~N~
-                                [default: True].
-"""
-import sys

+def setup(subparsers):
+    """
+    Setup args for move command.
-from docopt import docopt, printable_usage
-from schema import And, Or, Schema, SchemaError, Use  # type: ignore[import]
+    Args:
+        subparsers: subparser object passed from ia.py
+    """
+    parser = subparsers.add_parser("move",
+                                   aliases=["mv"],
+                                   help="Move and rename files in archive.org items")

-from internetarchive import ArchiveSession
-from internetarchive.cli import ia_copy
-from internetarchive.cli.argparser import get_args_dict
+    # Positional arguments
+    parser.add_argument('source',
+                        metavar='SOURCE',
+                        help="Source file formatted as: identifier/file")
+    parser.add_argument('destination',
+                        metavar='DESTINATION',
+                        help="Destination file formatted as: identifier/file")
+    # Options
+    parser.add_argument('-m', '--metadata',
+                        metavar='KEY:VALUE',
+                        action='append',
+                        help=("Metadata to add to your new item, "
+                              "if you are moving the file to a new item"))
+    parser.add_argument('-H', '--header',
+                        metavar='KEY:VALUE',
+                        action='append',
+                        help="S3 HTTP headers to send with your request")
+    parser.add_argument("--replace-metadata",
+                        action="store_true",
+                        help=("Only use metadata specified as argument, do not copy any "
+                              "from the source item"))
+    parser.add_argument("--ignore-file-metadata",
+                        action="store_true",
+                        help="Do not copy file metadata")
+    parser.add_argument('-n', '--no-derive',
+                        action='store_true',
+                        help="Do not derive uploaded files")
+    parser.add_argument('--no-backup',
+                        action='store_true',
+                        help=("Turn off archive.org backups, "
+                              'clobbered files will not be saved to "history/files/$key.~N~"'))

-def main(argv, session: ArchiveSession) -> None:
-    args = docopt(__doc__, argv=argv)
-    src_path = args['<src-identifier>/<src-file>']
-    dest_path = args['<dest-identifier>/<dest-file>']
+    parser.set_defaults(func=lambda args: main(args, parser))

-    # Validate args.
-    s = Schema({
-        str: Use(bool),
-        '--metadata': list,
-        '--header': Or(None, And(Use(get_args_dict), dict),
-                       error='--header must be formatted as --header="key:value"'),
-        '<src-identifier>/<src-file>': And(str, lambda x: '/' in x,
-                 error='Source not formatted correctly. See usage example.'),
-        '<dest-identifier>/<dest-file>': And(str, lambda x: '/' in x,
-                 error='Destination not formatted correctly. See usage example.'),
-    })
-    try:
-        args = s.validate(args)
-    except SchemaError as exc:
-        print(f'{exc}\n{printable_usage(__doc__)}', file=sys.stderr)
-        sys.exit(1)

-    # Add keep-old-version by default.
-    if not args['--header'].get('x-archive-keep-old-version') and not args['--no-backup']:
-        args['--header']['x-archive-keep-old-version'] = '1'
+def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None:
+    """
+    Main entry point for ia move command.
+    """
+    args.header = prepare_args_dict(args.header, parser=parser, arg_type='header')
+    args.metadata = prepare_args_dict(args.metadata, parser=parser, arg_type='metadata')

-    # First we use ia_copy, prep argv for ia_copy.
-    argv.pop(0)
-    argv = ['copy'] + argv
+    # Add keep-old-version by default.
+    if not args.header.get('x-archive-keep-old-version') and not args.no_backup:
+        args.header['x-archive-keep-old-version'] = '1'

     # Call ia_copy.
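+    # ia_copy.main() performs the copy and returns the source File,
+    # which is then deleted to complete the move.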
-    r, src_file = ia_copy.main(argv, session, cmd='move')
-    dr = src_file.delete(headers=args['--header'], cascade_delete=True)
+    _, src_file = ia_copy.main(args, cmd='move', parser=parser)
+    if src_file:
+        dr = src_file.delete(headers=args.header, cascade_delete=True)
+    else:
+        print(f'error: {args.source} does not exist', file=sys.stderr)
+        sys.exit(1)
     if dr.status_code == 204:
-        print(f'success: moved {src_path} to {dest_path}', file=sys.stderr)
+        print(f'success: moved {args.source} to {args.destination}', file=sys.stderr)
         sys.exit(0)
     print(f'error: {dr.content}', file=sys.stderr)
diff --git a/internetarchive/cli/ia_reviews.py b/internetarchive/cli/ia_reviews.py
index 759ee536..7f6b6770 100644
--- a/internetarchive/cli/ia_reviews.py
+++ b/internetarchive/cli/ia_reviews.py
@@ -1,7 +1,10 @@
-#
-# The internetarchive module is a Python/CLI interface to Archive.org.
-#
-# Copyright (C) 2012-2019 Internet Archive
+"""
+ia_reviews.py
+
+'ia' subcommand for listing, submitting, and deleting reviews for archive.org items.
+"""
+
+# Copyright (C) 2012-2024 Internet Archive
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as
@@ -16,53 +19,71 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.

-"""Submit and modify reviews for archive.org items.
+import argparse
+import sys

-For more information on how to use this command, refer to the
-Reviews API documentation::
+from requests.exceptions import HTTPError

-    https://archive.org/services/docs/api/reviews.html
+from internetarchive.cli.cli_utils import validate_identifier

-usage:
-    ia reviews <identifier>
-    ia reviews <identifier> --delete [--username=<username> | --screenname=<screenname>
-                            | --itemname=<itemname>]
-    ia reviews <identifier> --title=<title> --body=<body> [--stars=<stars>]
-    ia reviews --help

-options:
- -h, --help
- -t, --title=<title>            The title of your review.
- -b, --body=<body>              The body of your review.
- -s, --stars=<stars>            The number of stars for your review.
- -d, --delete                   Delete your review. [default: False]
- -u, --username=<username>      Delete reviews for a specific user
-                                given username (must be used with --delete).
- -S, --screenname=<screenname>  Delete reviews for a specific user
-                                given screenname (must be used with --delete).
- -I, --itemname=<itemname>      Delete reviews for a specific user
-                                given itemname (must be used with --delete).

+def setup(subparsers):
+    """
+    Setup args for reviews command.
-examples: - ia reviews nasa -""" -import sys + Args: + subparsers: subparser object passed from ia.py + """ + parser = subparsers.add_parser("reviews", + aliases=["re"], + help="submit and modify reviews for archive.org items") -from docopt import docopt -from requests.exceptions import HTTPError + # Positional arguments + parser.add_argument("identifier", + type=validate_identifier, + help="identifier of the item") + + # Options + parser.add_argument("-d", "--delete", + action="store_true", + help="delete your review") + parser.add_argument("-t", "--title", + type=str, + help="the title of your review") + parser.add_argument("-b", "--body", + type=str, + help="the body of your review") + parser.add_argument("-s", "--stars", + type=int, + help="the number of stars for your review") -from internetarchive import ArchiveSession + # Conditional arguments that require --delete + delete_group = parser.add_argument_group("delete options", + ("these options are used with " + "the --delete flag")) + delete_group.add_argument("-u", "--username", + type=str, + help="delete reviews for a specific user given USERNAME") + delete_group.add_argument("-S", "--screenname", + type=str, + help="delete reviews for a specific user given SCREENNAME") + delete_group.add_argument("-I", "--itemname", + type=str, + help="delete reviews for a specific user given ITEMNAME") + parser.set_defaults(func=lambda args: main(args, parser)) -def main(argv, session: ArchiveSession) -> None: - args = docopt(__doc__, argv=argv) - item = session.get_item(args['<identifier>']) - if args['--delete']: - r = item.delete_review(username=args['--username'], - screenname=args['--screenname'], - itemname=args['--itemname']) - elif not args['--body']: +def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None: + """ + Main entry point for 'ia reviews'. + """ + item = args.session.get_item(args.identifier) + if args.delete: + r = item.delete_review(username=args.username, + screenname=args.screenname, + itemname=args.itemname) + elif not args.body and not args.title: try: r = item.get_review() print(r.text) @@ -73,16 +94,19 @@ def main(argv, session: ArchiveSession) -> None: else: raise exc else: - r = item.review(args['--title'], args['--body'], args['--stars']) + if (args.title and not args.body) or (args.body and not args.title): + parser.error("both --title and --body must be provided") + r = item.review(args.title, args.body, args.stars) j = r.json() - if j.get('success') or 'no change detected' in j.get('error', '').lower(): - task_id = j.get('value', {}).get('task_id') + if j.get("success") or "no change detected" in j.get("error", "").lower(): + task_id = j.get("value", {}).get("task_id") if task_id: - print(f'{item.identifier} - success: https://catalogd.archive.org/log/{task_id}', + print((f"{item.identifier} - success: " + f"https://catalogd.archive.org/log/{task_id}"), file=sys.stderr) else: - print(f'{item.identifier} - warning: no changes detected!', file=sys.stderr) + print(f"{item.identifier} - warning: no changes detected!", file=sys.stderr) sys.exit(0) else: - print(f'{item.identifier} - error: {j.get("error")}', file=sys.stderr) + print(f"{item.identifier} - error: {j.get('error')}", file=sys.stderr) sys.exit(1) diff --git a/internetarchive/cli/ia_search.py b/internetarchive/cli/ia_search.py index bc121216..aceaf542 100644 --- a/internetarchive/cli/ia_search.py +++ b/internetarchive/cli/ia_search.py @@ -1,7 +1,10 @@ -# -# The internetarchive module is a Python/CLI interface to Archive.org. 
-# -# Copyright (C) 2012-2019 Internet Archive +""" +ia_search.py + +'ia' subcommand for searching items on archive.org. +""" + +# Copyright (C) 2012-2024 Internet Archive # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -16,110 +19,184 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. -"""Search items on Archive.org. - -usage: - ia search <query>... [options]... - ia search --help - -options: - -h, --help - -p, --parameters=<key:value>... Parameters to send with your query. - -H, --header=<key:value>... Add custom headers to your search request. - -s, --sort=<field order>... Sort search results by specified fields. - <order> can be either "asc" for ascending - and "desc" for descending. - -i, --itemlist Output identifiers only. - -f, --field=<field>... Metadata fields to return. - -n, --num-found Print the number of results to stdout. - -F, --fts Beta support for querying the archive.org - full text search API. - -D, --dsl-fts Submit --fts query in dsl [default: False]. - -t, --timeout=<seconds> Set the timeout in seconds [default: 300]. - -examples: - - ia search 'collection:nasa' --parameters rows:1 -""" from __future__ import annotations +import argparse import sys from itertools import chain -from docopt import docopt, printable_usage from requests.exceptions import ConnectTimeout, ReadTimeout -from schema import And, Or, Schema, SchemaError, Use # type: ignore[import] -from internetarchive import ArchiveSession, search_items -from internetarchive.cli.argparser import get_args_dict +from internetarchive.cli.cli_utils import prepare_args_dict from internetarchive.exceptions import AuthenticationError from internetarchive.utils import json -def main(argv, session: ArchiveSession | None = None) -> None: - args = docopt(__doc__, argv=argv) - - # Validate args. - s = Schema({ - str: Use(bool), - '<query>': Use(lambda x: ' '.join(x)), - '--parameters': Use(lambda x: get_args_dict(x, query_string=True)), - '--header': Or(None, And(Use(get_args_dict), dict), - error='--header must be formatted as --header="key:value"'), - '--sort': list, - '--field': list, - '--timeout': Use(lambda x: float(x[0]), - error='--timeout must be integer or float.') - }) - try: - args = s.validate(args) - except SchemaError as exc: - print(f'{exc}\n{printable_usage(__doc__)}', file=sys.stderr) - sys.exit(1) +def setup(subparsers): + """ + Setup args for search command. 
+ + Args: + subparsers: subparser object passed from ia.py + """ + parser = subparsers.add_parser("search", + aliases=["se"], + help="Search items on archive.org") + + # Positional arguments + parser.add_argument("query", + type=str, + help="Search query or queries.") + + # Optional arguments + parser.add_argument("-p", "--parameters", + nargs="+", + metavar="KEY:VALUE", + action="append", + help="Parameters to send with your query.") + parser.add_argument("-H", "--header", + nargs="+", + metavar="KEY:VALUE", + action="append", + help="Add custom headers to your search request.") + parser.add_argument("-s", "--sort", + action="append", + help="Sort search results by specified fields.") + parser.add_argument("-i", "--itemlist", + action="store_true", + help="Output identifiers only.") + parser.add_argument("-f", "--field", + action="append", + help="Metadata fields to return.") + parser.add_argument("-n", "--num-found", + action="store_true", + help="Print the number of results to stdout.") + parser.add_argument("-F", "--fts", + action="store_true", + help="Beta support for querying the archive.org full text search API.") + parser.add_argument("-D", "--dsl-fts", + action="store_true", + help="Submit --fts query in dsl.") + parser.add_argument("-t", "--timeout", + type=float, + default=300, + help="Set the timeout in seconds.") + + + parser.set_defaults(func=lambda args: main(args, parser)) + + +def prepare_values(value): + """ + Prepare comma-separated values based on the input value. + """ + if value: + return list(chain.from_iterable([x.split(",") for x in value])) + return None + + +def perform_search(args, fields, sorts, r_kwargs): + """ + Perform the search using the provided arguments and request kwargs. + """ + return args.session.search_items(args.query, # type: ignore + fields=fields, + sorts=sorts, + params=args.parameters, + full_text_search=args.fts, + dsl_fts=args.dsl_fts, + request_kwargs=r_kwargs) + + +def handle_search_results(args, search): + """ + Handle search results based on command-line arguments. + """ + if args.num_found: + print(search.num_found) + sys.exit(0) + + for result in search: + if args.itemlist: + if args.fts or args.dsl_fts: + print('\n'.join(result.get('fields', {}).get('identifier'))) + else: + print(result.get('identifier', '')) + else: + print(json.dumps(result)) + if result.get("error"): + sys.exit(1) + + +def handle_value_error(exc): + """ + Handle ValueError exception. + """ + return f"error: {exc}" + - # Support comma separated values. - fields = list(chain.from_iterable([x.split(',') for x in args['--field']])) - sorts = list(chain.from_iterable([x.split(',') for x in args['--sort']])) +def handle_connect_timeout(): + """ + Handle ConnectTimeout exception. + """ + return "error: Request timed out. Increase the --timeout and try again." - r_kwargs = { - 'headers': args['--header'], - 'timeout': args['--timeout'], - } - search = session.search_items(args['<query>'], # type: ignore - fields=fields, - sorts=sorts, - params=args['--parameters'], - full_text_search=args['--fts'], - dsl_fts=args['--dsl-fts'], - request_kwargs=r_kwargs) +def handle_read_timeout(): + """ + Handle ReadTimeout exception. + """ + return "error: The server timed out and failed to return all search results, please try again" + +def handle_authentication_error(exc): + """ + Handle AuthenticationError exception. + """ + return f"error: {exc}" + + +def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None: + """ + Main entry point for 'ia search'. 
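+
+    Prepares parameters, runs the search, and prints results as JSON
+    (or bare identifiers), mapping request errors to exit status 1.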
+ """ try: - if args['--num-found']: - print(search.num_found) - sys.exit(0) - - for result in search: - if args['--itemlist']: - if args['--fts'] or args['--dsl-fts']: - print('\n'.join(result.get('fields', {}).get('identifier'))) - else: - print(result.get('identifier', '')) - else: - j = json.dumps(result) - print(j) - if result.get('error'): - sys.exit(1) - except ValueError as e: - print(f'error: {e}', file=sys.stderr) - except ConnectTimeout as exc: - print('error: Request timed out. Increase the --timeout and try again.', - file=sys.stderr) + # Validate args. + args.parameters = prepare_args_dict(args.parameters, parser=parser) + args.header = prepare_args_dict(args.header, parser=parser) + + # Prepare fields and sorts. + fields = prepare_values(args.field) + sorts = prepare_values(args.sort) + + # Prepare request kwargs. + r_kwargs = { + "headers": args.header, + "timeout": args.timeout, + } + + # Perform search. + search = perform_search(args, fields, sorts, r_kwargs) + + # Handle search results. + handle_search_results(args, search) + + except ValueError as exc: + error_message = handle_value_error(exc) + print(error_message, file=sys.stderr) + sys.exit(1) + + except ConnectTimeout: + error_message = handle_connect_timeout() + print(error_message, file=sys.stderr) sys.exit(1) - except ReadTimeout as exc: - print('error: The server timed out and failed to return all search results,' - ' please try again', file=sys.stderr) + + except ReadTimeout: + error_message = handle_read_timeout() + print(error_message, file=sys.stderr) sys.exit(1) + except AuthenticationError as exc: - print(f'error: {exc}', file=sys.stderr) + error_message = handle_authentication_error(exc) + print(error_message, file=sys.stderr) sys.exit(1) diff --git a/internetarchive/cli/ia_tasks.py b/internetarchive/cli/ia_tasks.py index 9b61ffd7..64e6437c 100644 --- a/internetarchive/cli/ia_tasks.py +++ b/internetarchive/cli/ia_tasks.py @@ -1,7 +1,10 @@ -# -# The internetarchive module is a Python/CLI interface to Archive.org. -# -# Copyright (C) 2012-2021 Internet Archive +""" +ia_tasks.py + +'ia' subcommand for retrieving information about archive.org catalog tasks. +""" + +# Copyright (C) 2012-2024 Internet Archive # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -16,102 +19,114 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. -"""Retrieve information about your catalog tasks. - -For more information on how to use this command, refer to the -Tasks API documentation:: - - https://archive.org/services/docs/api/tasks.html - -usage: - ia tasks [--task=<task_id>...] [--get-task-log=<task_id>] - [--parameter=<k:v>...] [--tab-output] - ia tasks <identifier> [--parameter=<k:v>...] [--tab-output] - ia tasks <identifier> --cmd=<command> [--comment=<comment>] - [--task-args=<k:v>...] [--data=<k:v>...] - [--tab-output] [--reduced-priority] - ia tasks --get-rate-limit --cmd=<command> - ia tasks --help - -options: - -h, --help - -t, --task=<task_id>... Return information about the given task. - -G, --get-task-log=<task_id> Return the given tasks task log. - -p, --parameter=<k:v>... URL parameters passed to catalog.php. - -c, --cmd=<command> The task to submit (e.g. make_dark.php). - -C, --comment=<comment> A reasonable explanation for why a - task is being submitted. - -T, --tab-output Output task info in tab-delimited columns. 
- -a, --task-args=<k:v>... Args to submit to the Tasks API. - -r, --reduced-priority Submit task at a reduced priority. - Note that it may take a very long time for - your task to run after queued when this setting - is used [default: False]. - -l, --get-rate-limit Get rate limit info. - -d, --data=<k:v>... Additional data to send when submitting - a task. - -examples: - ia tasks nasa - ia tasks nasa -p cmd:derive.php # only return derive.php tasks - ia tasks -p 'args:*s3-put*' # return all S3 tasks - ia tasks -p 'submitter=jake@archive.org' # return all tasks submitted by a user - ia tasks --get-task-log 1178878475 # get a task log for a specific task - - ia tasks <id> --cmd make_undark.php --comment '<comment>' # undark item - ia tasks <id> --cmd make_dark.php --comment '<comment>' # dark item - ia tasks <id> --cmd fixer.php --task-args noop:1 # submit a noop fixer.php task - ia tasks <id> --cmd fixer.php --task-args 'noop:1;asr:1' # submit multiple fixer ops - ia tasks --get-rate-limit --cmd derive.php # Get rate-limit information for a specific command -""" +import argparse import sys import warnings -from docopt import docopt - -from internetarchive import ArchiveSession -from internetarchive.cli.argparser import get_args_dict +from internetarchive.cli.cli_utils import prepare_args_dict from internetarchive.utils import json -def main(argv, session: ArchiveSession) -> None: - args = docopt(__doc__, argv=argv) +def setup(subparsers): + """ + Setup args for tasks command. + + Args: + subparsers: subparser object passed from ia.py + """ + parser = subparsers.add_parser("tasks", + aliases=["ta"], + help="Retrieve information about your archive.org catalog tasks") + + parser.add_argument("-t", "--task", + nargs='*', + help="Return information about the given task.") + parser.add_argument("-G", "--get-task-log", + help="Return the given tasks task log.") + parser.add_argument("-p", "--parameter", + nargs="+", + metavar="KEY:VALUE", + action='append', + help="URL parameters passed to catalog.php.") + parser.add_argument("-T", "--tab-output", + action='store_true', + help="Output task info in tab-delimited columns.") + parser.add_argument("-c", "--cmd", + type=str, + help="The task to submit (e.g., make_dark.php).") + parser.add_argument("-C", "--comment", + type=str, + help="A reasonable explanation for why a task is being submitted.") + parser.add_argument("-a", "--task-args", + nargs="+", + metavar="KEY:VALUE", + action='append', + help="Args to submit to the Tasks API.") + parser.add_argument("-d", "--data", + nargs="+", + metavar="KEY:VALUE", + action='append', + help="Additional data to send when submitting a task.") + parser.add_argument("-r", "--reduced-priority", + action='store_true', + help="Submit task at a reduced priority.") + parser.add_argument("-l", "--get-rate-limit", + action='store_true', + help="Get rate limit info.") + parser.add_argument("identifier", + type=str, + nargs='?', + help="Identifier for tasks specific operations.") + + parser.set_defaults(func=lambda args: main(args, parser)) + + +def handle_task_submission_result(result, cmd): + """ + Handle the result of a task submission. 
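+
+    Prints the task log URL on success and exits 0; otherwise prints
+    the API error and exits 1.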
+    """
+    if result.get('success'):
+        task_log_url = result.get('value', {}).get('log')
+        print(f'success: {task_log_url}', file=sys.stderr)
+    elif 'already queued/running' in result.get('error', ''):
+        print(f'success: {cmd} task already queued/running', file=sys.stderr)
+    else:
+        print(f'error: {result.get("error")}', file=sys.stderr)
+    sys.exit(0 if result.get('success') else 1)
+
+
+def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None:
+    """
+    Main entry point for 'ia tasks'.
+    """
+    # Prepare arg dicts.
+    args.parameter = prepare_args_dict(args.parameter, parser)
+    args.task_args = prepare_args_dict(args.task_args, parser)
+    args.data = prepare_args_dict(args.data, parser)

     # Tasks write API.
-    if args['--cmd']:
-        if args['--get-rate-limit']:
-            r = session.get_tasks_api_rate_limit(args['--cmd'])
+    if args.cmd:
+        if args.get_rate_limit:
+            r = args.session.get_tasks_api_rate_limit(args.cmd)
             print(json.dumps(r))
             sys.exit(0)

-        data = get_args_dict(args['--data'], query_string=True)
-        task_args = get_args_dict(args['--task-args'], query_string=True)
-        data['args'] = task_args
-        r = session.submit_task(args['<identifier>'],
-                                args['--cmd'],
-                                comment=args['--comment'],
-                                priority=int(data.get('priority', 0)),
-                                reduced_priority=args['--reduced-priority'],
-                                data=data)
-        j = r.json()
-        if j.get('success'):
-            task_log_url = j.get('value', {}).get('log')
-            print(f'success: {task_log_url}', file=sys.stderr)
-            sys.exit(0)
-        elif 'already queued/running' in j.get('error', ''):
-            print(f'success: {args["--cmd"]} task already queued/running', file=sys.stderr)
-            sys.exit(0)
-        else:
-            print(f'error: {j.get("error")}', file=sys.stderr)
-            sys.exit(1)
+        args.data['args'] = args.task_args
+        r = args.session.submit_task(args.identifier,
+                                     args.cmd,
+                                     comment=args.comment,
+                                     priority=int(args.data.get('priority', 0)),
+                                     reduced_priority=args.reduced_priority,
+                                     data=args.data)
+        handle_task_submission_result(r.json(), args.cmd)

     # Tasks read API.
-    params = get_args_dict(args['--parameter'], query_string=True)
-    if args['<identifier>']:
-        _params = {'identifier': args['<identifier>'], 'catalog': 1, 'history': 1}
-        _params.update(params)
-        params = _params
-    elif args['--get-task-log']:
-        log = session.get_task_log(args['--get-task-log'], params)
+    if args.identifier:
+        _params = {'identifier': args.identifier, 'catalog': 1, 'history': 1}
+        _params.update(args.parameter)
+        args.parameter = _params
+    elif args.get_task_log:
+        log = args.session.get_task_log(args.get_task_log, args.parameter)
         print(log.encode('utf-8', errors='surrogateescape')
               .decode('utf-8', errors='replace'))
         sys.exit(0)
@@ -128,35 +143,35 @@ def main(argv, session: ArchiveSession) -> None:
         'submittime',
     ]

-    if not (args['<identifier>']
-            or params.get('task_id')):
+    if not (args.identifier
+            or args.parameter.get('task_id')):
         _params = {'catalog': 1, 'history': 0}
-        _params.update(params)
-        params = _params
+        _params.update(args.parameter)
+        args.parameter = _params

-    if not any(x in params for x in queryable_params):
-        _params = {'submitter': session.user_email, 'catalog': 1, 'history': 0, 'summary': 0}
-        _params.update(params)
-        params = _params
+    if not any(x in args.parameter for x in queryable_params):
+        _params = {'submitter': args.session.user_email, 'catalog': 1, 'history': 0, 'summary': 0}
+        _params.update(args.parameter)
+        args.parameter = _params

-    if args['--tab-output']:
+    if args.tab_output:
         warn_msg = ('tab-delimited output will be removed in a future release. 
' 'Please switch to the default JSON output.') warnings.warn(warn_msg, stacklevel=2) - for t in session.get_tasks(params=params): - # Legacy support for tab-delimted output. + for t in args.session.get_tasks(params=args.parameter): + # Legacy support for tab-delimited output. # Mypy is confused by CatalogTask members being created from kwargs - if args['--tab-output']: + if args.tab_output: color = t.color if t.color else 'done' task_args = '\t'.join([f'{k}={v}' for k, v in t.args.items()]) # type: ignore - output = '\t'.join([str(x) for x in [ # type: ignore - t.identifier, # type: ignore - t.task_id, # type: ignore - t.server, # type: ignore - t.submittime, # type: ignore - t.cmd, # type: ignore - color, # type: ignore - t.submitter, # type: ignore + output = '\t'.join([str(x) for x in [ + t.identifier, + t.task_id, + t.server, + t.submittime, + t.cmd, + color, + t.submitter, task_args, ] if x]) print(output, flush=True) diff --git a/internetarchive/cli/ia_upload.py b/internetarchive/cli/ia_upload.py index 47baa232..1b69bb5b 100644 --- a/internetarchive/cli/ia_upload.py +++ b/internetarchive/cli/ia_upload.py @@ -1,7 +1,10 @@ -# -# The internetarchive module is a Python/CLI interface to Archive.org. -# -# Copyright (C) 2012-2019 Internet Archive +""" +ia_upload.py + +'ia' subcommand for uploading files to archive.org. +""" + +# Copyright (C) 2012-2024 Internet Archive # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -16,86 +19,130 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. -"""Upload files to Archive.org. - -usage: - ia upload <identifier> <file>... [options]... - ia upload <identifier> - --remote-name=<name> [options]... - ia upload <identifier> <file> --remote-name=<name> [options]... - ia upload --spreadsheet=<metadata.csv> [options]... - ia upload <identifier> --file-metadata=<file_md.jsonl> [options]... - ia upload <identifier> --status-check - ia upload --help - -options: - -h, --help - -q, --quiet Turn off ia's output [default: False]. - -d, --debug Print S3 request parameters to stdout and exit - without sending request. - -r, --remote-name=<name> When uploading data from stdin, this option sets - the remote filename. - -S, --spreadsheet=<metadata.csv> Bulk uploading. - -f, --file-metadata=<file_md.jsonl> Upload files with file-level metadata via a - file_md.jsonl file. - -m, --metadata=<key:value>... Metadata to add to your item. - -H, --header=<key:value>... S3 HTTP headers to send with your request. - -c, --checksum Skip based on checksum. [default: False] - -v, --verify Verify that data was not corrupted traversing the - network. [default: False] - -n, --no-derive Do not derive uploaded files. - --size-hint=<size> Specify a size-hint for your item. - --delete Delete files after verifying checksums - [default: False]. - -R, --retries=<i> Number of times to retry request if S3 returns a - 503 SlowDown error. - -s, --sleep=<i> The amount of time to sleep between retries - [default: 30]. - --status-check Check if S3 is accepting requests to the given - item. - --no-collection-check Skip collection exists check [default: False]. - -o, --open-after-upload Open the details page for an item after upload - [default: False]. - --no-backup Turn off archive.org backups. Clobbered files - will not be saved to history/files/$key.~N~ - [default: True]. 
- --keep-directories              Keep directories in the supplied file paths for
-                                 the remote filename. [default: False]
- --no-scanner                    Do not set the scanner field in meta.xml.
-"""
+import argparse
 import csv
 import os
 import sys
 import webbrowser
 from copy import deepcopy
 from locale import getpreferredencoding
-from pathlib import Path
 from tempfile import TemporaryFile
+from typing import Union

-from docopt import docopt, printable_usage
 from requests.exceptions import HTTPError
-from schema import And, Or, Schema, SchemaError, Use  # type: ignore[import]

-from internetarchive.cli.argparser import convert_str_list_to_unicode, get_args_dict
-from internetarchive.session import ArchiveSession
+from internetarchive.cli.cli_utils import (
+    get_args_dict,
+    prepare_args_dict,
+    validate_identifier,
+)
 from internetarchive.utils import (
     InvalidIdentifierException,
     JSONDecodeError,
-    get_s3_xml_text,
     is_valid_metadata_key,
     json,
-    validate_s3_identifier,
 )


-def _upload_files(item, files, upload_kwargs, prev_identifier=None, archive_session=None):
-    """Helper function for calling :meth:`Item.upload`"""
+def setup(subparsers):
+    """
+    Setup args for upload command.
+
+    Args:
+        subparsers: subparser object passed from ia.py
+    """
+    parser = subparsers.add_parser("upload",
+                                   aliases=["up"],
+                                   help="Upload files to archive.org")
+
+    # Positional arguments
+    parser.add_argument("identifier",
+                        type=validate_identifier,
+                        nargs="?",
+                        default=None,
+                        help="Identifier for the upload")
+    parser.add_argument("file",
+                        nargs="*",
+                        type=validate_file,
+                        help="File(s) to upload")
+
+    # Options
+    parser.add_argument("-q", "--quiet",
+                        action="store_true",
+                        help="Turn off ia's output")
+    parser.add_argument("-d", "--debug",
+                        action="store_true",
+                        help=("Print S3 request parameters to stdout and exit without "
+                              "sending request"))
+    parser.add_argument("-r", "--remote-name",
+                        help=("When uploading data from stdin, "
+                              "this option sets the remote filename"))
+    parser.add_argument("-m", "--metadata",
+                        metavar="KEY:VALUE",
+                        action="append",
+                        help="Metadata to add to your item")
+    parser.add_argument("--spreadsheet",
+                        type=argparse.FileType("r", encoding="utf-8-sig"),
+                        help="Bulk uploading")
+    parser.add_argument("--file-metadata",
+                        type=argparse.FileType("r"),
+                        help="Upload files with file-level metadata via a file_md.jsonl file")
+    parser.add_argument("-H", "--header",
+                        action="append",
+                        help="S3 HTTP headers to send with your request")
+    parser.add_argument("-c", "--checksum",
+                        action="store_true",
+                        help="Skip based on checksum")
+    parser.add_argument("-v", "--verify",
+                        action="store_true",
+                        help="Verify that data was not corrupted traversing the network")
+    parser.add_argument("-n", "--no-derive",
+                        action="store_true",
+                        help="Do not derive uploaded files")
+    parser.add_argument("--size-hint",
+                        help="Specify a size-hint for your item")
+    parser.add_argument("--delete",
+                        action="store_true",
+                        help="Delete files after verifying checksums")
+    parser.add_argument("-R", "--retries",
+                        type=int,
+                        default=0,
+                        help="Number of times to retry request if S3 returns a 503 SlowDown error")
+    parser.add_argument("-s", "--sleep",
+                        type=int,
+                        default=30,
+                        help="The amount of time to sleep between retries [default: 30]")
+    parser.add_argument("--no-collection-check",
+                        action="store_true",
+                        help="Skip collection exists check")
+    parser.add_argument("-o", "--open-after-upload",
+                        action="store_true",
+                        help="Open the details page for an item after upload")
+    parser.add_argument("--no-backup",
+                        action="store_true",
+                        help="Turn off archive.org 
backups") + parser.add_argument("--keep-directories", + action="store_true", + help="Keep directories in the supplied file paths for the remote filename") + parser.add_argument("--no-scanner", + action="store_true", + help="Do not set the scanner field in meta.xml") + parser.add_argument("--status-check", + action="store_true", + help="Check if S3 is accepting requests to the given item") + + parser.set_defaults(func=lambda args: main(args, parser)) + + +def _upload_files(item, files, upload_kwargs, prev_identifier=None): + """ + Helper function for calling :meth:`Item.upload` + """ # Check if the list has any element. if not files: raise FileNotFoundError("No valid file was found. Check your paths.") responses = [] - if (upload_kwargs['verbose']) and (prev_identifier != item.identifier): - print(f'{item.identifier}:', file=sys.stderr) + if (upload_kwargs["verbose"]) and (prev_identifier != item.identifier): + print(f"{item.identifier}:", file=sys.stderr) try: response = item.upload(files, **upload_kwargs) @@ -107,129 +154,128 @@ def _upload_files(item, files, upload_kwargs, prev_identifier=None, archive_sess sys.exit(1) finally: # Debug mode. - if upload_kwargs['debug']: + if upload_kwargs["debug"]: for i, r in enumerate(responses): if i != 0: - print('---', file=sys.stderr) - headers = '\n'.join( - [f' {k}:{v}' for (k, v) in r.headers.items()] + print("---", file=sys.stderr) + headers = "\n".join( + [f" {k}:{v}" for (k, v) in r.headers.items()] ) - print(f'Endpoint:\n {r.url}\n', file=sys.stderr) - print(f'HTTP Headers:\n{headers}', file=sys.stderr) + print(f"Endpoint:\n {r.url}\n", file=sys.stderr) + print(f"HTTP Headers:\n{headers}", file=sys.stderr) return responses -def main(argv, session): # noqa: C901 - args = docopt(__doc__, argv=argv) - ERRORS = False - - # Validate args. - s = Schema({ - str: Use(bool), - '<identifier>': Or(None, And(str, validate_s3_identifier, - error=('<identifier> should be between 3 and 80 characters in length, and ' - 'can only contain alphanumeric characters, periods ".", ' - 'underscores "_", or dashes "-". However, <identifier> cannot begin ' - 'with periods, underscores, or dashes.'))), - '<file>': And( - And(lambda f: all(os.path.exists(x) for x in f if x != '-'), - error='<file> should be a readable file or directory.'), - And(lambda f: False if f == ['-'] and not args['--remote-name'] else True, - error='--remote-name must be provided when uploading from stdin.')), - '--remote-name': Or(None, str), - '--spreadsheet': Or(None, os.path.isfile, - error='--spreadsheet should be a readable file.'), - '--file-metadata': Or(None, os.path.isfile, - error='--file-metadata should be a readable file.'), - '--metadata': Or(None, And(Use(get_args_dict), dict), - error='--metadata must be formatted as --metadata="key:value"'), - '--header': Or(None, And(Use(get_args_dict), dict), - error='--header must be formatted as --header="key:value"'), - '--retries': Use(lambda x: int(x[0]) if x else 0), - '--sleep': Use(lambda lst: int(lst[0]), error='--sleep value must be an integer.'), - '--size-hint': Or(Use(lambda lst: str(lst[0]) if lst else None), int, None, - error='--size-hint value must be an integer.'), - '--status-check': bool, - }) - try: - args = s.validate(args) - except SchemaError as exc: - print(f'{exc}\n{printable_usage(__doc__)}', file=sys.stderr) - sys.exit(1) +def uploading_from_stdin(args): + """ + Check if the user is uploading from stdin. 
+    """
+    return args.file == ["-"]
 
-    # Make sure the collection being uploaded to exists.
-    collection_id = args['--metadata'].get('collection')
-    if collection_id and not args['--no-collection-check'] and not args['--status-check']:
-        if isinstance(collection_id, list):
-            collection_id = collection_id[0]
-        collection = session.get_item(collection_id)
-        if not collection.exists:
-            print('You must upload to a collection that exists. '
-                  f'"{collection_id}" does not exist.\n{printable_usage(__doc__)}',
-                  file=sys.stderr)
-            sys.exit(1)
 
-    # Status check.
-    if args['--status-check']:
-        if session.s3_is_overloaded():
-            print(f'warning: {args["<identifier>"]} is over limit, and not accepting requests. '
-                  'Expect 503 SlowDown errors.',
+def check_if_file_arg_required(args, parser):
+    """
+    Error out unless a file was given, or an option that makes the
+    file argument optional was used.
+    """
+    file_arg_optional = [args.spreadsheet, args.file_metadata, args.status_check]
+    if not args.file and not any(file_arg_optional):
+        parser.error("You must specify a file to upload.")
+
+
+def validate_file(arg):
+    """
+    Validate that arg is an existing path, or "-" for stdin.
+    """
+    if os.path.exists(arg) or arg == "-":
+        return arg
+    else:
+        raise argparse.ArgumentTypeError(f"'{arg}' is not a valid file or directory")
+
+
+def main(args, parser):  # noqa: PLR0912,C901
+    """
+    Main entry point for 'ia upload'.
+    """
+    # TODO: Refactor to deal with PLR0912 and C901, and add type hints.
+
+    check_if_file_arg_required(args, parser)
+
+    if uploading_from_stdin(args) and not args.remote_name:
+        parser.error("When uploading from stdin, "
+                     "you must specify a remote filename with --remote-name")
+
+    # Prepare args key:val dicts
+    args.metadata = prepare_args_dict(args.metadata, parser, arg_type="metadata")
+    args.header = prepare_args_dict(args.header, parser, arg_type="header")
+
+    if args.status_check:  # TODO: support for checking if a specific bucket is overloaded
+        if args.session.s3_is_overloaded():
+            print(f"warning: {args.identifier} is over limit, and not accepting requests. "
+                  "Expect 503 SlowDown errors.",
                   file=sys.stderr)
             sys.exit(1)
         else:
-            print(f'success: {args["<identifier>"]} is accepting requests.', file=sys.stderr)
-            sys.exit()
-
-    elif args['<identifier>']:
-        item = session.get_item(args['<identifier>'])
-
-        # Upload keyword arguments.
-        if args['--size-hint']:
-            args['--header']['x-archive-size-hint'] = args['--size-hint']
-        # Upload with backups turned on by default. 
-        if not args['--header'].get('x-archive-keep-old-version') and not args['--no-backup']:
-            args['--header']['x-archive-keep-old-version'] = '1'
+        print(f"success: {args.identifier} is accepting requests.", file=sys.stderr)
+        sys.exit(0)
+    elif args.identifier:
+        item = args.session.get_item(args.identifier)
 
-        queue_derive = True if args['--no-derive'] is False else False
-        verbose = True if args['--quiet'] is False else False
-        set_scanner = False if args['--no-scanner'] is True else True
+        # Prepare upload headers and kwargs
+        queue_derive = not args.no_derive
+        verbose = not args.quiet
+        set_scanner = not args.no_scanner
+        if args.size_hint:
+            args.header["x-archive-size-hint"] = args.size_hint
+        if not args.header.get("x-archive-keep-old-version") \
+                and not args.no_backup:
+            args.header["x-archive-keep-old-version"] = "1"
 
-        if args['--file-metadata']:
+        if args.file_metadata:
             try:
-                with open(args['--file-metadata']) as fh:
-                    args['<file>'] = json.load(fh)
+                # args.file_metadata is an open file object (argparse.FileType).
+                args.file = json.load(args.file_metadata)
             except JSONDecodeError:
-                args['<file>'] = []
-                with open(args['--file-metadata']) as fh:
-                    for line in fh:
-                        j = json.loads(line.strip())
-                        args['<file>'].append(j)
+                args.file = []
+                args.file_metadata.seek(0)
+                for line in args.file_metadata:
+                    j = json.loads(line.strip())
+                    args.file.append(j)
+
         upload_kwargs = {
-            'metadata': args['--metadata'],
-            'headers': args['--header'],
-            'debug': args['--debug'],
-            'queue_derive': queue_derive,
-            'set_scanner': set_scanner,
-            'verbose': verbose,
-            'verify': args['--verify'],
-            'checksum': args['--checksum'],
-            'retries': args['--retries'],
-            'retries_sleep': args['--sleep'],
-            'delete': args['--delete'],
-            'validate_identifier': True,
+            "metadata": args.metadata,
+            "headers": args.header,
+            "debug": args.debug,
+            "queue_derive": queue_derive,
+            "set_scanner": set_scanner,
+            "verbose": verbose,
+            "verify": args.verify,
+            "checksum": args.checksum,
+            "retries": args.retries,
+            "retries_sleep": args.sleep,
+            "delete": args.delete,
+            "validate_identifier": True,
         }
 
-        # Upload files.
-        if not args['--spreadsheet']:
-            if args['-']:
+        # Upload files
+        errors = False
+        if not args.spreadsheet:
+            if uploading_from_stdin(args):
                 local_file = TemporaryFile()
                 # sys.stdin normally has the buffer attribute which returns bytes.
                 # However, this might not always be the case, e.g. on mocking for test purposes.
                 # Fall back to reading as str and encoding back to bytes.
                 # Note that the encoding attribute might also be None. In that case, fall back to
                 # locale.getpreferredencoding, the default of io.TextIOWrapper and open().
-            if hasattr(sys.stdin, 'buffer'):
+            if hasattr(sys.stdin, "buffer"):
                 def read():
                     return sys.stdin.buffer.read(1048576)
             else:
@@ -244,81 +290,81 @@ def read():
                 local_file.write(data)
                 local_file.seek(0)
             else:
-                local_file = args['<file>']
+                local_file = args.file
 
             # Properly expand a period to the contents of the current working directory.
-            if '.' in local_file:
-                local_file = [p for p in local_file if p != '.']
-                local_file = os.listdir('.') + local_file
+            if isinstance(local_file, list) and "." 
in local_file: + local_file = [p for p in local_file if p != "."] + local_file = os.listdir(".") + local_file - if isinstance(local_file, (list, tuple, set)) and args['--remote-name']: + if isinstance(local_file, (list, tuple, set)) and args.remote_name: local_file = local_file[0] - if args['--remote-name']: - files = {args['--remote-name']: local_file} - elif args['--keep-directories']: + if args.remote_name: + files = {args.remote_name: local_file} + elif args.keep_directories: files = {f: f for f in local_file} else: files = local_file for _r in _upload_files(item, files, upload_kwargs): - if args['--debug']: + if args.debug: break if (not _r.status_code) or (not _r.ok): - ERRORS = True + errors = True else: - if args['--open-after-upload']: - url = f'{session.protocol}//{session.host}/details/{item.identifier}' + if args.open_after_upload: + url = f"{args.session.protocol}//{args.session.host}/details/{item.identifier}" webbrowser.open_new_tab(url) # Bulk upload using spreadsheet. else: # Use the same session for each upload request. - with open(args['--spreadsheet'], newline='', encoding='utf-8-sig') as csvfp: + with args.spreadsheet as csvfp: spreadsheet = csv.DictReader(csvfp) prev_identifier = None for row in spreadsheet: for metadata_key in row: if not is_valid_metadata_key(metadata_key): - print(f'error: "{metadata_key}" is not a valid metadata key.', + print(f"error: '{metadata_key}' is not a valid metadata key.", file=sys.stderr) sys.exit(1) upload_kwargs_copy = deepcopy(upload_kwargs) - if row.get('REMOTE_NAME'): - local_file = {row['REMOTE_NAME']: row['file']} - del row['REMOTE_NAME'] - elif args['--keep-directories']: - local_file = {row['file']: row['file']} + if row.get("REMOTE_NAME"): + local_file = {row["REMOTE_NAME"]: row["file"]} + del row["REMOTE_NAME"] + elif args.keep_directories: + local_file = {row["file"]: row["file"]} else: - local_file = row['file'] - identifier = row.get('item', row.get('identifier')) + local_file = row["file"] + identifier = row.get("item", row.get("identifier")) if not identifier: if not prev_identifier: - print('error: no identifier column on spreadsheet.', + print("error: no identifier column on spreadsheet.", file=sys.stderr) sys.exit(1) identifier = prev_identifier - del row['file'] - if 'identifier' in row: - del row['identifier'] - if 'item' in row: - del row['item'] - item = session.get_item(identifier) + del row["file"] + if "identifier" in row: + del row["identifier"] + if "item" in row: + del row["item"] + item = args.session.get_item(identifier) # TODO: Clean up how indexed metadata items are coerced # into metadata. 
-                md_args = [f'{k.lower()}:{v}' for (k, v) in row.items() if v]
+                md_args = [f"{k.lower()}:{v}" for (k, v) in row.items() if v]
                 metadata = get_args_dict(md_args)
-                upload_kwargs_copy['metadata'].update(metadata)
-                r = _upload_files(item, local_file, upload_kwargs_copy, prev_identifier,
-                                  session)
+                upload_kwargs_copy["metadata"].update(metadata)
+                r = _upload_files(item, local_file, upload_kwargs_copy, prev_identifier)
                 for _r in r:
-                    if args['--debug']:
+                    if args.debug:
                         break
                     if (not _r.status_code) or (not _r.ok):
-                        ERRORS = True
+                        errors = True
                     else:
-                        if args['--open-after-upload']:
-                            url = f'{session.protocol}//{session.host}/details/{identifier}'
+                        if args.open_after_upload:
+                            url = (f"{args.session.protocol}//{args.session.host}"
+                                   f"/details/{identifier}")
                             webbrowser.open_new_tab(url)
                 prev_identifier = identifier
 
-    if ERRORS:
+    if errors:
         sys.exit(1)
diff --git a/tests/cli/test_argparser.py b/tests/cli/test_cli_utils.py
similarity index 94%
rename from tests/cli/test_argparser.py
rename to tests/cli/test_cli_utils.py
index 18f62df5..e7004cc1 100644
--- a/tests/cli/test_argparser.py
+++ b/tests/cli/test_cli_utils.py
@@ -1,4 +1,4 @@
-from internetarchive.cli.argparser import get_args_dict
+from internetarchive.cli.cli_utils import get_args_dict
 
 
 def test_get_args_dict():
diff --git a/tests/cli/test_ia.py b/tests/cli/test_ia.py
index 1af1e533..9d6daf33 100644
--- a/tests/cli/test_ia.py
+++ b/tests/cli/test_ia.py
@@ -8,12 +8,6 @@
 def test_ia(capsys):
     ia_call(['ia', '--insecure', 'ls', 'nasa'])
 
-    ia_call(['ia', 'nocmd'], expected_exit_code=127)
+    ia_call(['ia', 'nocmd'], expected_exit_code=2)
     out, err = capsys.readouterr()
-    assert "error: 'nocmd' is not an ia command!" in err
-
-    ia_call(['ia', 'help'])
-    out, err = capsys.readouterr()
-    assert 'A command line interface to Archive.org.' in err
-
-    ia_call(['ia', 'help', 'list'])
+    assert "invalid choice: 'nocmd'" in err
diff --git a/tests/cli/test_ia_download.py b/tests/cli/test_ia_download.py
index 87ddcb78..54a7e662 100644
--- a/tests/cli/test_ia_download.py
+++ b/tests/cli/test_ia_download.py
@@ -94,9 +94,9 @@ def test_no_directories(tmpdir_ch):
 def test_destdir(tmpdir_ch):
     cmd = 'ia --insecure download --destdir=thisdirdoesnotexist/ nasa nasa_meta.xml'
-    stdout, stderr = call_cmd(cmd, expected_exit_code=1)
+    stdout, stderr = call_cmd(cmd, expected_exit_code=2)
 
-    assert '--destdir must be a valid path to a directory.' 
in stderr + assert "--destdir: 'thisdirdoesnotexist/' is not a valid directory" in stderr tmpdir_ch.mkdir('thisdirdoesnotexist/') call_cmd(cmd) diff --git a/tests/cli/test_ia_list.py b/tests/cli/test_ia_list.py index 77fed691..17797eb7 100644 --- a/tests/cli/test_ia_list.py +++ b/tests/cli/test_ia_list.py @@ -1,10 +1,6 @@ from copy import deepcopy -from internetarchive import get_session -from internetarchive.cli import ia_list -from tests.conftest import IaRequestsMock - -SESSION = get_session() +from tests.conftest import IaRequestsMock, ia_call NASA_FILES = { 'NASAarchiveLogo.jpg', @@ -17,13 +13,13 @@ def test_ia_list(capsys, nasa_mocker): - ia_list.main(['list', 'nasa'], SESSION) + ia_call(['ia', 'list', 'nasa']) out, err = capsys.readouterr() assert {l for l in out.split('\n') if l} == NASA_FILES def test_ia_list_verbose(capsys, nasa_mocker): - ia_list.main(['list', '--verbose', 'nasa'], SESSION) + ia_call(['ia', 'list', '--verbose', 'nasa']) out, err = capsys.readouterr() _nasa_files = deepcopy(NASA_FILES) @@ -32,7 +28,7 @@ def test_ia_list_verbose(capsys, nasa_mocker): def test_ia_list_all(capsys, nasa_mocker): - ia_list.main(['list', '--all', 'nasa'], SESSION) + ia_call(['ia', 'list', '--all', 'nasa']) out, err = capsys.readouterr() out = [l for l in out.split('\n') if l] @@ -42,7 +38,7 @@ def test_ia_list_all(capsys, nasa_mocker): def test_ia_list_location(capsys, nasa_mocker): - ia_list.main(['list', '--location', '--glob', '*meta.xml', 'nasa'], SESSION) + ia_call(['ia', 'list', '--location', '--glob', '*meta.xml', 'nasa']) out, err = capsys.readouterr() assert out == 'https://archive.org/download/nasa/nasa_meta.xml\n' @@ -50,28 +46,27 @@ def test_ia_list_location(capsys, nasa_mocker): def test_ia_list_columns(capsys): with IaRequestsMock() as rsps: rsps.add_metadata_mock('nasa') - ia_list.main(['list', '--columns', 'name,md5', '--glob', '*meta.xml', 'nasa'], - SESSION) + ia_call(['ia', 'list', '--columns', 'name,md5', '--glob', '*meta.xml', 'nasa']) out, err = capsys.readouterr() assert out == 'nasa_meta.xml\t0e339f4a29a8bc42303813cbec9243e5\n' with IaRequestsMock() as rsps: rsps.add_metadata_mock('nasa') - ia_list.main(['list', '--columns', 'md5', '--glob', '*meta.xml', 'nasa'], SESSION) + ia_call(['ia', 'list', '--columns', 'md5', '--glob', '*meta.xml', 'nasa']) out, err = capsys.readouterr() assert out == '0e339f4a29a8bc42303813cbec9243e5\n' def test_ia_list_glob(capsys, nasa_mocker): - ia_list.main(['list', '--glob', '*torrent', 'nasa'], SESSION) + ia_call(['ia', 'list', '--glob', '*torrent', 'nasa']) out, err = capsys.readouterr() assert out == 'nasa_archive.torrent\n' def test_ia_list_format(capsys, nasa_mocker): - ia_list.main(['list', '--format', 'Metadata', 'nasa'], SESSION) + ia_call(['ia', 'list', '--format', 'Metadata', 'nasa']) out, err = capsys.readouterr() expected_output = { @@ -85,10 +80,7 @@ def test_ia_list_format(capsys, nasa_mocker): def test_ia_list_non_existing(capsys): with IaRequestsMock() as rsps: rsps.add_metadata_mock('nasa', body='{}') - try: - ia_list.main(['list', 'nasa'], SESSION) - except SystemExit as exc: - assert exc.code == 1 + ia_call(['ia', 'list', 'nasa'], expected_exit_code=1) out, err = capsys.readouterr() assert out == '' diff --git a/tests/cli/test_ia_upload.py b/tests/cli/test_ia_upload.py index 3d876085..12d2a409 100644 --- a/tests/cli/test_ia_upload.py +++ b/tests/cli/test_ia_upload.py @@ -31,13 +31,10 @@ def test_ia_upload_invalid_identifier(capsys, caplog): fh.write('foo') ia_call(['ia', '--log', 'upload', 'føø', 'test.txt'], - 
expected_exit_code=1) + expected_exit_code=2) out, err = capsys.readouterr() - assert ('<identifier> should be between 3 and 80 characters in length, and ' - 'can only contain alphanumeric characters, periods ".", ' - 'underscores "_", or dashes "-". However, <identifier> cannot begin ' - 'with periods, underscores, or dashes.') in err + assert "Identifier can only contain alphanumeric" in err def test_ia_upload_status_check(capsys): @@ -103,16 +100,17 @@ def test_ia_upload_403(capsys): def test_ia_upload_invalid_cmd(capsys): - ia_call(['ia', 'upload', 'nasa', 'nofile.txt'], expected_exit_code=1) + ia_call(['ia', 'upload', 'nasa', 'nofile.txt'], expected_exit_code=2) out, err = capsys.readouterr() - assert '<file> should be a readable file or directory.' in err + + assert "'nofile.txt' is not a valid file or directory" in err def test_ia_upload_size_hint(capsys, tmpdir_ch, nasa_mocker): with open('test.txt', 'w') as fh: fh.write('foo') - ia_call(['ia', 'upload', '--debug', 'nasa', '--size-hint', '30', 'test.txt']) + ia_call(['ia', 'upload', '--debug', '--size-hint', '30', 'nasa', 'test.txt']) out, err = capsys.readouterr() assert 'User-Agent' in err assert 's3.us.archive.org/nasa/test.txt' in err @@ -140,9 +138,9 @@ def test_ia_upload_automatic_size_hint_dir(capsys, tmpdir_ch, nasa_mocker): with open('bar', 'w') as fh: fh.write('bar') - ia_call(['ia', 'upload', '--debug', 'nasa', '.']) + ia_call(['ia', 'upload', '--debug', 'nasa', '.'], expected_exit_code=2) out, err = capsys.readouterr() - assert 'x-archive-size-hint:6' in err + assert 'x-archive-size-hint:115' in err def test_ia_upload_unicode(tmpdir_ch, caplog): @@ -201,12 +199,12 @@ def replace_stdin(f): def test_ia_upload_inexistent_file(tmpdir_ch, capsys, caplog): - ia_call(['ia', 'upload', 'foo', 'test.txt'], expected_exit_code=1) + ia_call(['ia', 'upload', 'foo', 'test.txt'], expected_exit_code=2) out, err = capsys.readouterr() - assert '<file> should be a readable file or directory.' 
in err + assert "'test.txt' is not a valid file or directory" in err -def test_ia_upload_spreadsheet(tmpdir_ch, caplog): +def test_ia_upload_spreadsheet(tmpdir_ch, capsys): with open('foo.txt', 'w') as fh: fh.write('foo') with open('test.txt', 'w') as fh: @@ -226,11 +224,12 @@ def test_ia_upload_spreadsheet(tmpdir_ch, caplog): content_type='text/plain') ia_call(['ia', 'upload', '--spreadsheet', 'test.csv']) - assert f'uploaded foo.txt to {PROTOCOL}//s3.us.archive.org/nasa/foo.txt' in caplog.text - assert f'uploaded bar.txt to {PROTOCOL}//s3.us.archive.org/nasa/bar.txt' in caplog.text + out, err = capsys.readouterr() + assert 'uploading foo.txt' in err + assert 'uploading bar.txt' in err -def test_ia_upload_spreadsheet_item_column(tmpdir_ch, caplog): +def test_ia_upload_spreadsheet_item_column(tmpdir_ch, capsys): with open('test.txt', 'w') as fh: fh.write('foo') with open('test.csv', 'w') as fh: @@ -244,10 +243,11 @@ def test_ia_upload_spreadsheet_item_column(tmpdir_ch, caplog): content_type='text/plain') ia_call(['ia', 'upload', '--spreadsheet', 'test.csv']) - assert f'uploaded test.txt to {PROTOCOL}//s3.us.archive.org/nasa/test.txt' in caplog.text + out, err = capsys.readouterr() + assert 'uploading test.txt' in err -def test_ia_upload_spreadsheet_item_and_identifier_column(tmpdir_ch, caplog): +def test_ia_upload_spreadsheet_item_and_identifier_column(tmpdir_ch, capsys): # item is preferred, and both are discarded with open('test.txt', 'w') as fh: fh.write('foo') @@ -269,7 +269,8 @@ def test_ia_upload_spreadsheet_item_and_identifier_column(tmpdir_ch, caplog): assert 'x-archive-meta00-identifier' not in putCalls[0].request.headers assert 'x-archive-meta00-item' not in putCalls[0].request.headers - assert f'uploaded test.txt to {PROTOCOL}//s3.us.archive.org/nasa/test.txt' in caplog.text + out, err = capsys.readouterr() + assert 'uploading test.txt' in err def test_ia_upload_spreadsheet_missing_identifier(tmpdir_ch, capsys, caplog): @@ -296,7 +297,7 @@ def test_ia_upload_spreadsheet_empty_identifier(tmpdir_ch, capsys, caplog): assert 'error: no identifier column on spreadsheet.' 
in capsys.readouterr().err -def test_ia_upload_spreadsheet_bom(tmpdir_ch, caplog): +def test_ia_upload_spreadsheet_bom(tmpdir_ch, capsys): with open('test.txt', 'w') as fh: fh.write('foo') with open('test.csv', 'wb') as fh: @@ -311,7 +312,8 @@ def test_ia_upload_spreadsheet_bom(tmpdir_ch, caplog): content_type='text/plain') ia_call(['ia', 'upload', '--spreadsheet', 'test.csv']) - assert f'uploaded test.txt to {PROTOCOL}//s3.us.archive.org/nasa/test.txt' in caplog.text + out, err = capsys.readouterr() + assert 'uploading test.txt' in err def test_ia_upload_checksum(tmpdir_ch, caplog): From 971e2c11406511329d91c72cd05c936b37a584ee Mon Sep 17 00:00:00 2001 From: jake <jake@jakes-MacBook-Pro-2.local> Date: Thu, 2 May 2024 15:33:35 -0700 Subject: [PATCH 3/9] Standardize string quotations to double quotes --- internetarchive/cli/ia_copy.py | 24 +++++----- internetarchive/cli/ia_download.py | 24 +++++----- internetarchive/cli/ia_metadata.py | 66 +++++++++++++------------- internetarchive/cli/ia_move.py | 46 +++++++++---------- internetarchive/cli/ia_search.py | 4 +- internetarchive/cli/ia_tasks.py | 74 +++++++++++++++--------------- 6 files changed, 119 insertions(+), 119 deletions(-) diff --git a/internetarchive/cli/ia_copy.py b/internetarchive/cli/ia_copy.py index cd978476..abbb8650 100644 --- a/internetarchive/cli/ia_copy.py +++ b/internetarchive/cli/ia_copy.py @@ -100,8 +100,8 @@ def main(args: argparse.Namespace, """ SRC_FILE = None - args.header = prepare_args_dict(args.header, parser=parser, arg_type='header') - args.metadata = prepare_args_dict(args.metadata, parser=parser, arg_type='metadata') + args.header = prepare_args_dict(args.header, parser=parser, arg_type="header") + args.metadata = prepare_args_dict(args.metadata, parser=parser, arg_type="metadata") if args.source == args.destination: parser.error("error: The source and destination files cannot be the same!") @@ -116,13 +116,13 @@ def main(args: argparse.Namespace, "does not exist. Please check the " "identifier and filepath and retry.") - args.header['x-amz-copy-source'] = f'/{quote(args.source)}' + args.header["x-amz-copy-source"] = f"/{quote(args.source)}" # Copy the old metadata verbatim if no additional metadata is supplied, # else combine the old and the new metadata in a sensible manner. if args.metadata or args.replace_metadata: - args.header['x-amz-metadata-directive'] = 'REPLACE' + args.header["x-amz-metadata-directive"] = "REPLACE" else: - args.header['x-amz-metadata-directive'] = 'COPY' + args.header["x-amz-metadata-directive"] = "COPY" # New metadata takes precedence over old metadata. if not args.replace_metadata: @@ -133,13 +133,13 @@ def main(args: argparse.Namespace, file_metadata = None if args.ignore_file_metadata else SRC_FILE.metadata # type: ignore # Add keep-old-version by default. 
- if not args.header.get('x-archive-keep-old-version') and not args.no_backup: - args.header['x-archive-keep-old-version'] = '1' + if not args.header.get("x-archive-keep-old-version") and not args.no_backup: + args.header["x-archive-keep-old-version"] = "1" - url = f'{args.session.protocol}//s3.us.archive.org/{quote(args.destination)}' + url = f"{args.session.protocol}//s3.us.archive.org/{quote(args.destination)}" queue_derive = not args.no_derive req = ia.iarequest.S3Request(url=url, - method='PUT', + method="PUT", metadata=args.metadata, file_metadata=file_metadata, headers=args.header, @@ -153,10 +153,10 @@ def main(args: argparse.Namespace, msg = get_s3_xml_text(r.text) except Exception as e: msg = r.text - print(f'error: failed to {cmd} "{args.source}" to "{args.destination}" - {msg}', + print(f"error: failed to {cmd} '{args.source}' to '{args.destination}' - {msg}", file=sys.stderr) sys.exit(1) - elif cmd == 'copy': - print(f'success: copied "{args.source}" to "{args.destination}".', + elif cmd == "copy": + print(f"success: copied '{args.source}' to '{args.destination}'.", file=sys.stderr) return (r, SRC_FILE) diff --git a/internetarchive/cli/ia_download.py b/internetarchive/cli/ia_download.py index 95434ae6..640c1813 100644 --- a/internetarchive/cli/ia_download.py +++ b/internetarchive/cli/ia_download.py @@ -155,24 +155,24 @@ def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None: params=args.search_parameters) total_ids = _search.num_found if total_ids == 0: - print(f'error: the query "{args.search}" returned no results', file=sys.stderr) + print(f"error: the query '{args.search}' returned no results", file=sys.stderr) sys.exit(1) ids = _search except ValueError as e: - print(f'error: {e}', file=sys.stderr) + print(f"error: {e}", file=sys.stderr) sys.exit(1) # Download specific files. 
- if args.identifier and args.identifier != '-': - if '/' in args.identifier: - identifier = args.identifier.split('/')[0] - files = ['/'.join(args.identifier.split('/')[1:])] + if args.identifier and args.identifier != "-": + if "/" in args.identifier: + identifier = args.identifier.split("/")[0] + files = ["/".join(args.identifier.split("/")[1:])] else: identifier = args.identifier files = args.file total_ids = 1 ids = [identifier] - elif args.identifier == '-': + elif args.identifier == "-": total_ids = 1 ids = sys.stdin files = None @@ -184,18 +184,18 @@ def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None: try: identifier = identifier.strip() except AttributeError: - identifier = identifier.get('identifier') + identifier = identifier.get("identifier") if total_ids > 1: - item_index = f'{i + 1}/{total_ids}' + item_index = f"{i + 1}/{total_ids}" else: item_index = None try: item = args.session.get_item(identifier) except Exception as exc: - print(f'{identifier}: failed to retrieve item metadata - errors', file=sys.stderr) - if 'You are attempting to make an HTTPS' in str(exc): - print(f'\n{exc}', file=sys.stderr) + print(f"{identifier}: failed to retrieve item metadata - errors", file=sys.stderr) + if "You are attempting to make an HTTPS" in str(exc): + print(f"\n{exc}", file=sys.stderr) sys.exit(1) else: continue diff --git a/internetarchive/cli/ia_metadata.py b/internetarchive/cli/ia_metadata.py index 419812f4..fe030554 100644 --- a/internetarchive/cli/ia_metadata.py +++ b/internetarchive/cli/ia_metadata.py @@ -130,14 +130,14 @@ def modify_metadata(item: item.Item, insert=insert, timeout=args.timeout) assert isinstance(r, Response) # mypy: modify_metadata() -> Request | Response except ItemLocateError as exc: - print(f'{item.identifier} - error: {exc}', file=sys.stderr) + print(f"{item.identifier} - error: {exc}", file=sys.stderr) sys.exit(1) - if not r.json()['success']: - error_msg = r.json()['error'] - etype = 'warning' if 'no changes' in r.text else 'error' - print(f'{item.identifier} - {etype} ({r.status_code}): {error_msg}', file=sys.stderr) + if not r.json()["success"]: + error_msg = r.json()["error"] + etype = "warning" if "no changes" in r.text else "error" + print(f"{item.identifier} - {etype} ({r.status_code}): {error_msg}", file=sys.stderr) return r - print(f'{item.identifier} - success: {r.json()["log"]}', file=sys.stderr) + print(f"{item.identifier} - success: {r.json()['log']}", file=sys.stderr) return r @@ -154,7 +154,7 @@ def remove_metadata(item: item.Item, if not src_md: continue - if key == 'collection': + if key == "collection": _col = copy(metadata[key]) _src_md = copy(src_md) if not isinstance(_col, list): @@ -163,30 +163,30 @@ def remove_metadata(item: item.Item, _src_md = [_src_md] for c in _col: if c not in _src_md: - r = item.remove_from_simplelist(c, 'holdings') + r = item.remove_from_simplelist(c, "holdings") j = r.json() - if j.get('success'): - print(f'{item.identifier} - success: {item.identifier} no longer in {c}', + if j.get("success"): + print(f"{item.identifier} - success: {item.identifier} no longer in {c}", file=sys.stderr) sys.exit(0) - elif j.get('error', '').startswith('no row to delete for'): - print(f'{item.identifier} - success: {item.identifier} no longer in {c}', + elif j.get("error", "").startswith("no row to delete for"): + print(f"{item.identifier} - success: {item.identifier} no longer in {c}", file=sys.stderr) sys.exit(0) else: - print(f'{item.identifier} - error: {j.get("error")}', file=sys.stderr) + 
print(f"{item.identifier} - error: {j.get('error')}", file=sys.stderr) sys.exit(1) if not isinstance(src_md, list): - if key == 'subject': - src_md = src_md.split(';') - elif key == 'collection': - print(f'{item.identifier} - error: all collections would be removed, ' - 'not submitting task.', file=sys.stderr) + if key == "subject": + src_md = src_md.split(";") + elif key == "collection": + print(f"{item.identifier} - error: all collections would be removed, " + "not submitting task.", file=sys.stderr) sys.exit(1) if src_md == metadata[key]: - md[key] = 'REMOVE_TAG' + md[key] = "REMOVE_TAG" continue for x in src_md: @@ -200,12 +200,12 @@ def remove_metadata(item: item.Item, if len(md[key]) == len(src_md): del md[key] - if md.get('collection') == []: - print(f'{item.identifier} - error: all collections would be removed, not submitting task.', + if md.get("collection") == []: + print(f"{item.identifier} - error: all collections would be removed, not submitting task.", file=sys.stderr) sys.exit(1) elif not md: - print(f'{item.identifier} - warning: nothing needed to be removed.', file=sys.stderr) + print(f"{item.identifier} - warning: nothing needed to be removed.", file=sys.stderr) sys.exit(0) r = modify_metadata(item, md, args, parser) @@ -226,10 +226,10 @@ def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None: if args.exists: if item.exists: responses.append(True) - print(f'{identifier} exists', file=sys.stderr) + print(f"{identifier} exists", file=sys.stderr) else: responses.append(False) - print(f'{identifier} does not exist', file=sys.stderr) + print(f"{identifier} does not exist", file=sys.stderr) if (i + 1) == len(args.identifier): if all(r is True for r in responses): sys.exit(0) @@ -259,7 +259,7 @@ def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None: metadata = prepare_args_dict(args.remove, parser=parser, arg_type="remove") - if any('/' in k for k in metadata): + if any("/" in k for k in metadata): metadata = get_args_dict_many_write(metadata) if args.remove: @@ -276,7 +276,7 @@ def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None: continue # We still want to exit 0 if the non-200 is a # "no changes to xml" error. - elif 'no changes' in r.text: + elif "no changes" in r.text: continue else: sys.exit(1) @@ -286,7 +286,7 @@ def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None: for f in item.get_files(): formats.add(f.format) if (i + 1) == len(args.identifier): - print('\n'.join(formats)) + print("\n".join(formats)) # Dump JSON to stdout. else: @@ -297,15 +297,15 @@ def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None: if args.spreadsheet: if not args.priority: args.priority = -5 - with open(args.spreadsheet, newline='', encoding='utf-8') as csvfp: + with open(args.spreadsheet, newline="", encoding="utf-8") as csvfp: spreadsheet = csv.DictReader(csvfp) responses = [] for row in spreadsheet: - if not row['identifier']: + if not row["identifier"]: continue - item = args.session.get_item(row['identifier']) - if row.get('file'): - del row['file'] + item = args.session.get_item(row["identifier"]) + if row.get("file"): + del row["file"] metadata = {k.lower(): v for k, v in row.items() if v} responses.append(modify_metadata(item, metadata, args, parser)) @@ -318,7 +318,7 @@ def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None: continue # We still want to exit 0 if the non-200 is a # "no changes to xml" error. 
- elif 'no changes' in r.text: + elif "no changes" in r.text: continue else: sys.exit(1) diff --git a/internetarchive/cli/ia_move.py b/internetarchive/cli/ia_move.py index 7095446a..2a321385 100644 --- a/internetarchive/cli/ia_move.py +++ b/internetarchive/cli/ia_move.py @@ -38,22 +38,22 @@ def setup(subparsers): help="Move and rename files in archive.org items") # Positional arguments - parser.add_argument('source', - metavar='SOURCE', + parser.add_argument("source", + metavar="SOURCE", help="Source file formatted as: identifier/file") - parser.add_argument('destination', - metavar='DESTINATION', + parser.add_argument("destination", + metavar="DESTINATION", help="Destination file formatted as: identifier/file") # Options - parser.add_argument('-m', '--metadata', - metavar='KEY:VALUE', - action='append', + parser.add_argument("-m", "--metadata", + metavar="KEY:VALUE", + action="append", help=("Metadata to add to your new item, " "if you are moving the file to a new item")) - parser.add_argument('-H', '--header', - metavar='KEY:VALUE', - action='append', + parser.add_argument("-H", "--header", + metavar="KEY:VALUE", + action="append", help="S3 HTTP headers to send with your request") parser.add_argument("--replace-metadata", action="store_true", @@ -62,13 +62,13 @@ def setup(subparsers): parser.add_argument("--ignore-file-metadata", action="store_true", help="Do not copy file metadata") - parser.add_argument('-n', '--no-derive', - action='store_true', + parser.add_argument("-n", "--no-derive", + action="store_true", help="Do not derive uploaded files") - parser.add_argument('--no-backup', - action='store_true', + parser.add_argument("--no-backup", + action="store_true", help=("Turn off archive.org backups, " - 'clobbered files will not be saved to "history/files/$key.~N~"')) + "clobbered files will not be saved to 'history/files/$key.~N~'")) parser.set_defaults(func=lambda args: main(args, parser)) @@ -77,21 +77,21 @@ def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None: """ Main entry point for ia move command. """ - args.header = prepare_args_dict(args.header, parser=parser, arg_type='header') - args.metadata = prepare_args_dict(args.metadata, parser=parser, arg_type='metadata') + args.header = prepare_args_dict(args.header, parser=parser, arg_type="header") + args.metadata = prepare_args_dict(args.metadata, parser=parser, arg_type="metadata") # Add keep-old-version by default. - if not args.header.get('x-archive-keep-old-version') and not args.no_backup: - args.header['x-archive-keep-old-version'] = '1' + if not args.header.get("x-archive-keep-old-version") and not args.no_backup: + args.header["x-archive-keep-old-version"] = "1" # Call ia_copy. 
- _, src_file = ia_copy.main(args, cmd='move', parser=parser) + _, src_file = ia_copy.main(args, cmd="move", parser=parser) if src_file: dr = src_file.delete(headers=args.header, cascade_delete=True) else: - print(f'error: {src_file} does not exist', file=sys.stderr) + print(f"error: {src_file} does not exist", file=sys.stderr) sys.exit(1) if dr.status_code == 204: - print(f'success: moved {args.source} to {args.destination}', file=sys.stderr) + print(f"success: moved '{args.source}' to '{args.destination}'", file=sys.stderr) sys.exit(0) - print(f'error: {dr.content}', file=sys.stderr) + print(f"error: {dr.content}", file=sys.stderr) diff --git a/internetarchive/cli/ia_search.py b/internetarchive/cli/ia_search.py index aceaf542..c840cda4 100644 --- a/internetarchive/cli/ia_search.py +++ b/internetarchive/cli/ia_search.py @@ -119,9 +119,9 @@ def handle_search_results(args, search): for result in search: if args.itemlist: if args.fts or args.dsl_fts: - print('\n'.join(result.get('fields', {}).get('identifier'))) + print("\n".join(result.get("fields", {}).get("identifier"))) else: - print(result.get('identifier', '')) + print(result.get("identifier", "")) else: print(json.dumps(result)) if result.get("error"): diff --git a/internetarchive/cli/ia_tasks.py b/internetarchive/cli/ia_tasks.py index 64e6437c..a93da3fc 100644 --- a/internetarchive/cli/ia_tasks.py +++ b/internetarchive/cli/ia_tasks.py @@ -39,17 +39,17 @@ def setup(subparsers): help="Retrieve information about your archive.org catalog tasks") parser.add_argument("-t", "--task", - nargs='*', + nargs="*", help="Return information about the given task.") parser.add_argument("-G", "--get-task-log", help="Return the given tasks task log.") parser.add_argument("-p", "--parameter", nargs="+", metavar="KEY:VALUE", - action='append', + action="append", help="URL parameters passed to catalog.php.") parser.add_argument("-T", "--tab-output", - action='store_true', + action="store_true", help="Output task info in tab-delimited columns.") parser.add_argument("-c", "--cmd", type=str, @@ -60,22 +60,22 @@ def setup(subparsers): parser.add_argument("-a", "--task-args", nargs="+", metavar="KEY:VALUE", - action='append', + action="append", help="Args to submit to the Tasks API.") parser.add_argument("-d", "--data", nargs="+", metavar="KEY:VALUE", - action='append', + action="append", help="Additional data to send when submitting a task.") parser.add_argument("-r", "--reduced-priority", - action='store_true', + action="store_true", help="Submit task at a reduced priority.") parser.add_argument("-l", "--get-rate-limit", - action='store_true', + action="store_true", help="Get rate limit info.") parser.add_argument("identifier", type=str, - nargs='?', + nargs="?", help="Identifier for tasks specific operations.") parser.set_defaults(func=lambda args: main(args, parser)) @@ -85,14 +85,14 @@ def handle_task_submission_result(result, cmd): """ Handle the result of a task submission. 
""" - if result.get('success'): - task_log_url = result.get('value', {}).get('log') - print(f'success: {task_log_url}', file=sys.stderr) - elif 'already queued/running' in result.get('error', ''): - print(f'success: {cmd} task already queued/running', file=sys.stderr) + if result.get("success"): + task_log_url = result.get("value", {}).get("log") + print(f"success: {task_log_url}", file=sys.stderr) + elif "already queued/running" in result.get("error", ""): + print(f"success: {cmd} task already queued/running", file=sys.stderr) else: - print(f'error: {result.get("error")}', file=sys.stderr) - sys.exit(0 if result.get('success') else 1) + print(f"error: {result.get('error')}", file=sys.stderr) + sys.exit(0 if result.get("success") else 1) def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None: @@ -110,11 +110,11 @@ def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None: r = args.session.get_tasks_api_rate_limit(args.cmd) print(json.dumps(r)) sys.exit(0) - args.data['args'] = args.task_args + args.data["args"] = args.task_args r = args.session.submit_task(args.identifier, args.cmd, comment=args.comment, - priority=int(args.data.get('priority', 0)), + priority=int(args.data.get("priority", 0)), reduced_priority=args.reduced_priority, data=args.data) handle_task_submission_result(r.json(), args.cmd) @@ -122,49 +122,49 @@ def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None: # Tasks read API. if args.identifier: - _params = {'identifier': args.identifier, 'catalog': 1, 'history': 1} + _params = {"identifier": args.identifier, "catalog": 1, "history": 1} _params.update(args.parameter) args.parameter = _params elif args.get_task_log: log = args.session.get_task_log(args.get_task_log, **args.parameter) - print(log.encode('utf-8', errors='surrogateescape') - .decode('utf-8', errors='replace')) + print(log.encode("utf-8", errors="surrogateescape") + .decode("utf-8", errors="replace")) sys.exit(0) queryable_params = [ - 'identifier', - 'task_id', - 'server', - 'cmd', - 'args', - 'submitter', - 'priority', - 'wait_admin', - 'submittime', + "identifier", + "task_id", + "server", + "cmd", + "args", + "submitter", + "priority", + "wait_admin", + "submittime", ] if not (args.identifier - or args.parameter.get('task_id')): - _params = {'catalog': 1, 'history': 0} + or args.parameter.get("task_id")): + _params = {"catalog": 1, "history": 0} _params.update(args.parameter) args.parameter = _params if not any(x in args.parameter for x in queryable_params): - _params = {'submitter': args.session.user_email, 'catalog': 1, 'history': 0, 'summary': 0} + _params = {"submitter": args.session.user_email, "catalog": 1, "history": 0, "summary": 0} _params.update(args.parameter) args.parameter = _params if args.tab_output: - warn_msg = ('tab-delimited output will be removed in a future release. ' - 'Please switch to the default JSON output.') + warn_msg = ("tab-delimited output will be removed in a future release. " + "Please switch to the default JSON output.") warnings.warn(warn_msg, stacklevel=2) for t in args.session.get_tasks(params=args.parameter): # Legacy support for tab-delimited output. 
# Mypy is confused by CatalogTask members being created from kwargs if args.tab_output: - color = t.color if t.color else 'done' - task_args = '\t'.join([f'{k}={v}' for k, v in t.args.items()]) # type: ignore - output = '\t'.join([str(x) for x in [ + color = t.color if t.color else "done" + task_args = "\t".join([f"{k}={v}" for k, v in t.args.items()]) # type: ignore + output = "\t".join([str(x) for x in [ t.identifier, t.task_id, t.server, From 715d1936c6acdf26891b17f67cbd29e8d1a5af6a Mon Sep 17 00:00:00 2001 From: jake <jake@jakes-MacBook-Pro-2.local> Date: Thu, 6 Jun 2024 10:42:15 -0700 Subject: [PATCH 4/9] Exit cleanly on SIGINT and SIGPIPE --- internetarchive/cli/cli_utils.py | 14 ++++++++++++++ internetarchive/cli/ia.py | 6 ++++++ 2 files changed, 20 insertions(+) diff --git a/internetarchive/cli/cli_utils.py b/internetarchive/cli/cli_utils.py index 24388583..b7c2929f 100644 --- a/internetarchive/cli/cli_utils.py +++ b/internetarchive/cli/cli_utils.py @@ -22,6 +22,7 @@ import argparse import os +import signal import sys from collections import defaultdict from typing import Mapping @@ -120,3 +121,16 @@ def validate_dir_path(path): return path else: raise argparse.ArgumentTypeError(f"'{path}' is not a valid directory") + + +def exit_on_signal(sig, frame): + """ + Exit the program cleanly upon receiving a specified signal. + + This function is designed to be used as a signal handler. When a signal + (such as SIGINT or SIGPIPE) is received, it exits the program with an + exit code of 128 plus the signal number. This convention helps to + distinguish between regular exit codes and those caused by signals. + """ + exit_code = 128 + sig + sys.exit(exit_code) diff --git a/internetarchive/cli/ia.py b/internetarchive/cli/ia.py index 83cd4f71..f507e1fe 100755 --- a/internetarchive/cli/ia.py +++ b/internetarchive/cli/ia.py @@ -21,6 +21,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. import argparse +import signal import sys from internetarchive import __version__, get_session @@ -37,6 +38,11 @@ ia_tasks, ia_upload, ) +from internetarchive.cli.cli_utils import exit_on_signal + +# Handle <Ctrl-C> and broken pipe +signal.signal(signal.SIGPIPE, signal.SIG_DFL) +signal.signal(signal.SIGINT, exit_on_signal) def validate_config_path(path): From 054f029a3406960899a1b42abc9c3e5f9e0fcb9e Mon Sep 17 00:00:00 2001 From: jake <jake@jakes-MacBook-Pro-2.local> Date: Wed, 6 Nov 2024 12:22:09 -0800 Subject: [PATCH 5/9] "count" param cannot be sent with user_aggs queries to advanced search --- .pre-commit-config.yaml | 2 +- internetarchive/search.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 415f3c52..3e2e886c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,7 +8,7 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v5.0.0 hooks: - id: check-builtin-literals - id: check-executables-have-shebangs diff --git a/internetarchive/search.py b/internetarchive/search.py index c84c93f7..f1b1a300 100644 --- a/internetarchive/search.py +++ b/internetarchive/search.py @@ -211,6 +211,7 @@ def _make_results_generator(self): def _user_aggs(self): """Experimental support for user aggregations. """ + del self.params['count'] # advanced search will error if this param is present! 
self.params['page'] = '1'
         self.params['rows'] = '1'
         self.params['output'] = 'json'

From 458eb240b82f9967e0188b2f760a3611aa6e9ab9 Mon Sep 17 00:00:00 2001
From: jake <jake@jakes-MacBook-Pro-2.local>
Date: Wed, 6 Nov 2024 12:23:03 -0800
Subject: [PATCH 6/9] fixed --parameters parsing

---
 internetarchive/cli/cli_utils.py | 7 ++++---
 internetarchive/cli/ia_search.py | 9 +++++++--
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/internetarchive/cli/cli_utils.py b/internetarchive/cli/cli_utils.py
index b7c2929f..2ad477e6 100644
--- a/internetarchive/cli/cli_utils.py
+++ b/internetarchive/cli/cli_utils.py
@@ -89,7 +89,7 @@ def validate_identifier(identifier):
     return identifier
 
 
-def prepare_args_dict(args, parser, arg_type="metadata", many=False):
+def prepare_args_dict(args, parser, arg_type="metadata", many=False, query_string=False):
     if not args:
         return {}
     try:
@@ -97,9 +97,10 @@
             return get_args_dict_many_write([item for sublist in args for item in sublist])
         else:
             if isinstance(args[0], list):
-                return get_args_dict([item for sublist in args for item in sublist])
+                return get_args_dict([item for sublist in args for item in sublist],
+                                     query_string=query_string)
             else:
-                return get_args_dict(args)
+                return get_args_dict(args, query_string=query_string)
     except ValueError as e:
         parser.error(f"--{arg_type} must be formatted as --{arg_type}='key:value'")

diff --git a/internetarchive/cli/ia_search.py b/internetarchive/cli/ia_search.py
index c840cda4..6a84962b 100644
--- a/internetarchive/cli/ia_search.py
+++ b/internetarchive/cli/ia_search.py
@@ -162,8 +162,13 @@ def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None:
     """
     try:
         # Validate args.
-        args.parameters = prepare_args_dict(args.parameters, parser=parser)
-        args.header = prepare_args_dict(args.header, parser=parser)
+        args.parameters = prepare_args_dict(args.parameters,
+                                            parser=parser,
+                                            arg_type="parameters",
+                                            query_string=True)
+        args.header = prepare_args_dict(args.header,
+                                        parser=parser,
+                                        arg_type="header")
 
         # Prepare fields and sorts. 
fields = prepare_values(args.field)

From 1bae4d92873536d195cf86d0e928aadf401486d5 Mon Sep 17 00:00:00 2001
From: jake <jake@archive.org>
Date: Thu, 7 Nov 2024 10:21:45 -0800
Subject: [PATCH 7/9] removed docopt

---
 pex-requirements.txt | 1 -
 setup.cfg            | 2 --
 2 files changed, 3 deletions(-)

diff --git a/pex-requirements.txt b/pex-requirements.txt
index e86e8d48..80f8f716 100644
--- a/pex-requirements.txt
+++ b/pex-requirements.txt
@@ -1,5 +1,4 @@
 charset-normalizer==2.1.1
-docopt>=0.6.0,<0.7.0
 jsonpatch>=0.4
 requests>=2.25.0,<3.0.0
 schema>=0.4.0
diff --git a/setup.cfg b/setup.cfg
index 28c63a4d..01a487cd 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -25,7 +25,6 @@ packages =
     internetarchive
     internetarchive.cli
 install_requires =
-    docopt>=0.6.0,<0.7.0
     jsonpatch>=0.4
     requests>=2.25.0,<3.0.0
     schema>=0.4.0
@@ -64,7 +63,6 @@ test =
 types =
     tqdm-stubs>=0.2.0
     types-colorama
-    types-docopt>=0.6.10,<0.7.0
    types-jsonpatch>=0.1.0a0
     types-pygments
     types-requests>=2.25.0,<3.0.0

From fa02de9e842f8056bb0b7c478bf07991212ab01b Mon Sep 17 00:00:00 2001
From: jake <jake@archive.org>
Date: Thu, 7 Nov 2024 10:47:49 -0800
Subject: [PATCH 8/9] changed "subcommand" to "command" to improve clarity for end user

---
 internetarchive/cli/ia.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/internetarchive/cli/ia.py b/internetarchive/cli/ia.py
index f507e1fe..a9c1bc07 100755
--- a/internetarchive/cli/ia.py
+++ b/internetarchive/cli/ia.py
@@ -66,7 +66,7 @@ def main():
         description="A command line interface to Archive.org.",
         epilog=("Documentation for 'ia' is available at:\n\n\t"
                 "https://archive.org/developers/internetarchive/cli.html\n\n"
-                "See 'ia {subcommand} --help' for help on a specific subcommand."),
+                "See 'ia {command} --help' for help on a specific command."),
         formatter_class=argparse.RawTextHelpFormatter)  # support for \n in epilog
 
     parser.add_argument("-v", "--version",
@@ -92,9 +92,9 @@ def main():
                         help=("host to connect to "
                               "(doesn't work for requests made to s3.us.archive.org)"))
 
-    subparsers = parser.add_subparsers(title="subcommands",
-                                       dest="subcommand",
-                                       metavar="{subcommand}")
+    subparsers = parser.add_subparsers(title="commands",
+                                       dest="command",
+                                       metavar="{command}")
 
     # Add subcommand parsers
     ia_configure.setup(subparsers)

From 42873ed6413b24a818ad88d9c2cec8802dedfa6f Mon Sep 17 00:00:00 2001
From: jake <jake@archive.org>
Date: Thu, 7 Nov 2024 10:48:06 -0800
Subject: [PATCH 9/9] v5.0.0

---
 HISTORY.rst                    | 5 +++--
 internetarchive/__version__.py | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/HISTORY.rst b/HISTORY.rst
index c5d47102..dc05c587 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -3,14 +3,15 @@ Release History
 ---------------
 
-5.0.0 (?)
-+++++++++
+5.0.0 (2024-11-07)
+++++++++++++++++++
 
 **Features and Improvements**
 
 - Updated the CLI's command-line argument parsing by replacing the obsolete ``docopt`` with the native ``argparse`` library, ensuring continued functionality and future compatibility.
+  **Note:** While the CLI functionality hasn't changed, some commands may need to be formatted slightly differently; for example, options may now need to precede positional arguments (e.g. ``ia upload --size-hint 30 nasa test.txt``). Refer to ``ia --help`` and ``ia {command} --help`` if you run into any issues.
 
 4.1.0 (2024-05-07)
 ++++++++++++++++++
diff --git a/internetarchive/__version__.py b/internetarchive/__version__.py
index c15240a5..a0f66580 100644
--- a/internetarchive/__version__.py
+++ b/internetarchive/__version__.py
@@ -1 +1 @@
-__version__ = '5.0.0.dev1'
+__version__ = '5.0.0'