Skip to content

Commit

Permalink
dvc: rename cache -> odb (#5481)
Browse files Browse the repository at this point in the history
  • Loading branch information
efiop authored Feb 16, 2021
1 parent a814a49 commit e0842ce
Show file tree
Hide file tree
Showing 53 changed files with 308 additions and 331 deletions.
5 changes: 0 additions & 5 deletions dvc/cache/gdrive.py

This file was deleted.

8 changes: 4 additions & 4 deletions dvc/data_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def push(
remote = self.get_remote(remote, "push")

return remote.push(
self.repo.cache.local,
self.repo.odb.local,
cache,
jobs=jobs,
show_checksums=show_checksums,
Expand All @@ -86,7 +86,7 @@ def pull(
remote = self.get_remote(remote, "pull")

return remote.pull(
self.repo.cache.local,
self.repo.odb.local,
cache,
jobs=jobs,
show_checksums=show_checksums,
Expand Down Expand Up @@ -133,7 +133,7 @@ def status(
"""
remote = self.get_remote(remote, "status")
return remote.status(
self.repo.cache.local,
self.repo.odb.local,
cache,
jobs=jobs,
show_checksums=show_checksums,
Expand All @@ -142,4 +142,4 @@ def status(

def get_url_for(self, remote, checksum):
remote = self.get_remote(remote)
return str(remote.cache.hash_to_path_info(checksum))
return str(remote.odb.hash_to_path_info(checksum))
14 changes: 5 additions & 9 deletions dvc/dependency/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,22 +78,18 @@ def download(self, to, jobs=None):
from dvc.objects import save
from dvc.objects.stage import stage

cache = self.repo.cache.local
odb = self.repo.odb.local

with self._make_repo(cache_dir=cache.cache_dir) as repo:
with self._make_repo(cache_dir=odb.cache_dir) as repo:
if self.def_repo.get(self.PARAM_REV_LOCK) is None:
self.def_repo[self.PARAM_REV_LOCK] = repo.get_rev()
path_info = PathInfo(repo.root_dir) / self.def_path
obj = stage(
cache,
path_info,
repo.repo_fs,
jobs=jobs,
follow_subrepos=False,
odb, path_info, repo.repo_fs, jobs=jobs, follow_subrepos=False,
)
save(cache, obj, jobs=jobs)
save(odb, obj, jobs=jobs)

checkout(to.path_info, to.fs, obj, cache)
checkout(to.path_info, to.fs, obj, odb)

def update(self, rev=None):
if rev:
Expand Down
2 changes: 1 addition & 1 deletion dvc/external_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def _get_remote_config(url):
name = "auto-generated-upstream"
return {
"core": {"remote": name},
"remote": {name: {"url": repo.cache.local.cache_dir}},
"remote": {name: {"url": repo.odb.local.cache_dir}},
}

# Use original remote to make sure that we are using correct url,
Expand Down
4 changes: 2 additions & 2 deletions dvc/fs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,8 @@ def _check_requires(self):
)

@property
def cache(self):
return getattr(self.repo.cache, self.scheme)
def odb(self):
return getattr(self.repo.odb, self.scheme)

def open(self, path_info, mode: str = "r", encoding: str = None, **kwargs):
if hasattr(self, "_generate_download_url"):
Expand Down
6 changes: 3 additions & 3 deletions dvc/fs/dvc.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,14 @@ def open( # type: ignore
checksum = self._get_granular_hash(path, out).value
else:
checksum = out.hash_info.value
remote_info = remote_obj.cache.hash_to_path_info(checksum)
remote_info = remote_obj.odb.hash_to_path_info(checksum)
return remote_obj.fs.open(
remote_info, mode=mode, encoding=encoding
)

if out.is_dir_checksum:
checksum = self._get_granular_hash(path, out).value
cache_path = out.cache.hash_to_path_info(checksum).url
cache_path = out.odb.hash_to_path_info(checksum).url
else:
cache_path = out.cache_path
return open(cache_path, mode=mode, encoding=encoding)
Expand Down Expand Up @@ -137,7 +137,7 @@ def _fetch_dir(self, out, **kwargs):

from dvc.objects import Tree

hash_info = Tree.save_dir_info(out.cache, dir_cache)
hash_info = Tree.save_dir_info(out.odb, dir_cache)
if hash_info != out.hash_info:
raise FileNotFoundError

Expand Down
8 changes: 4 additions & 4 deletions dvc/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,16 @@ def get_dvc_info():
# can't auto-create it, as it might cause issues if the user
# later decides to enable shared cache mode with
# `dvc config cache.shared group`.
if os.path.exists(repo.cache.local.cache_dir):
if os.path.exists(repo.odb.local.cache_dir):
info.append(
"Cache types: {}".format(_get_linktype_support_info(repo))
)
fs_type = get_fs_type(repo.cache.local.cache_dir)
fs_type = get_fs_type(repo.odb.local.cache_dir)
info.append(f"Cache directory: {fs_type}")
else:
info.append("Cache types: " + error_link("no-dvc-cache"))

info.append(f"Caches: {_get_caches(repo.cache)}")
info.append(f"Caches: {_get_caches(repo.odb)}")

info.append(f"Remotes: {_get_remotes(repo.config)}")

Expand Down Expand Up @@ -91,7 +91,7 @@ def _get_linktype_support_info(repo):
}

fname = "." + str(uuid.uuid4())
src = os.path.join(repo.cache.local.cache_dir, fname)
src = os.path.join(repo.odb.local.cache_dir, fname)
open(src, "w").close()
dst = os.path.join(repo.root_dir, fname)

Expand Down
55 changes: 24 additions & 31 deletions dvc/cache/__init__.py → dvc/objects/db/__init__.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,37 @@
"""Manages cache of a DVC repo."""
from collections import defaultdict

from ..scheme import Schemes
from dvc.scheme import Schemes


def get_cloud_cache(fs):
from .base import CloudCache
from .gdrive import GDriveCache
from .local import LocalCache
from .ssh import SSHCache
def get_odb(fs):
from .base import ObjectDB
from .gdrive import GDriveObjectDB
from .local import LocalObjectDB
from .ssh import SSHObjectDB

if fs.scheme == Schemes.LOCAL:
return LocalCache(fs)
return LocalObjectDB(fs)

if fs.scheme == Schemes.SSH:
return SSHCache(fs)
return SSHObjectDB(fs)

if fs.scheme == Schemes.GDRIVE:
return GDriveCache(fs)
return GDriveObjectDB(fs)

return CloudCache(fs)
return ObjectDB(fs)


def _get_cache(repo, settings):
from ..fs import get_cloud_fs
def _get_odb(repo, settings):
from dvc.fs import get_cloud_fs

if not settings:
return None

fs = get_cloud_fs(repo, **settings)
return get_cloud_cache(fs)
return get_odb(fs)


class Cache:
"""Class that manages cache locations of a DVC repo.
Args:
repo (dvc.repo.Repo): repo instance that this cache belongs to.
"""

class ODBManager:
CACHE_DIR = "cache"
CLOUD_SCHEMES = [
Schemes.S3,
Expand All @@ -51,7 +44,7 @@ class Cache:
def __init__(self, repo):
self.repo = repo
self.config = config = repo.config["cache"]
self._cache = {}
self._odb = {}

local = config.get("local")

Expand All @@ -60,33 +53,33 @@ def __init__(self, repo):
elif "dir" not in config:
settings = None
else:
from ..config_schema import LOCAL_COMMON
from dvc.config_schema import LOCAL_COMMON

settings = {"url": config["dir"]}
for opt in LOCAL_COMMON.keys():
if opt in config:
settings[str(opt)] = config.get(opt)

self._cache[Schemes.LOCAL] = _get_cache(repo, settings)
self._odb[Schemes.LOCAL] = _get_odb(repo, settings)

def _initalize_cloud_cache(self, schemes):
def _init_odb(self, schemes):
for scheme in schemes:
remote = self.config.get(scheme)
settings = {"name": remote} if remote else None
self._cache[scheme] = _get_cache(self.repo, settings)
self._odb[scheme] = _get_odb(self.repo, settings)

def __getattr__(self, name):
if name not in self._cache and name in self.CLOUD_SCHEMES:
self._initalize_cloud_cache([name])
if name not in self._odb and name in self.CLOUD_SCHEMES:
self._init_odb([name])

try:
return self._cache[name]
return self._odb[name]
except KeyError as exc:
raise AttributeError from exc

def by_scheme(self):
self._initalize_cloud_cache(self.CLOUD_SCHEMES)
yield from self._cache.items()
self._init_odb(self.CLOUD_SCHEMES)
yield from self._odb.items()


class NamedCacheItem:
Expand Down
4 changes: 2 additions & 2 deletions dvc/cache/base.py → dvc/objects/db/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@

from dvc.progress import Tqdm

from ..objects import HashFile, ObjectFormatError
from .. import HashFile, ObjectFormatError

logger = logging.getLogger(__name__)


class CloudCache:
class ObjectDB:

DEFAULT_VERIFY = False
DEFAULT_CACHE_TYPES = ["copy"]
Expand Down
5 changes: 5 additions & 0 deletions dvc/objects/db/gdrive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from .base import ObjectDB


class GDriveObjectDB(ObjectDB):
    """Object database backed by a Google Drive remote.

    Overrides ``DEFAULT_VERIFY`` (``False`` in the ``ObjectDB`` base class)
    to enable verification by default for this scheme.
    """

    # NOTE(review): presumably Google Drive transfers are not trusted to be
    # intact without an explicit checksum re-verification — confirm why this
    # scheme alone (besides the base default) flips verify on by default.
    DEFAULT_VERIFY = True
8 changes: 4 additions & 4 deletions dvc/cache/local.py → dvc/objects/db/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@
from dvc.objects import ObjectFormatError
from dvc.path_info import PathInfo
from dvc.progress import Tqdm
from dvc.utils import relpath
from dvc.utils.fs import copyfile, remove, umask, walk_files

from ..utils import relpath
from ..utils.fs import copyfile, remove, umask, walk_files
from .base import CloudCache
from .base import ObjectDB

logger = logging.getLogger(__name__)


class LocalCache(CloudCache):
class LocalObjectDB(ObjectDB):
DEFAULT_CACHE_TYPES = ["reflink", "copy"]
CACHE_MODE = 0o444
UNPACKED_DIR_SUFFIX = ".unpacked"
Expand Down
4 changes: 2 additions & 2 deletions dvc/cache/ssh.py → dvc/objects/db/ssh.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
from dvc.progress import Tqdm
from dvc.utils import to_chunks

from .base import CloudCache
from .base import ObjectDB

logger = logging.getLogger(__name__)


class SSHCache(CloudCache):
class SSHObjectDB(ObjectDB):
def batch_exists(self, path_infos, callback):
def _exists(chunk_and_channel):
chunk, channel = chunk_and_channel
Expand Down
8 changes: 3 additions & 5 deletions dvc/objects/stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def get_dir_hash(path_info, fs, name, **kwargs):
from . import Tree

dir_info = _collect_dir(path_info, fs, name, **kwargs)
hash_info = Tree.save_dir_info(fs.repo.cache.local, dir_info)
hash_info = Tree.save_dir_info(fs.repo.odb.local, dir_info)
hash_info.size = dir_info.size
hash_info.dir_info = dir_info
return hash_info
Expand All @@ -118,9 +118,7 @@ def get_hash(path_info, fs, name, **kwargs):
if (
hash_info
and hash_info.isdir
and not fs.cache.fs.exists(
fs.cache.hash_to_path_info(hash_info.value)
)
and not fs.odb.fs.exists(fs.odb.hash_to_path_info(hash_info.value))
):
hash_info = None

Expand All @@ -129,7 +127,7 @@ def get_hash(path_info, fs, name, **kwargs):
from . import Tree

# NOTE: loading the fs will restore hash_info.dir_info
Tree.load(fs.cache, hash_info)
Tree.load(fs.odb, hash_info)
assert hash_info.name == name
return hash_info

Expand Down
Loading

0 comments on commit e0842ce

Please sign in to comment.