Skip to content

Commit

Permalink
Use toarray() instead of .A (#1214)
Browse files Browse the repository at this point in the history
* Use `toarray()` instead of `.A`

* Convert to sparse array

* Fix test
  • Loading branch information
michalk8 authored Aug 5, 2024
1 parent 43c32bb commit b93a78a
Show file tree
Hide file tree
Showing 10 changed files with 61 additions and 58 deletions.
4 changes: 2 additions & 2 deletions src/cellrank/estimators/mixins/decomposition/_schur.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,9 +163,9 @@ def compute_schur(
verbose = method == "brandts"

tmat = self.transition_matrix
if method == "brandts" and sp.issparse(self.transition_matrix):
if method == "brandts" and sp.issparse(tmat):
logg.warning("For `method='brandts'`, dense matrix is required. Densifying")
tmat = tmat.A
tmat = tmat.toarray()

self._gpcca = GPCCA(tmat, eta=initial_distribution, z=which, method=method)
start = logg.info("Computing Schur decomposition")
Expand Down
2 changes: 1 addition & 1 deletion src/cellrank/kernels/_cytotrace_kernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ def _compute_score(
# fmt: off
imputed_exp = self.adata[:, top_genes].X if layer == "X" else self.adata[:, top_genes].layers[layer]
if sp.issparse(imputed_exp) and aggregation not in (CytoTRACEAggregation.MEAN, CytoTRACEAggregation.MEDIAN):
imputed_exp = imputed_exp.A
imputed_exp = imputed_exp.toarray()

if aggregation == CytoTRACEAggregation.MEAN:
cytotrace_score = np.asarray(imputed_exp.mean(axis=1)).reshape((-1,))
Expand Down
2 changes: 1 addition & 1 deletion src/cellrank/kernels/utils/_projection.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def project(
if np.any(np.isnan(dX)):
T_emb[row_id, :] = np.nan
else:
probs = row[:, conn_idxs].A.squeeze() if sp.issparse(row) else row[conn_idxs]
probs = row[:, conn_idxs].toarray().squeeze() if sp.issparse(row) else row[conn_idxs]
dX /= np.linalg.norm(dX, axis=1)[:, None]
dX = np.nan_to_num(dX)
T_emb[row_id, :] = probs.dot(dX) - dX.sum(0) / dX.shape[0]
Expand Down
2 changes: 1 addition & 1 deletion src/cellrank/kernels/utils/_random_walk.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ def _should_stop(self, ix: int) -> bool:
def _sample(self, ix: int, *, rs: np.random.RandomState) -> int:
return rs.choice(
self._ixs,
p=self._tmat[ix].A.squeeze() if self._is_sparse else self._tmat[ix],
p=self._tmat[ix].toarray().squeeze() if self._is_sparse else self._tmat[ix],
)

def _max_iter(self, max_iter: Union[int, float]) -> int:
Expand Down
4 changes: 2 additions & 2 deletions src/cellrank/kernels/utils/_tmat_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def compute_flow(
def default_helper(t1: Numeric_t, t2: Numeric_t) -> pd.DataFrame:
subset, row_cls, col_cls = self._get_time_subset(t1, t2)

df = pd.DataFrame(subset.A if sp.issparse(subset) else subset)
df = pd.DataFrame(subset.toarray() if sp.issparse(subset) else subset)
df = df.groupby(row_cls).sum().T.groupby(col_cls).sum().T

res = pd.DataFrame(np.zeros((n, n)), index=categories, columns=categories)
Expand All @@ -178,7 +178,7 @@ def default_helper(t1: Numeric_t, t2: Numeric_t) -> pd.DataFrame:
def cluster_helper(t1: Numeric_t, t2: Numeric_t) -> pd.DataFrame:
subset, row_cls, col_cls = self._get_time_subset(t1, t2, cluster=cluster)

df = pd.DataFrame(subset.A if sp.issparse(subset) else subset).sum(0)
df = pd.DataFrame(subset.toarray() if sp.issparse(subset) else subset).sum(0)
df = df.groupby(col_cls).sum()
df = pd.DataFrame([df], index=[cluster], columns=df.index)

Expand Down
6 changes: 3 additions & 3 deletions src/cellrank/models/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ def _(

ref = x[ref_ix]
if sp.issparse(ref):
ref = ref.A.squeeze(0) # (genes,)
ref = ref.toarray().squeeze(0) # (genes,)

return parallelize(
_calc_factor_weighted,
Expand Down Expand Up @@ -283,7 +283,7 @@ def _(x: Union[np.ndarray, sp.spmatrix], **_) -> np.ndarray:
mask = np.array((x > 0).sum(0)).squeeze() == x.shape[0]
tmp = x[:, mask]
if sp.issparse(tmp):
tmp = tmp.A
tmp = tmp.toarray()

gm = np.array(np.exp(np.mean(np.log(tmp), axis=0))).squeeze()
gm_mask = (gm > 0) & np.isfinite(gm)
Expand Down Expand Up @@ -323,7 +323,7 @@ def _calc_factor_weighted(

obs = obs_[ix]
if sp.issparse(obs):
obs = obs.A.squeeze(0) # (genes,)
obs = obs.toarray().squeeze(0) # (genes,)
obs_lib_size = obs_lib_size_[ix]

res[i] = _calc_factor_weighted_helper(
Expand Down
4 changes: 2 additions & 2 deletions tests/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def create_kernels(
ck._transition_matrix[-1, -1] = 1
ck._transition_matrix = sp.csr_matrix(ck._transition_matrix)

np.testing.assert_allclose(np.sum(ck._transition_matrix.A, axis=1), 1) # sanity check
np.testing.assert_allclose(np.sum(ck._transition_matrix.toarray(), axis=1), 1) # sanity check

return vk, ck

Expand Down Expand Up @@ -286,7 +286,7 @@ def check_arrays(x, y):
assert actual.adata is actual.kernel.adata
assert actual.kernel.backward == expected.kernel.backward

np.testing.assert_array_equal(actual.transition_matrix.A, expected.transition_matrix.A)
np.testing.assert_array_equal(actual.transition_matrix.toarray(), expected.transition_matrix.toarray())

k1 = sorted(expected.__dict__.keys())
k2 = sorted(actual.__dict__.keys())
Expand Down
53 changes: 28 additions & 25 deletions tests/test_kernels.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,9 +250,9 @@ def test_kernel_kernel_multiplication(self, adata: AnnData):
ck = ConnectivityKernel(adata).compute_transition_matrix()

actual = (vk * ck).transition_matrix
expected = _normalize(vk.transition_matrix.A * ck.transition_matrix.A)
expected = _normalize(vk.transition_matrix.toarray() * ck.transition_matrix.toarray())

np.testing.assert_allclose(actual.A, expected)
np.testing.assert_allclose(actual.toarray(), expected)

def test_constant(self, adata: AnnData):
k = 9 * VelocityKernel(adata) + 1 * ConnectivityKernel(adata)
Expand Down Expand Up @@ -399,7 +399,7 @@ def test_kernel_reads_correct_connectivities(self, adata: AnnData, key_added: Op
k = clazz(**kwargs)

if isinstance(k, ConnectivityMixin):
np.testing.assert_array_equal(k.connectivities.A, conn.A)
np.testing.assert_array_equal(k.connectivities.toarray(), conn.toarray())
else:
assert not hasattr(k, "_conn")

Expand Down Expand Up @@ -458,13 +458,13 @@ def test_pseudotime_frac_to_keep(self, adata: AnnData, k: int):
)
T_2 = pk.transition_matrix

np.testing.assert_allclose(T_1.A, T_2.A, rtol=_rtol)
np.testing.assert_allclose(T_1.toarray(), T_2.toarray(), rtol=_rtol)

def test_pseudotime_parallelize(self, adata: AnnData):
pk1 = PseudotimeKernel(adata, time_key="latent_time").compute_transition_matrix(n_jobs=None)
pk2 = PseudotimeKernel(adata, time_key="latent_time").compute_transition_matrix(n_jobs=2)

np.testing.assert_allclose(pk1.transition_matrix.A, pk2.transition_matrix.A, rtol=_rtol)
np.testing.assert_allclose(pk1.transition_matrix.toarray(), pk2.transition_matrix.toarray(), rtol=_rtol)

def test_pseudotime_inverse(self, adata: AnnData):
pk = PseudotimeKernel(adata, time_key="latent_time")
Expand Down Expand Up @@ -521,7 +521,7 @@ def test_manual_combination(self, adata: AnnData, model: str):
comb_kernel = 0.8 * vk + 0.2 * ck
T_comb_kernel = comb_kernel.transition_matrix

np.testing.assert_allclose(T_comb_kernel.A, T_comb_manual.A, rtol=_rtol)
np.testing.assert_allclose(T_comb_kernel.toarray(), T_comb_manual.toarray(), rtol=_rtol)

def test_manual_combination_no_precomputed(self, adata: AnnData):
density_normalize = False
Expand All @@ -539,7 +539,7 @@ def test_manual_combination_no_precomputed(self, adata: AnnData):
comb_kernel.compute_transition_matrix()
T_comb_kernel = comb_kernel.transition_matrix

np.testing.assert_allclose(T_comb_manual.A, T_comb_kernel.A, rtol=_rtol)
np.testing.assert_allclose(T_comb_manual.toarray(), T_comb_kernel.toarray(), rtol=_rtol)

@pytest.mark.parametrize("density_normalize", [False, True])
def test_manual_combination_backward(self, adata: AnnData, density_normalize):
Expand All @@ -555,7 +555,7 @@ def test_manual_combination_backward(self, adata: AnnData, density_normalize):
comb_kernel = 0.8 * vk + 0.2 * ck
T_comb_kernel = comb_kernel.transition_matrix

np.testing.assert_allclose(T_comb_manual.A, T_comb_kernel.A, rtol=_rtol)
np.testing.assert_allclose(T_comb_manual.toarray(), T_comb_kernel.toarray(), rtol=_rtol)

@pytest.mark.parametrize("density_normalize", [False, True])
def test_dnorm_scanpy(self, adata: AnnData, density_normalize: bool):
Expand All @@ -568,7 +568,7 @@ def test_dnorm_scanpy(self, adata: AnnData, density_normalize: bool):
T_sc = neigh.transitions

assert T_sc.shape == T_cr.shape
np.testing.assert_allclose(T_cr.A, T_cr.A)
np.testing.assert_allclose(T_cr.toarray(), T_cr.toarray())

def test_connectivities_key_kernel(self, adata: AnnData):
key = "foobar"
Expand All @@ -580,7 +580,7 @@ def test_connectivities_key_kernel(self, adata: AnnData):

assert key == ck.params["key"]
assert T_cr is not adata.obsp[key]
np.testing.assert_array_equal(T_cr.A, adata.obsp[key])
np.testing.assert_array_equal(T_cr.toarray(), adata.obsp[key])

@pytest.mark.parametrize("cluster_pair", [("Granule immature", "Granule mature"), ("nIPC", "Neuroblast")])
@pytest.mark.parametrize("graph_key", ["distances", "connectivities"])
Expand Down Expand Up @@ -654,7 +654,7 @@ def test_simple_addition(self, adata: AnnData):
expected = np.eye(adata.n_obs) * 0.75 + np.eye(adata.n_obs, k=1) * 0.25
expected[-1, -1] = 1

np.testing.assert_allclose(k.transition_matrix.A, expected)
np.testing.assert_allclose(k.transition_matrix.toarray(), expected)

def test_addtion_with_constant(self, adata: AnnData):
vk, ck = create_kernels(adata) # diagonal + upper diag
Expand All @@ -667,14 +667,16 @@ def test_addtion_with_constant(self, adata: AnnData):
)
expected[-1, -1] = 1

np.testing.assert_allclose(k.transition_matrix.A, expected)
np.testing.assert_allclose(k.transition_matrix.toarray(), expected)

def test_addition_3_kernels(self, adata: AnnData):
vk, ck = create_kernels(adata) # diagonal + upper diag
vk1 = VelocityKernel(adata)
vk1._transition_matrix = np.eye(adata.n_obs, k=-1) / 2 + np.eye(adata.n_obs) / 2
vk1._transition_matrix[0, 0] = 1
np.testing.assert_allclose(np.sum(ck._transition_matrix, axis=1), 1) # sanity check
# convert to a sparse array, otherwise the resulting kernel will be `matrix`
# which doesn't have `.toarray()`
vk1._transition_matrix = sp.csr_matrix(vk1._transition_matrix)

k = (vk + ck + vk1).compute_transition_matrix()
expected = (
Expand All @@ -685,7 +687,8 @@ def test_addition_3_kernels(self, adata: AnnData):
expected[0, 0] = expected[-1, -1] = 2 / 3 + 1 / 3 * 0.5
expected[0, 1] = expected[-1, -2] = 1 - expected[0, 0]

np.testing.assert_allclose(k.transition_matrix.A, expected)
np.testing.assert_allclose(np.sum(ck._transition_matrix, axis=1), 1) # sanity check
np.testing.assert_allclose(k.transition_matrix.toarray(), expected)


class TestKernelCopy:
Expand All @@ -706,7 +709,7 @@ def test_copy_transition_matrix(self, adata: AnnData):
vk1 = VelocityKernel(adata).compute_transition_matrix(softmax_scale=4)
vk2 = vk1.copy()

np.testing.assert_array_equal(vk1.transition_matrix.A, vk2.transition_matrix.A)
np.testing.assert_array_equal(vk1.transition_matrix.toarray(), vk2.transition_matrix.toarray())

def test_copy_params(self, adata: AnnData):
vk1 = VelocityKernel(adata).compute_transition_matrix(softmax_scale=4)
Expand All @@ -719,7 +722,7 @@ def test_copy_velocity_kernel(self, adata: AnnData):
vk1 = VelocityKernel(adata).compute_transition_matrix(softmax_scale=4)
vk2 = vk1.copy()

np.testing.assert_array_equal(vk1.transition_matrix.A, vk2.transition_matrix.A)
np.testing.assert_array_equal(vk1.transition_matrix.toarray(), vk2.transition_matrix.toarray())

assert vk1.params == vk2.params
assert vk1.backward == vk2.backward
Expand All @@ -728,15 +731,15 @@ def test_copy_connectivity_kernel(self, adata: AnnData):
ck1 = ConnectivityKernel(adata).compute_transition_matrix()
ck2 = ck1.copy()

np.testing.assert_array_equal(ck1.transition_matrix.A, ck2.transition_matrix.A)
np.testing.assert_array_equal(ck1.transition_matrix.toarray(), ck2.transition_matrix.toarray())
assert ck1.params == ck2.params
assert ck1.backward == ck2.backward

def test_copy_palantir_kernel(self, adata: AnnData):
pk1 = PseudotimeKernel(adata, time_key="dpt_pseudotime").compute_transition_matrix()
pk2 = pk1.copy()

np.testing.assert_array_equal(pk1.transition_matrix.A, pk2.transition_matrix.A)
np.testing.assert_array_equal(pk1.transition_matrix.toarray(), pk2.transition_matrix.toarray())
assert pk1.params == pk2.params
assert pk1.backward == pk2.backward

Expand Down Expand Up @@ -895,9 +898,9 @@ def test_implementations_differ(self, adata: AnnData, backward: bool):
np.testing.assert_allclose(vk_cor.transition_matrix.sum(1), 1.0)
np.testing.assert_allclose(vk_cor.transition_matrix.sum(1), 1.0)

assert not np.allclose(vk_dot.transition_matrix.A, vk_cos.transition_matrix.A)
assert not np.allclose(vk_cos.transition_matrix.A, vk_cor.transition_matrix.A)
assert not np.allclose(vk_cor.transition_matrix.A, vk_dot.transition_matrix.A)
assert not np.allclose(vk_dot.transition_matrix.toarray(), vk_cos.transition_matrix.toarray())
assert not np.allclose(vk_cos.transition_matrix.toarray(), vk_cor.transition_matrix.toarray())
assert not np.allclose(vk_cor.transition_matrix.toarray(), vk_dot.transition_matrix.toarray())

@pytest.mark.parametrize(
("key", "fn"),
Expand All @@ -917,7 +920,7 @@ def test_function_and_string_key(self, adata: AnnData, key: str, fn: Callable):
vk_k.compute_transition_matrix(model="deterministic", softmax_scale=4, similarity=key)
vk_fn.compute_transition_matrix(model="deterministic", softmax_scale=4, similarity=fn)

np.testing.assert_allclose(vk_k.transition_matrix.A, vk_fn.transition_matrix.A)
np.testing.assert_allclose(vk_k.transition_matrix.toarray(), vk_fn.transition_matrix.toarray())

@pytest.mark.parametrize("backward", [True, False])
def test_custom_function(self, adata: AnnData, backward: bool):
Expand Down Expand Up @@ -1207,7 +1210,7 @@ def test_explicit_shuffle(self, adata_large: AnnData):

for src, tgt in zip(cats[:-1], cats[1:]):
src_mask, tgt_mask = tmk.time == src, tmk.time == tgt
np.testing.assert_allclose(tmat[src_mask, :][:, tgt_mask].A, expected[src, tgt])
np.testing.assert_allclose(tmat[src_mask, :][:, tgt_mask].toarray(), expected[src, tgt])

@pytest.mark.parametrize(
("problem", "sparse_mode", "policy"),
Expand Down Expand Up @@ -1502,7 +1505,7 @@ def test_read_write(self, kernel: Kernel, tmpdir, write_adata: bool, copy: bool)
else:
assert k.adata is kernel.adata

np.testing.assert_array_equal(k.transition_matrix.A, kernel.transition_matrix.A)
np.testing.assert_array_equal(k.transition_matrix.toarray(), kernel.transition_matrix.toarray())

@pytest.mark.parametrize(
"clazz",
Expand Down Expand Up @@ -1534,6 +1537,6 @@ def test_from_adata(self, adata: AnnData, clazz: Type[Kernel]):
assert k1.backward == k2.backward
assert k1.params == k2.params
if sp.issparse(k1.transition_matrix):
np.testing.assert_almost_equal(k1.transition_matrix.A, k2.transition_matrix.A)
np.testing.assert_almost_equal(k1.transition_matrix.toarray(), k2.transition_matrix.toarray())
else:
np.testing.assert_almost_equal(k1.transition_matrix, k2.transition_matrix)
Loading

0 comments on commit b93a78a

Please sign in to comment.