diff --git a/paper.bib b/paper.bib
index 9999375..5cbd977 100644
--- a/paper.bib
+++ b/paper.bib
@@ -174,7 +174,7 @@ @article{Bregman
 doi = {10.1016/0041-5553(67)90040-7},
 url = {https://www.sciencedirect.com/science/article/pii/0041555367900407},
 author = {L.M. Bregman},
-abstract = {IN this paper we consider an iterative method of finding the common point of convex sets. This method can be regarded as a generalization of the methods discussed in [1–4]. Apart from problems which can be reduced to finding some point of the intersection of convex sets, the method considered can be applied to the approximate solution of problems in linear and convex programming.}
+abstract = {In this paper we consider an iterative method of finding the common point of convex sets. This method can be regarded as a generalization of the methods discussed in [1–4]. Apart from problems which can be reduced to finding some point of the intersection of convex sets, the method considered can be applied to the approximate solution of problems in linear and convex programming.}
 }
 
 @misc{logexp,
diff --git a/paper.md b/paper.md
index 1ee4cc1..20ef68e 100644
--- a/paper.md
+++ b/paper.md
@@ -138,7 +138,7 @@ $$
 
 ### Regularization
 
-To ensure the optimum converges, we introduce a regularization term
+Following @EPCA, we introduce a regularization term to ensure the optimization converges
 
 $$\begin{aligned}
 & \underset{\Theta}{\text{minimize}}
@@ -147,8 +147,9 @@ $$\begin{aligned}
 & & \mathrm{rank}\left(\Theta\right) = k
 \end{aligned}$$
 
-where $\epsilon > 0$ and $\mu_0 \in \mathrm{range}(g)$.
+where $\epsilon > 0$ and $\mu_0 \in \mathrm{range}(g)$.[^2]
 
+[^2]: In practice, we allow $\epsilon \geq 0$, because special cases of EPCA such as traditional PCA are well known to converge without regularization.
 
 ### Example: Poisson EPCA
 
@@ -158,7 +159,7 @@ This is useful in applications like belief compression in reinforcement learning
 
 ![Left - KL Divergence for Poisson EPCA versus PCA. Right - Reconstructions from the models.](./scripts/combo.png)
 
-For a larger environment with $200$ states, PCA struggles even with $10$ basis.
+For a larger environment with $200$ states, PCA struggles even with $10$ basis components.
 
 # API