diff --git a/examples/many_strands_no_common_domains.py b/examples/many_strands_no_common_domains.py index ff5d4fd0..9f56a21f 100644 --- a/examples/many_strands_no_common_domains.py +++ b/examples/many_strands_no_common_domains.py @@ -8,11 +8,6 @@ import nuad.constraints as nc # type: ignore import nuad.vienna_nupack as nv # type: ignore import nuad.search as ns # type: ignore -from nuad.constraints import NumpyFilter - - -def f(x: int | float) -> float: - return x / 2 # command-line arguments @@ -51,13 +46,13 @@ def main() -> None: random_seed = 1 # many 4-domain strands with no common domains, 4 domains each, every domain length = 10 - # just for testing parallel processing # num_strands = 3 + # num_strands = 5 + # num_strands = 10 # num_strands = 10 - num_strands = 26 # num_strands = 50 - # num_strands = 100 + num_strands = 100 # num_strands = 355 design = nc.Design() @@ -77,7 +72,7 @@ def main() -> None: parallel = False # parallel = True - numpy_filters: List[NumpyFilter] = [ + numpy_filters: List[nc.NumpyFilter] = [ nc.NearestNeighborEnergyFilter(-9.3, -9.0, 52.0), # nc.BaseCountFilter(base='G', high_count=1), # nc.BaseEndFilter(bases=('C', 'G')), @@ -157,11 +152,11 @@ def main() -> None: params = ns.SearchParameters(constraints=[ # domain_nupack_ss_constraint, # strand_individual_ss_constraint, - # strand_pairs_rna_duplex_constraint, + strand_pairs_rna_duplex_constraint, # strand_pairs_rna_plex_constraint, # strand_pair_nupack_constraint, # domain_pair_nupack_constraint, - domain_pairs_rna_plex_constraint, + # domain_pairs_rna_plex_constraint, # domain_pairs_rna_duplex_constraint, # strand_base_pair_prob_constraint, # nc.domains_not_substrings_of_each_other_constraint(), diff --git a/notebooks/result-allocate-time-trials.ipynb b/notebooks/result-allocate-time-trials.ipynb index cdefe013..e3013d6a 100644 --- a/notebooks/result-allocate-time-trials.ipynb +++ b/notebooks/result-allocate-time-trials.ipynb @@ -87,6 +87,75 @@ "%timeit 
collect_results_into_noparse_nonormalize(energies, threshold, results)" ] }, + { + "cell_type": "code", + "execution_count": 6, + "id": "fa645805-54fd-4e64-9940-ae0a4da5ebc8", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAEWCAYAAACKSkfIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAfWklEQVR4nO3de5wcZZ3v8c+XhJsg9xFDbgMY2AWEgBHwKAqCmCAL4rIQkLsSEFHx6Ip4zerhrCCsRw9uMEAWWOUqIjmccAkosKIBAkYSkGi4JkMuQ7gpCAj89o/nGSkmPTM9M53uJM/3/Xr1a7qfeqrqqerqb1c9VV2jiMDMzMqwVqsbYGZmzePQNzMriEPfzKwgDn0zs4I49M3MCuLQNzMriEN/gCQdJ+lXK2naf5a0zcqYdqM0cvkl3Sbpk42Ylq06JE2W9ONVfZo9zKddUkgaurLn1WxrROg3a0NolojYMCIeqbd+Ds2X8pfFU5J+JmlYZfjkvAEfVikbmsvau02rq+4eDVkYW6VJWkfSTyU9lt/3vfuov5mkayW9IOlxSUc2p6W9tukISZe1uh1d6tkhknSxpFfyZ7brMSQP21PSTElPS+qUdHX185zr7C5phqRnc727JR1fT/vWiNDvi5I1Ylm7NowaTo2IDYF3ABsC53Qb/jTwL72MjyQBx+S6xzSguWu8Ru8J5lBdu5HTrMOvgKOAJXXU/SHwCrAl8HFgiqQdV2Lb6vERYEaL2zAQZ+cdvK7Ha7l8U2Aq0A6MBv4E/EfXSJLeA/wCuJ30ed8c+BQwoZ6ZrlZBKOl0SR2S/iRpvqR9JY0HvgIcnr8tf5fr3ibpTEl3Ai8C20g6XtLv8/iPSDqpMu29JS2S9AVJyyQtrn5zStpc0nRJz0u6G9i2l3Z2HRpOkvRkntYXK8N3l/Sb/C29WNJ5ktapDA9J78jPL5Y0JX+rvwDs09s6iohngZ8DY7sNupH0YT2ql9H3AoYBnwUmdmtTj8tf61C42mWT93zuzMv5nKSHJO3bUyMknZDfp2ck3SRpdC4/XdJdXfOR9ClJD0har4fpHChpTl7Pv5a0c2XYY5K+KOn+3KYrq9OpY9zTJd0PvKB01HSM0p7vcklfz3X2k/R2SS9K2rwy/m5Ke3C1wv1DwCJJ50raqad11CgR8UpE/J+I+BXwWm91JW0A/CPw9Yj4cx5nOnB0X/ORtLakyyVdo3R0MUTSVyQ9nD+P90oamet+X9LCvK3dK2mvXqa7Fmmd3VjZDo/P4z8j6WRJ787v87OSzquOK+lr+X1bJulSSRvXs94kfbnS9gclHZLL/x44H3iPUh49W8/0qiLihoi4OiKej4gXgfOA91aqfBe4JCLOioinIrk3Ig6rPcUVZ7BaPIDtgYXAVvl1O7Btfj4Z+HG3+rcBTwA7AkOBtUl7BNsCAj5A+jLYLdffG3gV+Faue0AevmkefgVwFbABsBPQAfyqh7a2AwFcnuu/E+gE9svD3wXsmdvVDvweOK0yfgDvyM8vBp7Lb/pawHo15ncb8Mn8fHPgFuC6yvDJwI+Bg4BH8vINzfNpr9S7KC/j2sBy4B8rw3pc/sryDu2hTcfldfv5PO3D8zJtVqPuwcAC4O9zG78G/DoPWwu4Iy/PGOAZYNce3oNdgWXAHsAQ4FjgMWDdPPwx4G5gK2Cz/B6c3I9x5wAjgfWBHYA/A+8D1iEdZf218n7PAD5Vadv3gP/by7a+E+mD/SRwD3AKeTus1DkSeLaXx6gBfMYWAXv
3MnxX4MVuZV8E/l8P9SeTtrv1gf9P2paH5GH/DMwlfa4F7AJsnocdRdqOhwJfIB2BrFfrs076HP2m23Z4PrAesD/wEmkn6G3A8Py+fiDXP4G0rW1DOjr+GfCfPW3T3Zbtn/K2sxZpe34BGFbZ3mtmQ2X8i0lH1E8D91L5rNWoexowKz9/C+nLeZ8BZ+lAR2z2g3QYswzYD1i71sbVrew24Ft9TPPnwOfy872Bv/Dm4FqWN6ohpA/x31WG/e+e3tjKBlOtfzZwUS9v6rWV191D/9I+luM20hfUc3ncOVQ+9NX1A9xFOhR8U+jnjel54KP59Y/IXxx9LX+tDwgrhv6TgCrD7waOrlH3BuATlXpr5WUbXZnX06SQPqOXdTIF+Ha3svm88YF/DDiq2/tzfj/GPaEy7BvA5ZXXbyEdVXWF/uHAnZV1uQTYvY5tfghpR+UqUpBfAWy0Ej9jfYX+XsCSbmUnArf1UH8y6UjgduAH3d7/+cDBdbbrGWCX7ttyfv1t0pFHdTscXhm+HDi88voa8g4WcCtwSmXY9qTtfCh9hH6NNs7pWh7qC/3deOOL7QBSF857a9TbOW/ve+XXw+mWLf19rDbdOxGxgBSOk4Flkq6QtFUfoy2svpA0QdIspRMfz5JW9haVKssj4tXK6xdJewBtpDenOr3H62h29/pb5XZsJ+l6SUskPU8K0C1qTaDWcvTgsxGxMWkj2RQY0UO9rwFfJe0JVR1C2hvv6hv9CTBBUhsDX/6qjshbbWX8Wu/faOD7+VD8WdIGL9LGTkQ8BvyS9KH8YS/zGw18oWs6eVoju82z2ofd9V7XO251XWxVfR3pkHx5Zfh1wA6StiZ1RTwXEXf30vau6bxG2hv+HWk97EQ6UhoQSaNUOXE4gEn8GdioW9lGpMDqyZ6kbfI73d7/kcDDPbTzi7l777m87jem58/HAazYn7+08vwvNV53vc9b8ebt+HHSdr5lj0vzRhuPqXT/PUt6b3r7DL9JRNwXEcsj4tWImEH6vH2s2zzeQdoJ+lxE/FcufgZ4ndQNOyCrTegDRMRlEfE+0ocygLO6BvU0StcTSeuSvuXPAbaMiE1IG4vqmHUnKRBHVspG1TFe9/pP5udTgIeAMRGxEemcRG/t6Gn5VqwYMRf4X8APJa0wzYiYSTqkPaXboGNJH4YnJC0BriYFzJH0vfwv5L9vqZS9vdv0h3drT3V9VC0EToqITSqP9SPi1wCSPgK8h7SX9t0a41enc2a36bwlIi7vZZz+jFt9TxZT+ZKVtD5pLy5VjHiJtLd+FKn/+z97m7mkDZXOg/wCuI/0hXd4ROwUEctznY/rzVd+dH+ssH1GxBNROXFYx3ro7g/AUEljKmW7AA/0Ms7NwL8Ct0qqhulCapwXy/33XwIOI3VpbUI6gl1hW5b0dlL43de/xfibJ0lZ0mUUaTtfWrv63+Y7GrgAOJXUJbUJMK/Sxro/rxVRGb9rHreQjjj/tr3kHYrfkM6tDMhqE/qStpf0wRzeL5G+sV/Pg5cC7er9Cp11gHXJASZpAqnPr095j+tnwGRJb5G0Aykk+/L1XH9H4Hjgylz+VlJXyp8l/R2pu6WRLiHtrRzUw/Cvkj5YAEgaDuwLHEg6ATyW9GE+Czimr+WPiE5SH/9R+QTdCaz4gX4b8Nl8Qu+fSH32ta64OB84I68zJG2c6yNpC+BC4JN5/v8g6YAelvEC4GRJeyjZQNJHJL21h/qDGfenuS3/Q+nk92RWDKlLSYf9B9FL6CtdmPAkqUvoR6SuilMi4p5qvYj4Sbz5yo/ujyfqWM6uea6rN05iryNpvR52GF4gbQffyuvkvaRzML1+iUXE2cBlpODv2hu+EPi2pDF5He+sdLL7raTg7SR9wXyDFY8uukwAbux2BNEflwOfl7S1pA1JR9xXdjvar2UDUkh3Aihd8FE96b4UGKHKhRDdSTo0f7mvJWl/0g7B9DxsOOnqnPMi4vwao38JOE7SP+d1hqRdJF1
RxzKvPqFPCuzvAE+RDsvfBpyRh12d/y6XVPNbPyL+RLoq5SrSIdKR5JVcp1NJe8JLSP3s/1HHOLeT9qpvBc6JiJtz+Rfz/P9ECpgra48+MBHxCvB94Os9DL+T1Kfe5WhgTkTcHBFLuh6kftidla4i6Wv5TySdnFtOOnn+627D7yKdfH0KOBM4tGuvtVvbriV92VyRu77m8calaFNJ5xlm5HE/AVyoypUxlenMzm06j/R+LyCFbp/6O25EPAB8htTnvpjUDbIMeLlS507STsp9EdFb19h8Un/thIi4MiJe7qVuo8wn7UQNB27Kz0cDKF1hc0Ol7imkE7PLSKH5qbz8vYqIb5POod0iaTPg30ifxZtJO0AX5eneRLrS7A+k7paX6Ll7c7CXak4jfWHdATya5/WZvkaKiAeBc0l73EtJF2rcWanyC9LRzxJJT/Uwmc+RdpSeJR2xnhgRt+VhnySdXJ5cqzsuH/V+MD8ekfQ06bNR17rQwL8krSdKP3h6lHTCua+9hjWepONIJ2rf1+q2NEPea3yW1H33aKX8F8BlEXFhq9q2plC6bHcJsE1EPN/q9qxOVqc9fbNVlqR/yF1fG5DOG80lXeXTNfzdpCs2GnpUV7DNSFftOPD7yaFv1hgHk/rinyR1Y03s6muWdAnppNxpuZvRBikilkXElFa3Y3Xk7h0zs4J4T9/MrCCr/G1Dt9hii2hvb291M8zMVhv33nvvUxHRVmvYKh/67e3tzJ49u9XNMDNbbUjq8bJgd++YmRXEoW9mVhCHvplZQRz6ZmYFceibmRXEoW9mVhCHvplZQRz6ZmYFceibmRXEoW9mq7xhI0YhqSWPYSPq+c+oq49V/jYMZmZLOhYy+vTrWzLvx886sCXzXVm8p29mVhCHvplZQRz6ZmYF6TP0JU2TtEzSvErZlZLm5Mdjkubk8nZJf6kMO78yzrskzZW0QNIPJGmlLJGZmfWonhO5FwPnAZd2FUTE4V3PJZ0LPFep/3BEjK0xnSnAicBdwAxgPHBDv1tsZmYD1ueefkTcATxda1jeWz8MuLy3aUgaBmwUEbPyP4u+FPhov1trZmaDMtg+/b2ApRHxx0rZ1pJ+K+l2SXvlsuHAokqdRbnMzMyaaLDX6R/Bm/fyFwOjImK5pHcBP5e0Y38nKmkSMAlg1Kg164cRZmatNOA9fUlDgY8BV3aVRcTLEbE8P78XeBjYDugARlRGH5HLaoqIqRExLiLGtbXV/N++ZmY2AIPp3tkPeCgi/tZtI6lN0pD8fBtgDPBIRCwGnpe0Zz4PcAxw3SDmbWZmA1DPJZuXA78Btpe0SNIn8qCJrHgC9/3A/fkSzp8CJ0dE10ngU4ALgQWkIwBfuWNm1mR99ulHxBE9lB9Xo+wa4Joe6s8Gdupn+8zMrIH8i1wzs4I49M3MCuLQNzMriEPfzKwgDn0zs4I49M3MCuLQNzMriEPfzKwgDn0zs4I49M3MCuLQNzMriEPfzKwgDn0zs4I49M3MCuLQNzMriEPfzKwgDn0zs4I49M3MCuLQNzMriEPfzKwgfYa+pGmSlkmaVymbLKlD0pz8OKAy7AxJCyTNl/ThSvn4XLZA0pcbvyhmZtaXevb0LwbG1yj/XkSMzY8ZAJJ2ACYCO+Zx/l3SEElDgB8CE4AdgCNyXTMza6KhfVWIiDsktdc5vYOBKyLiZeBRSQuA3fOwBRHxCICkK3LdB/vfZDMzG6jB9OmfKun+3P2zaS4bDiys1FmUy3oqr0nSJEmzJc3u7OwcRBPNzKxqoKE/BdgWGAssBs5tVIMAImJqRIyLiHFtbW2NnLSZWdH67N6pJSKWdj2XdAFwfX7ZAYysVB2Ry+il3MzMmmRAe/qShlVeHgJ0XdkzHZgoaV1JWwNjgLuBe4AxkraWtA7pZO/0gTfbzMwGos89fUmXA3sDW0haBHwT2FvSWCCAx4CTACLiAUlXkU7Qvgp8OiJey9M5FbgJGAJMi4gHGr0wZmbWu3qu3jm
iRvFFvdQ/EzizRvkMYEa/WmdmZg3lX+SamRXEoW9mVhCHvplZQRz6ZmYFceibmRXEoW9mVhCHvplZQRz6ZmYFceibmRXEoW9mVhCHvplZQRz6ZmYFceibmRXEoW9mVhCHvplZQRz6ZmYFceibmRXEoW9mVhCHvplZQfoMfUnTJC2TNK9S9l1JD0m6X9K1kjbJ5e2S/iJpTn6cXxnnXZLmSlog6QeStFKWyMzMelTPnv7FwPhuZTOBnSJiZ+APwBmVYQ9HxNj8OLlSPgU4ERiTH92naWZmK1mfoR8RdwBPdyu7OSJezS9nASN6m4akYcBGETErIgK4FPjogFpsZmYD1og+/ROAGyqvt5b0W0m3S9orlw0HFlXqLMplNUmaJGm2pNmdnZ0NaKKZmcEgQ1/SV4FXgZ/kosXAqIjYFfifwGWSNurvdCNiakSMi4hxbW1tg2mimdngDFkbSU1/DBsxaqUsztCBjijpOOBAYN/cZUNEvAy8nJ/fK+lhYDuggzd3AY3IZWZmq7bX/sro069v+mwfP+vAlTLdAe3pSxoPfAk4KCJerJS3SRqSn29DOmH7SEQsBp6XtGe+aucY4LpBt97MmmrYiFEt2eu1xulzT1/S5cDewBaSFgHfJF2tsy4wM78hs/KVOu8HviXpr8DrwMkR0XUS+BTSlUDrk84BVM8DmNlqYEnHwjVqr7dEfYZ+RBxRo/iiHupeA1zTw7DZwE79ap2ZmTWUf5FrZlYQh76ZWUEc+mZmBXHom5kVxKFvZlYQh76ZWUEc+mZmBXHom5kVxKFvZlYQh76ZWUEc+mZmBXHom5kVxKFvZlYQh76ZWUEc+mZmBXHom5kVxKFvZlYQh76ZWUEc+mZmBakr9CVNk7RM0rxK2WaSZkr6Y/67aS6XpB9IWiDpfkm7VcY5Ntf/o6RjG784ZmbWm3r39C8Gxncr+zJwa0SMAW7NrwEmAGPyYxIwBdKXBPBNYA9gd+CbXV8UZmbWHHWFfkTcATzdrfhg4JL8/BLgo5XySyOZBWwiaRjwYWBmRDwdEc8AM1nxi8TMzFaiwfTpbxkRi/PzJcCW+flwYGGl3qJc1lO5mZk1SUNO5EZEANGIaQFImiRptqTZnZ2djZqsmVnxBhP6S3O3DfnvslzeAYys1BuRy3oqX0FETI2IcRExrq2tbRBNNDOzqsGE/nSg6wqcY4HrKuXH5Kt49gSey91ANwH7S9o0n8DdP5eZmVmTDK2nkqTLgb2BLSQtIl2F8x3gKkmfAB4HDsvVZwAHAAuAF4HjASLiaUnfBu7J9b4VEd1PDpuZ2UpUV+hHxBE9DNq3Rt0APt3DdKYB0+punZmZNZR/kWtmVhCHvplZQRz6ZmYFceibmRXEoW9mVhCHvplZQRz6ZmYFceibmRXEoW9mVhCHvplZQRz6ZmYFceibmRXEoW9mVhCHvplZQRz6ZmYFceibmRXEoW9mVhCHvplZQRz6ZmYFceibmRVkwKEvaXtJcyqP5yWdJmmypI5K+QGVcc6QtEDSfEkfbswimJlZvYYOdMSImA+MBZA0BOgArgWOB74XEedU60vaAZgI7AhsBdwiabuIeG2gbTAzs/5pVPfOvsDDEfF4L3UOBq6IiJcj4lFgAbB7g+ZvZmZ1aFToTwQur7w+VdL9kqZJ2jSXDQcWVuosymUrkDRJ0mxJszs7OxvURDMzG3ToS1oHOAi4OhdNAbYldf0sBs7t7zQjYmpEjIuIcW1tbYNtopmZZY3Y058A3BcRSwEiYmlEvBYRrwMX8EYXTgcwsjLeiFxmZmZN0ojQP4JK146kYZVhhwDz8vPpwERJ60raGhgD3N2A+ZuZWZ0GfPUOgKQNgA8BJ1WKz5Y0Fgjgsa5hEfGApKuAB4FXgU/7yh0zs+YaVOhHxAvA5t3Kju6l/pnAmYOZp5mZDZx/kWtmVhCHvplZQRz6ZmYFceibmRXEoW9mVhCHvplZQRz6ZmYFceibmRXEoW9mVhCHvplZQRz6ZmY
FceibmRXEoW9mVhCHvplZQRz6ZmYFceibmRXEoW9mVhCHvplZQRz6ZmYFGXToS3pM0lxJcyTNzmWbSZop6Y/576a5XJJ+IGmBpPsl7TbY+ZuZWf0atae/T0SMjYhx+fWXgVsjYgxwa34NMAEYkx+TgCkNmr+ZmdVhZXXvHAxckp9fAny0Un5pJLOATSQNW0ltMDOzbhoR+gHcLOleSZNy2ZYRsTg/XwJsmZ8PBxZWxl2Uy95E0iRJsyXN7uzsbEATzcwMYGgDpvG+iOiQ9DZgpqSHqgMjIiRFfyYYEVOBqQDjxo3r17hmZtazQe/pR0RH/rsMuBbYHVja1W2T/y7L1TuAkZXRR+QyMzNrgkGFvqQNJL216zmwPzAPmA4cm6sdC1yXn08HjslX8ewJPFfpBjKzOgwbMQpJLXnY6m+w3TtbAtfmjWEocFlE3CjpHuAqSZ8AHgcOy/VnAAcAC4AXgeMHOX+z4izpWMjo069vybwfP+vAlszXGmdQoR8RjwC71ChfDuxbozyATw9mnmZmNnD+Ra6ZWUEc+mZmBXHom5kVxKFvZlYQh76ZWUEc+mZmBXHom5kVxKFvZlYQh76ZWUEc+mZmBXHom5kVxKFvZlYQh76ZWUEc+mZmBXHom5kVxKFvZlYQh76ZWUEc+mZmBXHom5kVZMChL2mkpF9KelDSA5I+l8snS+qQNCc/DqiMc4akBZLmS/pwIxbAzMzqN5h/jP4q8IWIuE/SW4F7Jc3Mw74XEedUK0vaAZgI7AhsBdwiabuIeG0QbTAzs34Y8J5+RCyOiPvy8z8BvweG9zLKwcAVEfFyRDwKLAB2H+j8zcys/xrSpy+pHdgVuCsXnSrpfknTJG2ay4YDCyujLaKHLwlJkyTNljS7s7OzEU00MzMaEPqSNgSuAU6LiOeBKcC2wFhgMXBuf6cZEVMjYlxEjGtraxtsE83MLBtU6EtamxT4P4mInwFExNKIeC0iXgcu4I0unA5gZGX0EbnMzMyaZDBX7wi4CPh9RPxbpXxYpdohwLz8fDowUdK6krYGxgB3D3T+ZmbWf4O5eue9wNHAXElzctlXgCMkjQUCeAw4CSAiHpB0FfAg6cqfT/vKHTOz5hpw6EfErwDVGDSjl3HOBM4c6DzNzGxw/ItcM7OCOPTNzAri0DczK4hD38ysIIO5esesaMNGjGJJx8K+K5qtQhz6ZgO0pGMho0+/vunzffysA5s+T1tzuHvHzKwgDn0zs4I49M3MCuLQNzMriEPfzKwgDn0zs4I49M3MCuLQNzMriEPfzKwg/kWurfZ8OwSz+jn0bbXn2yGY1c/dO2ZmBXHom5kVxKFvZlaQpoe+pPGS5ktaIOnLzZ6/rRzDRoxCUkseZla/pp7IlTQE+CHwIWARcI+k6RHxYDPbsSZr5ZUsrTiZCj6hatYfzb56Z3dgQUQ8AiDpCuBgYI0K/VZfQugrWcysJ4qI5s1MOhQYHxGfzK+PBvaIiFO71ZsETMovtwfmN62RrbcF8FSrG9FiXgdeB6UvPwxuHYyOiLZaA1bJ6/QjYiowtdXtaAVJsyNiXKvb0UpeB14HpS8/rLx10OwTuR3AyMrrEbnMzMyaoNmhfw8wRtLWktYBJgLTm9wGM7NiNbV7JyJelXQqcBMwBJgWEQ80sw2rgSK7tbrxOvA6KH35YSWtg6aeyDUzs9byL3LNzAri0DczK4hDfxUkaaykWZLmSJotafdWt6nZJH1G0kOSHpB0dqvb0yqSviApJG3R6rY0m6Tv5m3gfknXStqk1W1qlpV5uxqH/qrpbOBfImIs8I38uhiS9iH9UnuXiNgROKfFTWoJSSOB/YEnWt2WFpkJ7BQROwN/AM5ocXuaonK7mgnADsARknZo1PQd+qumADbKzzcGnmxhW1rhU8B3IuJlgIhY1uL2tMr3gC+RtofiRMTNEfFqfjmL9LueEvztdjUR8QrQdbuahnDor5pOA74raSFpL7eIPZyK7YC9JN0l6XZJ725
1g5pN0sFAR0T8rtVtWUWcANzQ6kY0yXCgevOuRbmsIVbJ2zCUQNItwNtrDPoqsC/w+Yi4RtJhwEXAfs1s38rWx/IPBTYD9gTeDVwlaZtYw64v7mMdfIXUtbNG620dRMR1uc5XgVeBnzSzbWsqX6e/CpL0HLBJRITSDeOfi4iN+hpvTSHpRuCsiPhlfv0wsGdEdLa2Zc0h6Z3ArcCLuWgEqYtv94hY0rKGtYCk44CTgH0j4sU+qq8RJL0HmBwRH86vzwCIiH9txPTdvbNqehL4QH7+QeCPLWxLK/wc2AdA0nbAOhR0x8WImBsRb4uI9ohoJx3e71Zg4I8nndM4qJTAz1bq7WrcvbNqOhH4vqShwEu8cZvpUkwDpkmaB7wCHLumde1YXc4D1gVm5v+QNisiTm5tk1a+lX27GnfvmJkVxN07ZmYFceibmRXEoW9mVhCHvplZQRz6ZmYFcehbMSS158tABzONiyUdWnk9Mf9idLBtu01S0f8I3JrDoW82OBOAG1vdCLN6OfStSJK2kfRbSXtIOkfSvHzf9s/k4d+QdE8un5pvh9F9GgLGAvdJmizpEkn/JelxSR+TdLakuZJulLR2HmffPN+5kqZJWrepC27Fc+hbcSRtD1wDHAfsBrQDY/N927tu6nVeRLw7InYC1gcOrDGpXYHfVX4tvC3pthkHAT8GfhkR7wT+AnxE0nrAxcDhuXwo6TbSZk3j0LfStAHXAR/Pty3eD/hR133bI+LpXG+ffGvnuaQg37HGtMbz5tv93hARfwXmkn4+39XtM5f0xbI98GhE/CGXXwK8v1ELZlYPh76V5jnSf6J6X08V8h75vwOH5j3yC4D1alTdH7i58rrrn768Dvy1cgTwOr7Pla0iHPpWmleAQ4BjJB1J+pd8J+Wb2yFpM94I+KckbQgc2n0ikjYGhkbE8n7Mez7QLukd+fXRwO0DWwyzgXHoW3Ei4gVSH/3nSbexfgK4X9LvgCMj4lnS3v080p0O76kxmQ8Bt/Rzvi8BxwNX526j14HzB7gYZgPiu2yaDYCkC4ELI2JWq9ti1h8OfTOzgrh7x8ysIA59M7OCOPTNzAri0DczK4hD38ysIA59M7OC/DdtlfreOgBNAgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# test search.display_report after removing pint\n", + "\n", + "import nuad.constraints as nc\n", + "import nuad.search as ns\n", + "\n", + "random_seed = 1\n", + "\n", + "# many 4-domain strands with no common domains, 4 domains each, every domain length = 10\n", + "\n", + "# num_strands = 3\n", + "# num_strands = 5\n", + "# num_strands = 10\n", + "# num_strands = 50\n", + "num_strands = 100\n", + "\n", + "design = nc.Design()\n", + "# si wi ni ei\n", + "# strand i is [----------|----------|----------|---------->\n", + "for i in range(num_strands):\n", + " design.add_strand([f's{i}', f'w{i}', f'n{i}', f'e{i}'])\n", + "\n", + "numpy_filters = [nc.NearestNeighborEnergyFilter(-9.3, -9.0, 52.0)]\n", + "\n", + "replace_with_close_sequences = True\n", + "domain_pool_10 = nc.DomainPool(f'length-10_domains', 10,\n", + " numpy_filters=numpy_filters,\n", + " replace_with_close_sequences=replace_with_close_sequences,\n", + " )\n", + "domain_pool_11 = nc.DomainPool(f'length-11_domains', 11,\n", + " numpy_filters=numpy_filters,\n", + " replace_with_close_sequences=replace_with_close_sequences,\n", + " )\n", + "\n", + "for strand in design.strands:\n", + " for domain in strand.domains[:2]:\n", + " domain.pool = domain_pool_10\n", + " for domain in strand.domains[2:]:\n", + " domain.pool = domain_pool_11\n", + "\n", + "strand_pairs_rna_duplex_constraint = nc.rna_duplex_strand_pairs_constraint(\n", + " threshold=-1.0, temperature=52, short_description='RNAduplex')\n", + "\n", + "constraints = [strand_pairs_rna_duplex_constraint]\n", + "\n", + "ns.assign_sequences_to_domains_randomly_from_pools(design=design, warn_fixed_sequences=True)\n", + "\n", + "ns.display_report(design=design, constraints=constraints)" + ] + }, { "cell_type": "code", "execution_count": 22, diff --git a/nuad/constraints.py b/nuad/constraints.py index 2f90d8d8..a041f1d2 100644 --- 
a/nuad/constraints.py +++ b/nuad/constraints.py @@ -22,7 +22,6 @@ import os import math import json -from decimal import Decimal from typing import List, Set, Dict, Callable, Iterable, Tuple, Collection, TypeVar, Any, \ cast, Generic, DefaultDict, FrozenSet, Iterator, Sequence, Type, Optional from dataclasses import dataclass, field, InitVar @@ -35,7 +34,6 @@ from enum import Enum, auto, unique import functools -import pint import numpy as np # noqa from ordered_set import OrderedSet @@ -46,10 +44,6 @@ import nuad.modifications as nm from nuad.json_noindent_serializer import JSONSerializable, json_encode, NoIndent -from pint import UnitRegistry - -ureg = UnitRegistry() - # need typing_extensions package prior to Python 3.8 to get Protocol object try: from typing import Protocol @@ -4274,7 +4268,8 @@ class Result(Generic[DesignPart]): -2.5 kcal/mol, and a strand has energy -3.4 kcal/mol, then the following are sensible values for these fields: - - ``value`` = ``-3.4`` or ``"-3.4 kcal/mol"`` or ``pint.Quantity(Decimal(-3.4), "kcal/mol")`` + - ``value`` = ``-3.4`` + - ``unit`` = ``"kcal/mol"`` - ``excess`` = ``-0.9`` - ``summary`` = ``"-3.4 kcal/mol"`` """ @@ -4292,15 +4287,21 @@ class Result(Generic[DesignPart]): _summary: Optional[str] = None - value: pint.Quantity[Decimal] | None = None + value: float | None = None """ If this is a "numeric" constraint, i.e., checking some number such as the complex free energy of a strand and comparing it to a threshold, this is the "raw" value. It is optional, but if specified, then the raw values can be plotted in a Jupyter notebook by the function :meth:`display_report`. - If a ``float``, then no units are assumed. If it is a ``str``, then it is assumed that it can be - passed to the constructor pint.Quantity and interpreted as a value with units, e.g., the string - "-3.4 kcal/mol". + Optional units (e.g., 'kcal/mol') can be specified in the field :data:`Result.unit`. 
+ """ + + unit: str | None = None + """ + Optional units for :data:`Result.value`, e.g., ``'kcal/mol'``. + + If specified, then the units are used in text reports + and to label the y-axis in plots created by :meth:`search.display_report`. """ score: float = field(init=False) @@ -4317,7 +4318,8 @@ class Result(Generic[DesignPart]): def __init__(self, excess: float, summary: str | None = None, - value: float | str | pint.Quantity[Decimal] | None = None) -> None: + value: float | None = None, + unit: str | None = None) -> None: self.excess = excess if summary is None: if value is None: @@ -4327,7 +4329,11 @@ def __init__(self, else: self._summary = summary if value is not None: - self.value = parse_and_normalize_quantity(value) + self.value = value + self.unit = unit + else: + if unit is not None: + raise ValueError('units cannot be specified if value is None') self.score = 0.0 self.part = None # type:ignore @@ -4344,8 +4350,10 @@ def summary(self) -> str: # This formatting is "short pretty": https://pint.readthedocs.io/en/stable/user/formatting.html # e.g., kcal/mol instead of kilocalorie / mol # also 2 decimal places to make numbers line up nicely - self.value.default_format = '.2fC~' - summary_str = f'{self.value}' + # self.value.default_format = '.2fC~' + summary_str = f'{self.value:6.2f}' + if self.unit is not None: + summary_str += f' {self.unit}' return str(summary_str) else: return self._summary @@ -4355,62 +4363,6 @@ def summary(self, summary: str) -> None: self._summary = summary -def parse_and_normalize_quantity(quantity: float | int | str | pint.Quantity) \ - -> pint.Quantity[Decimal]: - if isinstance(quantity, (str, float, int)): - quantity = ureg.Quantity(quantity) - quantity = normalize_quantity(quantity) - return quantity - - -def Q_(qty: int | str | Decimal | float, unit: str | pint.Unit) -> pint.Quantity[Decimal]: # noqa - # Convenient constructor for units, eg, :code:`Q_(5.0, 'nM')`. - # Ensures that the quantity is a Decimal. 
- if isinstance(qty, Decimal): - return ureg.Quantity(qty, unit) - else: - # we convert to string to avoid floating-point weirdness. For example - # ureg.Quantity(Decimal(-2.1), 'kcal/mol') gives - # -2.100000000000000088817841970012523233890533447265625 kilocalorie / mole, - # whereas - # ureg.Quantity(Decimal(str(-2.1)), 'kcal/mol') gives - # -2.1 kilocalorie / mole, - qty_str = str(qty) - return ureg.Quantity(Decimal(qty_str), unit) - - -def normalize_quantity(quantity: pint.Quantity, compact: bool = False) -> pint.Quantity[Decimal]: - """ - Normalize `quantity` so that it has a Decimal magnitude, - is "compact" if specified (uses units within the correct "3 orders of magnitude": - https://pint.readthedocs.io/en/0.18/tutorial.html#simplifying-units) - and eliminate trailing zeros. - - :param quantity: - a pint Quantity[Decimal] - :param compact: - whether to change units to make compact (within correct 3 orders of magnitude, e.g., - 30 kg instead of 30,000 g) - :return: - `quantity` normalized to be compact and without trailing zeros. 
- """ - if not isinstance(quantity.magnitude, Decimal): - quantity = Q_(quantity.magnitude, quantity.units) - if compact: - quantity = quantity.to_compact() - mag_int = quantity.magnitude.to_integral() - if mag_int == quantity.magnitude: - # can be represented exactly as integer, so return that; - # quantity.magnitude.normalize() would use scientific notation in this case, which we don't want - quantity = Q_(mag_int, quantity.units) - else: - # is not exact integer, so normalize will return normal float literal such as 10.2 - # and not scientific notation like it would for an integer - mag_norm = quantity.magnitude.normalize() - quantity = Q_(mag_norm, quantity.units) - return quantity - - @dataclass(eq=False) class SingularConstraint(Constraint[DesignPart], Generic[DesignPart], ABC): evaluate: Callable[[Tuple[str, ...], DesignPart | None], Result[DesignPart]] = \ @@ -4903,8 +4855,7 @@ def evaluate(seqs: Tuple[str], _: Domain | None) -> Result: sequence = seqs[0] energy = nv.free_energy_single_strand(sequence, temperature, sodium, magnesium) excess = max(0.0, threshold - energy) - value = f'{energy:6.2f} kcal/mol' - return Result(excess=excess, value=value) + return Result(excess=excess, value=energy, unit='kcal/mol') if description is None: description = f'NUPACK secondary structure of domain exceeds {threshold} kcal/mol' @@ -4972,8 +4923,7 @@ def evaluate(seqs: Tuple[str], _: Strand | None) -> Result: sequence = seqs[0] energy = nv.free_energy_single_strand(sequence, temperature, sodium, magnesium) excess = max(0.0, threshold - energy) - value = f'{energy:6.2f} kcal/mol' - return Result(excess=excess, value=value) + return Result(excess=excess, value=energy, unit='kcal/mol') if description is None: description = f'strand NUPACK energy >= {threshold} kcal/mol at {temperature}C' @@ -5098,7 +5048,7 @@ def evaluate(seqs: Tuple[str, ...], domain_pair: DomainPair | None) -> Result: summary = '\n ' + '\n '.join(lines) max_excess = max(0.0, max_excess) - return 
Result(excess=max_excess, summary=summary, value=max_excess) + return Result(excess=max_excess, summary=summary, value=max_excess, unit='kcal/mol') if pairs is not None: pairs = tuple(pairs) @@ -5257,8 +5207,7 @@ def evaluate(seqs: Tuple[str, ...], _: StrandPair | None) -> Result: seq1, seq2 = seqs energy = nv.binding(seq1, seq2, temperature=temperature, sodium=sodium, magnesium=magnesium) excess = max(0.0, threshold - energy) - value = f'{energy:6.2f} kcal/mol' - return Result(excess=excess, value=value) + return Result(excess=excess, value=energy, unit='kcal/mol') if pairs is not None: pairs = tuple(pairs) @@ -5426,7 +5375,7 @@ def evaluate_bulk(domain_pairs: Iterable[DomainPair]) -> List[Result]: lines = [line for line, _ in lines_and_energies] summary = '\n ' + '\n '.join(lines) max_excess = max(0.0, max_excess) - result = Result(excess=max_excess, summary=summary, value=max_excess) + result = Result(excess=max_excess, summary=summary, value=max_excess, unit='kcal/mol') results.append(result) return results @@ -5517,7 +5466,7 @@ def evaluate_bulk(domain_pairs: Iterable[DomainPair]) -> List[Result]: lines = [line for line, _ in lines_and_energies] summary = '\n ' + '\n '.join(lines) max_excess = max(0.0, max_excess) - result = Result(excess=max_excess, summary=summary, value=max_excess) + result = Result(excess=max_excess, summary=summary, value=max_excess, unit='kcal/mol') results.append(result) return results @@ -5635,9 +5584,8 @@ def evaluate_bulk(dom_pairs: Iterable[DomainPair]) -> List[Result]: else: excess = 0 - value = f'{energy:6.2f} kcal/mol' - summary = f'{value}; target: [{low_threshold}, {high_threshold}]' - result = Result(excess=excess, value=value, summary=summary) + summary = f'{energy:6.2f} kcal/mol; target: [{low_threshold}, {high_threshold}]' + result = Result(excess=excess, value=energy, unit='kcal/mol', summary=summary) results.append(result) return results @@ -6562,8 +6510,7 @@ def evaluate_bulk(pairs_: Iterable[DomainPair]) -> 
List[Result]: results = [] for lcs_size in lcs_sizes: excess = lcs_size - threshold - value = f'{lcs_size}' - result = Result(excess=excess, value=value) + result = Result(excess=excess, value=lcs_size) results.append(result) return results @@ -6639,8 +6586,7 @@ def evaluate_bulk(strand_pairs: Iterable[StrandPair]) -> List[Result]: results = [] for lcs_size in lcs_sizes: excess = lcs_size - threshold - value = f'{lcs_size}' - result = Result(excess=excess, value=value) + result = Result(excess=excess, value=lcs_size) results.append(result) # end_eb = time.time() @@ -6813,8 +6759,7 @@ def evaluate_bulk(strand_pairs: Iterable[StrandPair]) -> List[Result]: results = [] for pair, energy in zip(strand_pairs, energies): excess = threshold - energy - value = f'{energy:6.2f} kcal/mol' - result = Result(excess=excess, value=value) + result = Result(excess=excess, value=energy, unit='kcal/mol') results.append(result) return results @@ -6900,8 +6845,7 @@ def evaluate_bulk(strand_pairs: Iterable[StrandPair]) -> List[Result]: results = [] for pair, energy in zip(strand_pairs, energies): excess = threshold - energy - value = f'{energy:6.2f} kcal/mol' - result = Result(excess=excess, value=value) + result = Result(excess=excess, value=energy, unit='kcal/mol') results.append(result) return results @@ -7002,8 +6946,8 @@ def evaluate_bulk(strand_pairs: Iterable[StrandPair]) -> List[Result]: results = [] for pair, energy in zip(strand_pairs, energies): excess = threshold - energy - value = f'{energy:6.2f} kcal/mol' - results.append(Result(excess=excess, value=value)) + result = Result(excess=excess, value=energy, unit='kcal/mol') + results.append(result) return results pairs_tuple = None diff --git a/nuad/search.py b/nuad/search.py index 0e6636c6..0996bae2 100644 --- a/nuad/search.py +++ b/nuad/search.py @@ -28,8 +28,6 @@ import datetime from functools import lru_cache -import pint - try: from typing import Literal except ImportError: @@ -45,7 +43,7 @@ from ordered_set import 
OrderedSet import numpy as np # noqa -import nuad.np as nnp +import nuad.np as nn # XXX: If I understand ThreadPool versus Pool, ThreadPool will get no benefit from multiple cores, # but Pool will. However, when I check the core usage, all of them spike when using ThreadPool, which @@ -900,7 +898,7 @@ def search_for_sequences(design: nc.Design, params: SearchParameters) -> None: if params.random_seed is not None: rng = np.random.default_rng(params.random_seed) else: - rng = nnp.default_rng + rng = nn.default_rng if params.probability_of_keeping_change is None: params.probability_of_keeping_change = default_probability_of_keeping_change_function(params) @@ -1370,7 +1368,7 @@ def _dec(score_: float) -> int: def assign_sequences_to_domains_randomly_from_pools(design: Design, warn_fixed_sequences: bool, - rng: np.random.Generator = nnp.default_rng, + rng: np.random.Generator = nn.default_rng, overwrite_existing_sequences: bool = False) -> None: """ Assigns to each :any:`Domain` in this :any:`Design` a random DNA sequence from its @@ -2155,7 +2153,7 @@ def display_report(design: nc.Design, constraints: Iterable[Constraint], Dict[str | Constraint, None | Tuple[float, float]] = None, yscales: Literal['log', 'linear', 'symlog'] | Dict[str | Constraint, - Literal['log', 'linear', 'symlog']] = _default_yscale, + Literal['log', 'linear', 'symlog']] = _default_yscale, bins: int | Dict[str | Constraint, int] = _default_num_bins) -> None: """ When run in a Jupyter notebook cell, creates a :any:`ConstraintsReport` (the one returned from @@ -2214,12 +2212,13 @@ def dm(obj): include_only_with_values=False) # divide into constraints with values (put in histogram) and without (print summary of violations) - reports_with_values: List[Tuple[ConstraintReport, List[pint.Quantity]]] = [] + reports_with_values: List[Tuple[ConstraintReport, List[float], List[tuple]]] = [] reports_without_values: List[ConstraintReport] = [] for i, report in enumerate(constraints_report.reports): - quantities 
= [ev.result.value for ev in report.evaluations if ev.result.value is not None] - if len(quantities) > 0: - reports_with_values.append((report, quantities)) + values = [ev.result.value for ev in report.evaluations if ev.result.value is not None] + units = [ev.result.unit for ev in report.evaluations if ev.result.value is not None] + if len(values) > 0: + reports_with_values.append((report, values, units)) else: reports_without_values.append(report) num_figs = len(reports_with_values) @@ -2231,12 +2230,8 @@ def dm(obj): for viol in report.violations: print(f' {part_type_name} {viol.part.name}: {viol.summary}') - for i, (report, quantities) in enumerate(reports_with_values): - quantities = [ev.result.value for ev in report.evaluations if ev.result.value is not None] - assert len(quantities) > 0 - - # convert pint.Quantity to unitless magnitude to avoid UnitStrippedWarning when calling py.hist - values = [q.magnitude for q in quantities] + for i, (report, values, units) in enumerate(reports_with_values): + assert len(values) > 0 yscale = _value_from_constraint_dict(yscales, report.constraint, _default_yscale, str) # type: ignore @@ -2269,8 +2264,9 @@ def dm(obj): plt.ylim(ylim) # label x-axis with units (e.g., kilocalorie / mole) - unit = str(quantities[0].units) - plt.xlabel(unit) + unit = units[0] + if unit is not None: + plt.xlabel(unit) plt.title(report.constraint.description) diff --git a/requirements.txt b/requirements.txt index af87cbdc..ff4ef0b0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,6 @@ ordered_set pathos nupack tabulate -pint matplotlib openpyxl scadnano