From b390f54b9a320a968b8bab026576d22ab75f07db Mon Sep 17 00:00:00 2001 From: Daniel Frees Date: Fri, 5 Jan 2024 19:34:51 -0800 Subject: [PATCH] last touches on 1.1.3 --- examples/neurosurgery-example.ipynb | 62 +++++++++++------------------ setup.py | 53 ------------------------ 2 files changed, 24 insertions(+), 91 deletions(-) delete mode 100644 setup.py diff --git a/examples/neurosurgery-example.ipynb b/examples/neurosurgery-example.ipynb index d3f8286..7a78501 100644 --- a/examples/neurosurgery-example.ipynb +++ b/examples/neurosurgery-example.ipynb @@ -4,23 +4,7 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [ - { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'lxml'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[1], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mscrapemed\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01msmed\u001b[39;00m \n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpd\u001b[39;00m \n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mscrapemed\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpaper\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Paper\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mscrapemed\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mscrape\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mscrape\u001b[39;00m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mscrapemed\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpaperSet\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m paperSet\n", - "File \u001b[0;32m~/miniconda3/envs/scrapemed/lib/python3.11/site-packages/scrapemed/paper.py:19\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;124;03mScrapeMed's Paper Module\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124;03m============================\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;124;03m Also may be caused by broken XML formatting.\u001b[39;00m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m---> 19\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mscrapemed\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_parse\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mparse\u001b[39;00m\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mlxml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01metree\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mET\u001b[39;00m\n\u001b[1;32m 21\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpd\u001b[39;00m\n", - "File \u001b[0;32m~/miniconda3/envs/scrapemed/lib/python3.11/site-packages/scrapemed/_parse.py:31\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m List, Dict, Tuple, Set\n\u001b[1;32m 30\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Union\n\u001b[0;32m---> 31\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mscrapemed\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mscrape\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mscrape\u001b[39;00m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mlxml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01metree\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mET\u001b[39;00m\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mscrapemed\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m basicBiMap, cleanerdoc\n", - "File \u001b[0;32m~/miniconda3/envs/scrapemed/lib/python3.11/site-packages/scrapemed/scrape.py:18\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;124;03mScrapeMed's Scrape Module\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124;03m============================\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;124;03m validating.\u001b[39;00m\n\u001b[1;32m 14\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mscrapemed\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_clean\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01m_clean\u001b[39;00m\n\u001b[0;32m---> 18\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mscrapemed\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_validate\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01m_validate\u001b[39;00m\n\u001b[1;32m 19\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mlxml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01metree\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mET\u001b[39;00m\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mBio\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Entrez\n", - "File \u001b[0;32m~/miniconda3/envs/scrapemed/lib/python3.11/site-packages/scrapemed/_validate.py:14\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;124;03mScrapeMed's ``_validate`` Module\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124;03m=================================\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;124;03m downloaded XML.\u001b[39;00m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mre\u001b[39;00m\n\u001b[0;32m---> 14\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mlxml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01metree\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mET\u001b[39;00m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mos\u001b[39;00m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mio\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m StringIO\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'lxml'" - ] - } - ], + "outputs": [], "source": [ "import scrapemed as smed \n", "import pandas as pd \n", @@ -34,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -49,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -58,7 +42,7 @@ "['10744829', '10711149', '10710589', '10698442', '10692402', '10749855', '10742002', '10714222', '10681104', '10680896']" ] }, - "execution_count": 5, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -69,33 +53,35 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generating paperSet from PMCID list (This can take a while due to PMC HTTP Request Limitations!)...\n", + "Done generating paperSet!\n" + ] + } + ], "source": [ - "#pset = paperSet.from_pmcid_list(brain_surgery_articles, EMAIL)" + "pset = paperSet.from_pmcid_list(brain_surgery_articles, EMAIL)" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 5, "metadata": {}, "outputs": [ { - "ename": "DTDParseError", - "evalue": "error parsing DTD", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mDTDParseError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[11], line 6\u001b[0m\n\u001b[1;32m 3\u001b[0m PMCID \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m7067710\u001b[39m\n\u001b[1;32m 4\u001b[0m email \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mgetenv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPMC_EMAIL\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 6\u001b[0m p \u001b[38;5;241m=\u001b[39m \u001b[43mpaper\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mPaper\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pmc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mPMCID\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43memail\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdownload\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/scrapemed/lib/python3.11/site-packages/scrapemed/paper.py:179\u001b[0m, in \u001b[0;36mPaper.from_pmc\u001b[0;34m(cls, pmcid, email, download, validate, verbose, suppress_warnings, suppress_errors)\u001b[0m\n\u001b[1;32m 177\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(NUM_TRIES):\n\u001b[1;32m 178\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 179\u001b[0m paper_dict \u001b[38;5;241m=\u001b[39m \u001b[43mparse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpaper_dict_from_pmc\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 180\u001b[0m \u001b[43m \u001b[49m\u001b[43mpmcid\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpmcid\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 181\u001b[0m \u001b[43m \u001b[49m\u001b[43memail\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43memail\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 182\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 183\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalidate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mvalidate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 184\u001b[0m \u001b[43m \u001b[49m\u001b[43mverbose\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mverbose\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 185\u001b[0m \u001b[43m \u001b[49m\u001b[43msuppress_warnings\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msuppress_warnings\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 186\u001b[0m \u001b[43m \u001b[49m\u001b[43msuppress_errors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msuppress_errors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 187\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 188\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[1;32m 189\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m HTTPError:\n", - "File \u001b[0;32m~/miniconda3/envs/scrapemed/lib/python3.11/site-packages/scrapemed/_parse.py:130\u001b[0m, in \u001b[0;36mpaper_dict_from_pmc\u001b[0;34m(pmcid, email, download, validate, verbose, suppress_warnings, suppress_errors)\u001b[0m\n\u001b[1;32m 128\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGenerating Paper object for PMCID = \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpmcid\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m...\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 129\u001b[0m \u001b[38;5;66;03m# DOWNLOAD XML TREE AND GET ROOT\u001b[39;00m\n\u001b[0;32m--> 130\u001b[0m paper_tree \u001b[38;5;241m=\u001b[39m \u001b[43mscrape\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_xml\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 131\u001b[0m \u001b[43m \u001b[49m\u001b[43mpmcid\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpmcid\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43memail\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43memail\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdownload\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalidate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mvalidate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mverbose\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mverbose\u001b[49m\n\u001b[1;32m 132\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 133\u001b[0m root \u001b[38;5;241m=\u001b[39m paper_tree\u001b[38;5;241m.\u001b[39mgetroot()\n\u001b[1;32m 135\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m generate_paper_dict(\n\u001b[1;32m 136\u001b[0m pmcid\u001b[38;5;241m=\u001b[39mpmcid,\n\u001b[1;32m 137\u001b[0m paper_root\u001b[38;5;241m=\u001b[39mroot,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 140\u001b[0m suppress_errors\u001b[38;5;241m=\u001b[39msuppress_errors,\n\u001b[1;32m 141\u001b[0m )\n", - "File \u001b[0;32m~/miniconda3/envs/scrapemed/lib/python3.11/site-packages/scrapemed/scrape.py:123\u001b[0m, in \u001b[0;36mget_xml\u001b[0;34m(pmcid, email, download, validate, strip_text_styling, verbose)\u001b[0m\n\u001b[1;32m 116\u001b[0m tree \u001b[38;5;241m=\u001b[39m xml_tree_from_string(\n\u001b[1;32m 117\u001b[0m xml_string\u001b[38;5;241m=\u001b[39mxml_text, strip_text_styling\u001b[38;5;241m=\u001b[39mstrip_text_styling, verbose\u001b[38;5;241m=\u001b[39mverbose\n\u001b[1;32m 118\u001b[0m )\n\u001b[1;32m 120\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m validate:\n\u001b[1;32m 121\u001b[0m \u001b[38;5;66;03m# Validate tags, attrs, values are supported for\u001b[39;00m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;66;03m# parsing by the scrapemed package.\u001b[39;00m\n\u001b[0;32m--> 123\u001b[0m \u001b[43m_validate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate_xml\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtree\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 125\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 126\u001b[0m (\n\u001b[1;32m 127\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWarning! Scraping XML for PMCID \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpmcid\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m from \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 130\u001b[0m validationWarning,\n\u001b[1;32m 131\u001b[0m )\n", - "File \u001b[0;32m~/miniconda3/envs/scrapemed/lib/python3.11/site-packages/scrapemed/_validate.py:88\u001b[0m, in \u001b[0;36mvalidate_xml\u001b[0;34m(xml)\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m dtd_doc \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 81\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m noDTDFoundError(\n\u001b[1;32m 82\u001b[0m cleanerdoc(\n\u001b[1;32m 83\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"DTD not found in scrapemed package. Ensure you are using the\u001b[39;00m\n\u001b[1;32m 84\u001b[0m \u001b[38;5;124;03m latest package version.\"\"\"\u001b[39;00m\n\u001b[1;32m 85\u001b[0m )\n\u001b[1;32m 86\u001b[0m )\n\u001b[0;32m---> 88\u001b[0m dtd \u001b[38;5;241m=\u001b[39m \u001b[43mET\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDTD\u001b[49m\u001b[43m(\u001b[49m\u001b[43mStringIO\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdtd_doc\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 90\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m dtd\u001b[38;5;241m.\u001b[39mvalidate(xml)\n", - "File \u001b[0;32msrc/lxml/dtd.pxi:291\u001b[0m, in \u001b[0;36mlxml.etree.DTD.__init__\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32msrc/lxml/dtd.pxi:409\u001b[0m, in \u001b[0;36mlxml.etree._parseDtdFromFilelike\u001b[0;34m()\u001b[0m\n", - "\u001b[0;31mDTDParseError\u001b[0m: error parsing DTD" + "name": "stderr", + "output_type": "stream", + "text": [ + "unexpectedMultipleMatchWarning: Warning! Multiple titles matched. Setting Paper.title to the first match.\n", + "\n", + "unexpectedTagWarning: Tag of type sc found in a text portion of the provided markup language. Expected only HTML styling tags, or tags from the following list: ['xref', 'fig', 'table-wrap']. Specified unknown tag behavior: keep. Warning occured in a text section with id: Par6.\n", + "\n" ] } ], diff --git a/setup.py b/setup.py deleted file mode 100644 index 3bd924d..0000000 --- a/setup.py +++ /dev/null @@ -1,53 +0,0 @@ -from setuptools import setup -import os - -_SCRAPEMED_REQS = [ - "biopython>=1.78", - "graphviz>=0.20.1", - "lxml>=4.9.2,<5.0.0", # 5.0.0 has stricter rules against - # max entity amplification, - # which breaks nlm articleset 2.0 DTD - "pandas>=1.5.2", - "requests-html>=0.10.0", - "sqlalchemy>=1.4.39", - "beautifulsoup4>=4.11", - "html5lib>=1.1", - "jinja2", - "python-dotenv", - "chromadb", - "langchain", - "uuid", - "matplotlib", - "wordcloud", -] - -_INSTALL_REQUIRES = _SCRAPEMED_REQS -_TEST_REQS = _SCRAPEMED_REQS - -setup( - name="scrapemed", - version="1.0.9", - description="ScrapeMed: Data Scraping for PubMed Central.", - author="Daniel Frees", - author_email="danielfrees@g.ucla.edu", - url="https://github.com/danielfrees/scrapemed", - packages=[ - "scrapemed", - ], - package_dir={ - "scrapemed": "scrapemed", - }, - package_data={ - "scrapemed": [os.path.join("data", "DTDs", "*")], - }, - install_requires=_INSTALL_REQUIRES, - tests_require=_TEST_REQS, - test_suite="tests", - classifiers=[ - "Development Status :: 4 - Beta", - "Intended Audience :: Developers", - "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.11", - ], -)