diff --git a/Changelog.txt b/Changelog.txt index 351c8a5e6a0a..45d2c10c4165 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -23,6 +23,37 @@ # - title by author # }}} +{{{ 7.10.0 2024-05-03 + +:: new features + +- Export of calibre data: Ensure individual part files in the exported data are no larger than one gigabyte even + if the library contains individual files larger than that size. + + Note that this means that exports created by calibre from this version + on will not be importable by earlier versions. However, exports from + earlier versions should still be importable. + +- Edit book: Spell check: Add options to exclude words in ALL CAPS or with numbers or in camelCase/snake_case from the list of words + +- Allow easily inverting the current search via the right click menu on the search box + +:: bug fixes + +- [2064546] Kobo driver: Fix database unsupported error with newest firmware + +- [2063301] DOCX Input: Fix text elements containing only whitespace being incorrectly ignored + +- Bulk metadata dialog: Do not fail when setting covers from ebook files and some of the files have invalid covers + +:: improved recipes +- Economist +- The Week +- Caravan Magazine +- Financial Times + +}}} + {{{ 7.9.0 2024-04-19 :: new features diff --git a/bypy/sources.json b/bypy/sources.json index 0e1a8ae071d2..506a79bab504 100644 --- a/bypy/sources.json +++ b/bypy/sources.json @@ -323,8 +323,8 @@ { "name": "libxml2", "unix": { - "filename": "libxml2-2.12.1.tar.xz", - "hash": "sha256:8982b9ccdf7f456e30d8f7012d50858c6623e495333b6191def455c7e95427eb", + "filename": "libxml2-2.12.6.tar.xz", + "hash": "sha256:889c593a881a3db5fdd96cc9318c87df34eb648edfc458272ad46fd607353fbb", "urls": ["https://download.gnome.org/sources/libxml2/2.12/{filename}"] } }, @@ -620,8 +620,8 @@ { "name": "lxml", "unix": { - "filename": "lxml-4.9.3.tar.gz", - "hash": "sha256:48628bd53a426c9eb9bc066a923acaa0878d1e86129fd5359aee99285f4eed9c", + "filename": "lxml-5.2.1.tar.gz", + "hash": "sha256:3f7765e69bbce0906a7c74d5fe46d2c7a7596147318dbc08e4a2431f3060e306", "urls": ["pypi"] } }, @@ -968,6 +968,15 @@ } }, + { + "name": "lxml-html-clean", + "unix": { + "filename": "lxml_html_clean-0.1.1-py3-none-any.whl", + "hash": "sha256:58c04176593c9caf72ec92e033d2f38859e918b3eff0cc0f8051ad27dc2ab8ef", + "urls": ["pypi"] + } + }, + { "name": "ply", "comment": "Needed for sip (build time dependency)", diff --git a/bypy/windows/site.py b/bypy/windows/site.py index 2871855025b3..fb0e6450b073 100644 --- a/bypy/windows/site.py +++ b/bypy/windows/site.py @@ -60,16 +60,6 @@ def set_quit(): builtins.exit = _sitebuiltins.Quitter('exit', eof) -def workaround_lxml_bug(): - # Without calling xmlInitParser() import lxml causes a segfault - import ctypes - x = ctypes.WinDLL('libxml2.dll') - x.xmlInitParser() - workaround_lxml_bug.libxml2 = x - from lxml import etree - del etree - - def main(): sys.meta_path.insert(0, PydImporter()) os.add_dll_directory(os.path.abspath(os.path.join(sys.app_dir, 'app', 'bin'))) @@ -85,8 +75,6 @@ def fake_getline(filename, lineno, module_globals=None): set_helper() set_quit() - workaround_lxml_bug() - return run_entry_point() diff --git a/recipes/caravan_magazine.recipe b/recipes/caravan_magazine.recipe index c271c3a92c14..98e86d58f613 100644 --- a/recipes/caravan_magazine.recipe +++ b/recipes/caravan_magazine.recipe @@ -1,10 +1,7 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# License: GPLv3 Copyright: 2015, Kovid Goyal - import json +from urllib.parse import quote, urlparse -from calibre.web.feeds.news import 
BasicNewsRecipe, classes
+from calibre.web.feeds.news import BasicNewsRecipe
 from mechanize import Request
@@ -21,6 +18,45 @@ def safe_dict(data, *names):
         ans = ans.get(x) or ''
     return ans
+
+def parse_body(x):
+    if x.get('type', '') == 'paragraph':
+        yield '<p>'
+        for p in x.get('content', {}):
+            yield ''.join(parse_p(p))
+        yield '</p>\n'
+    elif x.get('type', '') in {'blockquote', 'pullquote'}:
+        yield '<blockquote>'
+        for p in x.get('content', {}):
+            yield from parse_body(p)
+        yield '</blockquote>'
+    elif x.get('type', '') == 'figure':
+        yield '<img src="{}">'.format(absurl(x['attrs']['src'].replace('=s0', '=s768-rw')))
+        for p in x.get('content', {}):
+            yield from parse_body(p)
+    elif x.get('type', '') in {'caption', 'credit'}:
+        yield '<div class="sub">'
+        for div in x.get('content', {}):
+            yield ''.join(parse_p(div))
+        yield '</div>\n'
+    elif x.get('type', '') != '':
+        if 'content' in x:
+            yield '<p>'
+            for p in x.get('content', {}):
+                yield from parse_body(p)
+            yield '</p>'
+
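+# A leaf node is plain text, optionally wrapped in the HTML tag named by its first "mark".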
+def parse_p(p):
+    if p.get('type', '') == 'text':
+        if 'marks' in p:
+            tag = p['marks'][0]['type']
+            yield '<' + tag + '>'
+            yield p['text']
+            yield '</' + tag + '>'
+        else:
+            yield p['text']
+
+
 class CaravanMagazine(BasicNewsRecipe):
 
     title = 'Caravan Magazine'
 
@@ -40,23 +76,26 @@ class CaravanMagazine(BasicNewsRecipe):
     remove_attributes = ['style', 'height', 'width']
     ignore_duplicate_articles = {'url'}
     resolve_internal_links = True
+    needs_subscription = 'optional'
+    logged = False
 
     extra_css = '''
+        img {display:block; margin:0 auto;}
         blockquote, em {color:#202020;}
-        .article_subtitle {font-style:italic; color:#202020;}
-        #fig-c, .photo_wrapper, .cover_figure_element {text-align:center; font-size:small;}
-        .pre-title, .text_wrapper {font-size:small; color:#404040;}
+        .desc {font-style:italic; color:#202020;}
+        .sub {text-align:center; font-size:small;}
+        .cat, .auth {font-size:small; color:#404040;}
     '''
 
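+    # Login goes through the site's tRPC endpoint; the payload is a numbered "batch" envelope.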
     def get_browser(self, *args, **kw):
         br = BasicNewsRecipe.get_browser(self, *args, **kw)
         if not self.username or not self.password:
             return br
-        data = json.dumps({'email': self.username, 'name': '', 'password': self.password})
+        data = json.dumps({"0":{"json":{"email":self.username,"password":self.password}}})
         if not isinstance(data, bytes):
             data = data.encode('utf-8')
         rq = Request(
-            url='https://caravanmagazine.in/api/users/login',
+            url='https://caravanmagazine.in/api/trpc/users.login?batch=1',
             data=data,
             headers={
                 'Accept': 'application/json, text/plain, */*',
             },
             method='POST'
         )
-        res = br.open(rq).read()
-        res = res.decode('utf-8')
-        self.log('Login request response: {}'.format(res))
-        res = json.loads(res)
-        if res['code'] != 200 or res['message'] != "Login success":
-            raise ValueError('Login failed, check your username and password')
+        try:
+            res = br.open(rq).read()
+            res = res.decode('utf-8')
+            res = json.loads(res)
+            self.log(safe_dict(res[0], 'result', 'data', 'json', 'message'))
+            self.logged = True
+        except:
+            self.log.warn('\n**Login failed, check your username and password\n')
+            return br
         return br
 
-    keep_only_tags = [
-        classes('text_wrapper cover_figure_element article_content')
-    ]
-
-    def preprocess_html(self, soup):
-        h2 = soup.find('h2')
-        if h2:
-            h2.name = 'p'
-        for fc in soup.findAll('figcaption'):
-            fc['id'] = 'fig-c'
-        return soup
-
     def parse_index(self):
         self.log(
             '\n***\nif this recipe fails, report it on: '
             'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
         )
+        api = 'https://api.caravanmagazine.in/api/trpc/magazines.getLatestIssue'
-        # api = 'https://api.caravanmagazine.in/api/trpc/magazines.getForMonthAndYear?batch=1&' + \
-        #     'input=%7B%220%22%3A%7B%22json%22%3A%7B%22month%22%3A' + '2' + '%2C%22year%22%3A' + '2024' + '%7D%7D%7D'
-        # input={"0":{"json":{"month":2,"year":2024}}}
-        raw = self.index_to_soup(api, raw=True)
-        data = json.loads(raw)['result']['data']['json']
+        # for past editions
+        # inp = json.dumps({"0":{"json":{"month":6,"year":2023}}})
+        # api = 'https://api.caravanmagazine.in/api/trpc/magazines.getForMonthAndYear?batch=1&input=' + quote(inp, safe='')
+
+        raw = json.loads(self.index_to_soup(api, raw=True))
+        if isinstance(raw, list):
+            data = raw[0]['result']['data']['json']
+        else:
+            data = raw['result']['data']['json']
 
         cover = safe_dict(data, 'issue', 'cover', 'data', 'url').replace('=s0', '=s768-rw')
         self.cover_url = absurl(cover)
 
@@ -122,3 +157,46 @@ class CaravanMagazine(BasicNewsRecipe):
             if articles:
                 feeds.append((section, articles))
         return feeds
+
+    def print_version(self, url):
+        slug = urlparse(url).path
+        inp = json.dumps({"0":{"json":{"slug":slug}}})
+        return 'https://api.caravanmagazine.in/api/trpc/articles.getFromCache?batch=1&input=' + quote(inp, safe='')
+
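+    # The cache endpoint returns article JSON rather than HTML; rebuild a minimal page from it.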
+    def preprocess_raw_html(self, raw, url):
+        cache_data = json.loads(raw)[0]
+        art_id = cache_data['result']['data']['json']['articleId']
+        prim_data = cache_data['result']['data']['json']['data']
+
+        cat = desc = lede = auth = ''
+
+        cat = '<div class="cat">' + safe_dict(prim_data, 'printTitle') + '</div>\n'
+        title = '<h1>' + safe_dict(prim_data, 'title') + '</h1>\n'
+        desc = '<p class="desc">' + safe_dict(prim_data, 'description') + '</p>\n'
+
+        authors = []
+        for q in prim_data.get('authors', {}):
+            authors.append(safe_dict(q, 'name'))
+        dt = ''
+        if prim_data.get('writtenAt', '') != '':
+            import time
+            from datetime import datetime, timedelta
+            dt = datetime.fromisoformat(prim_data['writtenAt'][:-1]) + timedelta(seconds=time.timezone)
+            dt = dt.strftime('%b %d, %Y, %I:%M %p')
+        auth = '<p class="auth">' + ', '.join(authors) + ' | ' + dt + '</p>\n'
+        lede = ''.join(parse_body(prim_data.get('cover', {})))
+
+        free_cont = ''
+        for x in prim_data['data']['content']:
+            free_cont += '\n' + ''.join(parse_body(x))
+
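+        # A successful login unlocks the paywalled remainder, fetched separately and appended below.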
+        premium_cont = ''
+        if self.logged:
+            cont_url = 'https://api.caravanmagazine.in/api/paywall/check-article?articleId='
+            art_cont = json.loads(self.index_to_soup(cont_url + str(art_id), raw=True))
+            for x in art_cont['premiumContent']:
+                premium_cont += '\n' + ''.join(parse_body(x))
+
+        return '<html><body>' \
+            + cat + title + desc + auth + lede + free_cont + premium_cont + \
+            '</body></html>
' diff --git a/recipes/dilema.recipe b/recipes/dilema.recipe new file mode 100644 index 000000000000..1a64701880c4 --- /dev/null +++ b/recipes/dilema.recipe @@ -0,0 +1,107 @@ +#!/usr/bin/env python +from calibre.web.feeds.recipes import BasicNewsRecipe + + +class Volkskrant(BasicNewsRecipe): + title = 'Dilema' + __author__ = 'Cristi Ghera' + max_articles_per_feed = 100 + description = '"Sint vechi, domnule!" (I.L. Caragiale)' + needs_subscription = False + language = 'ro' + country = 'RO' + category = 'politics, culture, Romania' + resolve_internal_links = True + remove_tags_before = { 'class': 'post' } + remove_tags_after = { 'class': 'post_content' } + remove_tags = [ + dict( + attrs={ + 'class': [ + 'single_meta_category', + 'avatar', + 'jm-post-like', + 'fa', + ] + } + ), + dict( + name=['div'], + attrs={ + 'class': ['mb-2'] + } + ), + dict(id=['like', 'dlik']), + dict(name=['script', 'noscript', 'style']), + ] + remove_attributes = ["class", "id", "name", "style"] + encoding = 'utf-8' + no_stylesheets = True + ignore_duplicate_articles = {'url'} + + def parse_index(self): + homepage_url = 'https://www.dilema.ro/' + soup = self.index_to_soup(homepage_url) + + articles = [] + + # .banner-container + banner_container = soup.find('div', attrs={'class': 'banner-container'}) + container = banner_container.find('h5') + a = container.find('a') + url = homepage_url + a.attrs['href'] + articles.append( + dict( + title=self.tag_to_string(container).strip(), + url=url, + date=self.tag_to_string(banner_container.find(attrs={'class': 'post-date'})).strip(), + description='', + content='' + ) + ) + + # .homepage_builder_3grid_post + containers = soup.findAll('div', attrs={'class': 'homepage_builder_3grid_post'}) + for container in containers: + if self.tag_to_string(container.find('h2')) in ['CELE MAI RECENTE', 'CELE MAI CITITE']: + continue + for article in container.findAll('div', attrs={'class': 'blog_grid_post_style'}): + title_container = article.find('h3') + if not title_container: + continue + url = title_container.find('a')['href'] + url = homepage_url + url + article_title = self.tag_to_string(title_container).strip() + author = self.tag_to_string( + article.find('a', attrs={'rel': 'author'}) + ).strip() + summary = self.tag_to_string(article.find('p')).strip() + pubdate = self.tag_to_string(article.find(attrs={'class': 'post-date'})) + description = author + ' - ' + summary + articles.append( + dict( + title=article_title, + url=url, + date=pubdate, + description=description, + content='' + ) + ) + + sections = [("Numărul curent", articles)] + return sections + + def preprocess_html(self, soup): + main_carousel = soup.find(attrs={'id': 'main-carousel'}) + if main_carousel: + img = main_carousel.find('img') + body = soup.find('body') + body.clear() + body.append(img) + return soup + + def get_cover_url(self): + url = 'https://www.dilema.ro/coperta-saptaminii/' + soup = self.index_to_soup(url) + img = soup.find(attrs={'id': 'main-carousel'}).find('img') + return url + img.attrs['src'] diff --git a/recipes/economist.recipe b/recipes/economist.recipe index e6dcd3acb851..960df8f62422 100644 --- a/recipes/economist.recipe +++ b/recipes/economist.recipe @@ -2,17 +2,22 @@ # License: GPLv3 Copyright: 2008, Kovid Goyal import json +import time from collections import defaultdict +from datetime import datetime, timedelta +from urllib.parse import quote, urlencode from calibre import replace_entities from calibre.ebooks.BeautifulSoup import NavigableString, Tag +from calibre.ptempfile import 
PersistentTemporaryFile from calibre.utils.date import parse_only_date from calibre.web.feeds.news import BasicNewsRecipe from html5_parser import parse from lxml import etree -# For past editions, set date to, for example, '2020-11-28' +# For past editions, set date to, for example, '2020-11-28'. edition_date = None +use_archive = True def E(parent, name, text='', **attrs): @@ -52,31 +57,63 @@ class JSONHasNoContent(ValueError): pass -def load_article_from_json(raw, root): - # open('/t/raw.json', 'w').write(raw) - try: - data = json.loads(raw)['props']['pageProps']['content'] - except KeyError as e: - raise JSONHasNoContent(e) - if isinstance(data, list): - data = data[0] - body = root.xpath('//body')[0] - for child in tuple(body): - body.remove(child) - article = E(body, 'article') - E(article, 'h4', data['subheadline'], style='color: red; margin: 0') - E(article, 'h1', data['headline'], style='font-size: x-large') - E(article, 'div', data['description'], style='font-style: italic') - E(article, 'div', (data['datePublishedString'] or '') + ' | ' + (data['dateline'] or ''), style='color: gray; margin: 1em') - main_image_url = safe_dict(data, 'image', 'main', 'url').get('canonical') - if main_image_url: - div = E(article, 'div') +if use_archive: + def load_article_from_json(raw, root): + # open('/t/raw.json', 'w').write(raw) + data = json.loads(raw) + body = root.xpath('//body')[0] + article = E(body, 'article') + E(article, 'div', data['flyTitle'] , style='color: red; font-size:small; font-weight:bold;') + E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '') + E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;') try: - E(div, 'img', src=main_image_url) + date = data['dateModified'] except Exception: - pass - for node in data.get('text') or (): - process_node(node, article) + date = data['datePublished'] + dt = datetime.fromisoformat(date[:-1]) + timedelta(seconds=time.timezone) + dt = dt.strftime('%b %d, %Y, %I:%M %p') + if data['dateline'] is None: + E(article, 'p', dt, style='color: gray; font-size:small;') + else: + E(article, 'p', dt + ' | ' + (data['dateline']), style='color: gray; font-size:small;') + main_image_url = safe_dict(data, 'image', 'main', 'url').get('canonical') + if main_image_url: + div = E(article, 'div') + try: + E(div, 'img', src=main_image_url) + except Exception: + pass + for node in data.get('text') or (): + process_node(node, article) +else: + def load_article_from_json(raw, root): + # open('/t/raw.json', 'w').write(raw) + try: + data = json.loads(raw)['props']['pageProps']['content'] + except KeyError as e: + raise JSONHasNoContent(e) + if isinstance(data, list): + data = data[0] + body = root.xpath('//body')[0] + for child in tuple(body): + body.remove(child) + article = E(body, 'article') + E(article, 'div', replace_entities(data['subheadline']) , style='color: red; font-size:small; font-weight:bold;') + E(article, 'h1', replace_entities(data['headline'])) + E(article, 'div', replace_entities(data['description']), style='font-style: italic; color:#202020;') + if data['dateline'] is None: + E(article, 'p', (data['datePublishedString'] or ''), style='color: gray; font-size:small;') + else: + E(article, 'p', (data['datePublishedString'] or '') + ' | ' + (data['dateline']), style='color: gray; font-size:small;') + main_image_url = safe_dict(data, 'image', 'main', 'url').get('canonical') + if main_image_url: + div = E(article, 'div') + try: + E(div, 'img', src=main_image_url) + except Exception: + pass + for 
node in data.get('text') or (): + process_node(node, article) def cleanup_html_article(root): @@ -129,31 +166,9 @@ class Economist(BasicNewsRecipe): ' perspective. Best downloaded on Friday mornings (GMT)' ) extra_css = ''' - .headline {font-size: x-large;} - h2 { font-size: small; } - h1 { font-size: medium; } - em.Bold {font-weight:bold;font-style:normal;} - em.Italic {font-style:italic;} - p.xhead {font-weight:bold;} - .pullquote { - float: right; - font-size: larger; - font-weight: bold; - font-style: italic; - page-break-inside:avoid; - border-bottom: 3px solid black; - border-top: 3px solid black; - width: 228px; - margin: 0px 0px 10px 15px; - padding: 7px 0px 9px; - } - .flytitle-and-title__flytitle { - display: block; - font-size: smaller; - color: red; - } + em { color:#202020; } img {display:block; margin:0 auto;} - ''' + ''' oldest_article = 7.0 resolve_internal_links = True remove_tags = [ @@ -186,15 +201,6 @@ class Economist(BasicNewsRecipe): needs_subscription = False - def __init__(self, *args, **kwargs): - BasicNewsRecipe.__init__(self, *args, **kwargs) - if self.output_profile.short_name.startswith('kindle'): - # Reduce image sizes to get file size below amazon's email - # sending threshold - self.web2disk_options.compress_news_images = True - self.web2disk_options.compress_news_images_auto_size = 5 - self.log.warn('Kindle Output profile being used, reducing image quality to keep file size below amazon email threshold') - def get_browser(self, *args, **kwargs): # Needed to bypass cloudflare kwargs['user_agent'] = 'common_words/based' @@ -202,19 +208,170 @@ class Economist(BasicNewsRecipe): br.addheaders += [('Accept-Language', 'en-GB,en-US;q=0.9,en;q=0.8')] return br + def publication_date(self): + if edition_date: + return parse_only_date(edition_date, as_utc=False) + url = self.browser.open("https://www.economist.com/printedition").geturl() + return parse_only_date(url.split("/")[-1], as_utc=False) + + def economist_test_article(self): + return [('Articles', [{'title':'test', + 'url':'https://www.economist.com/the-americas/2024/04/14/elon-musk-is-feuding-with-brazils-powerful-supreme-court' + }])] + + def economist_return_index(self, ans): + if not ans: + raise NoArticles( + 'Could not find any articles, either the ' + 'economist.com server is having trouble and you should ' + 'try later or the website format has changed and the ' + 'recipe needs to be updated.' 
+ ) + return ans + + if use_archive: + def parse_index(self): + # return self.economist_test_article() + url = 'https://www.economist.com/weeklyedition/archive' + if edition_date: + url = 'https://www.economist.com/weeklyedition/' + edition_date + soup = self.index_to_soup(url) + script_tag = soup.find("script", id="__NEXT_DATA__") + if script_tag is None: + raise ValueError('No script tag with JSON data found in the weeklyedition archive') + data = json.loads(script_tag.string) + content_id = data['props']['pageProps']['content']['id'].split('/')[-1] + query = { + 'query': 'query LatestWeeklyAutoEditionQuery($ref:String!){canonical(ref:$ref){hasPart(from:0 size:1 sort:"datePublished:desc"){parts{...WeeklyEditionFragment __typename}__typename}__typename}}fragment WeeklyEditionFragment on Content{id type datePublished image{...ImageCoverFragment __typename}url{canonical __typename}hasPart(size:100 sort:"publication.context.position"){parts{...ArticleFragment __typename}__typename}__typename}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}fragment ImageCoverFragment on Media{cover{headline width height url{canonical __typename}regionsAllowed __typename}__typename}', # noqa + 'operationName': 'LatestWeeklyAutoEditionQuery', + 'variables': '{{"ref":"/content/{}"}}'.format(content_id), + } + if edition_date: + query = { + 'query': 'query SpecificWeeklyEditionQuery($path:String!){section:canonical(ref:$path){...WeeklyEditionFragment __typename}}fragment WeeklyEditionFragment on Content{id type datePublished image{...ImageCoverFragment __typename}url{canonical __typename}hasPart(size:100 sort:"publication.context.position"){parts{...ArticleFragment __typename}__typename}__typename}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id 
__typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}fragment ImageCoverFragment on Media{cover{headline width height url{canonical __typename}regionsAllowed __typename}__typename}', # noqa + 'operationName': 'SpecificWeeklyEditionQuery', + 'variables': '{{"path":"/content/{}"}}'.format(content_id), + } + url = 'https://cp2-graphql-gateway.p.aws.economist.com/graphql?' + urlencode(query, safe='()!', quote_via=quote) + raw = self.index_to_soup(url, raw=True) + ans = self.economist_parse_index(raw) + return self.economist_return_index(ans) + + def economist_parse_index(self, raw): + if edition_date: + data = json.loads(raw)['data']['section'] + else: + data = json.loads(raw)['data']['canonical']['hasPart']['parts'][0] + self.description = data['image']['cover'][0]['headline'] + dt = datetime.fromisoformat(data['datePublished'][:-1]) + timedelta(seconds=time.timezone) + dt = dt.strftime('%b %d, %Y') + self.timefmt = ' [' + dt + ']' + self.cover_url = data['image']['cover'][0]['url']['canonical'].replace('economist.com/', + 'economist.com/cdn-cgi/image/width=960,quality=80,format=auto/') + self.log('Got cover:', self.cover_url) + + feeds_dict = defaultdict(list) + for part in safe_dict(data, "hasPart", "parts"): + try: + section = part['articleSection']['internal'][0]['title'] + except Exception: + section = safe_dict(part, 'print', 'section', 'title') or 'section' + if section not in feeds_dict: + self.log(section) + title = safe_dict(part, "title") + desc = safe_dict(part, "rubric") or '' + sub = safe_dict(part, "flyTitle") or '' + if sub and section != sub: + desc = sub + ' :: ' + desc + pt = PersistentTemporaryFile('.html') + pt.write(json.dumps(part).encode('utf-8')) + pt.close() + url = 'file:///' + pt.name + feeds_dict[section].append({"title": title, "url": url, "description": desc}) + self.log('\t', title, '\n\t\t', desc) + return [(section, articles) for section, articles in feeds_dict.items()] + + def populate_article_metadata(self, article, soup, first): + article.url = soup.find('h1')['title'] + + def preprocess_html(self, soup): + for img in soup.findAll('img', src=True): + img['src'] = img['src'].replace('economist.com/', + 'economist.com/cdn-cgi/image/width=600,quality=80,format=auto/') + return soup + + else: # Load articles from individual article pages {{{ + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + if self.output_profile.short_name.startswith('kindle'): + # Reduce image sizes to get file size below amazon's email + # sending threshold + self.web2disk_options.compress_news_images = True + self.web2disk_options.compress_news_images_auto_size = 5 + self.log.warn('Kindle Output profile being used, reducing image quality to keep file size below amazon email threshold') + + def parse_index(self): + # return self.economist_test_article() + if edition_date: + url = 'https://www.economist.com/weeklyedition/' + edition_date + self.timefmt = ' [' + edition_date + ']' + else: + url = 'https://www.economist.com/weeklyedition' + soup = self.index_to_soup(url) + ans = self.economist_parse_index(soup) + return 
self.economist_return_index(ans) + + def economist_parse_index(self, soup): + script_tag = soup.find("script", id="__NEXT_DATA__") + if script_tag is not None: + data = json.loads(script_tag.string) + # open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True)) + self.description = safe_dict(data, "props", "pageProps", "content", "image", "main", "headline") + self.timefmt = ' [' + safe_dict(data, "props", "pageProps", "content", "datePublishedString") + ']' + self.cover_url = safe_dict(data, "props", "pageProps", "content", "image", "main", "url", "canonical") + self.log('Got cover:', self.cover_url) + + feeds_dict = defaultdict(list) + for part in safe_dict(data, "props", "pageProps", "content", "hasPart", "parts"): + section = safe_dict(part, "print", "section", "headline") or '' + title = safe_dict(part, "headline") or '' + url = safe_dict(part, "url", "canonical") or '' + if not section or not title or not url: + continue + desc = safe_dict(part, "description") or '' + sub = safe_dict(part, "subheadline") or '' + if sub and section != sub: + desc = sub + ' :: ' + desc + feeds_dict[section].append({"title": title, "url": url, "description": desc}) + self.log(' ', title, url, '\n ', desc) + return [(section, articles) for section, articles in feeds_dict.items()] + else: + return [] + + # }}} + + def preprocess_raw_html(self, raw, url): # open('/t/raw.html', 'wb').write(raw.encode('utf-8')) - root = parse(raw) + if use_archive: + body = '
<html><body><article></article></body></html>'
+            root = parse(body)
+            load_article_from_json(raw, root)
+        else:
+            root = parse(raw)
+            script = root.xpath('//script[@id="__NEXT_DATA__"]')
+            if script:
+                try:
+                    load_article_from_json(script[0].text, root)
+                except JSONHasNoContent:
+                    cleanup_html_article(root)
+
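+        # Interactive pieces are rendered client-side in the browser; return a short placeholder page.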
         if '/interactive/' in url:
-            return '<html><body><article><h1>' + root.xpath('//h1')[0].text + '</h1><em>' \
+            return '<html><body><article><h1>' + root.xpath('//h1')[0].text + '</h1><em>' \
                 + 'This article is supposed to be read in a browser' \
                 + '</em></article></body></html>
' - script = root.xpath('//script[@id="__NEXT_DATA__"]') - if script: - try: - load_article_from_json(script[0].text, root) - except JSONHasNoContent: - cleanup_html_article(root) + for div in root.xpath('//div[@class="lazy-image"]'): noscript = list(div.iter('noscript')) if noscript and noscript[0].text: @@ -227,11 +384,15 @@ class Economist(BasicNewsRecipe): for x in root.xpath('//*[name()="script" or name()="style" or name()="source" or name()="meta"]'): x.getparent().remove(x) # the economist uses for small caps with a custom font + for init in root.xpath('//span[@data-caps="initial"]'): + init.set('style', 'font-weight:bold;') for x in root.xpath('//small'): if x.text and len(x) == 0: x.text = x.text.upper() x.tag = 'span' x.set('style', 'font-variant: small-caps') + for h2 in root.xpath('//h2'): + h2.tag = 'h4' for x in root.xpath('//figcaption'): x.set('style', 'text-align:center; font-size:small;') for x in root.xpath('//cite'): @@ -239,17 +400,8 @@ class Economist(BasicNewsRecipe): x.set('style', 'color:#404040;') raw = etree.tostring(root, encoding='unicode') return raw - - def publication_date(self): - if edition_date: - return parse_only_date(edition_date, as_utc=False) - url = self.browser.open("https://www.economist.com/printedition").geturl() - return parse_only_date(url.split("/")[-1], as_utc=False) - - def parse_index(self): - # return [('Articles', [{'title':'test', - # 'url':'https://www.economist.com/interactive/briefing/2022/06/11/huge-foundation-models-are-turbo-charging-ai-progress' - # }])] + def parse_index_from_printedition(self): + # return self.economist_test_article() if edition_date: url = 'https://www.economist.com/weeklyedition/' + edition_date self.timefmt = ' [' + edition_date + ']' @@ -276,33 +428,6 @@ class Economist(BasicNewsRecipe): ) return ans - def economist_parse_index(self, soup): - script_tag = soup.find("script", id="__NEXT_DATA__") - if script_tag is not None: - data = json.loads(script_tag.string) - # open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True)) - # self.title = 'The Economist | ' + safe_dict(data, "props", "pageProps", "content", "image", "main", "headline") - self.timefmt = ' [' + safe_dict(data, "props", "pageProps", "content", "datePublishedString") + ']' - self.cover_url = safe_dict(data, "props", "pageProps", "content", "image", "main", "url", "canonical") - self.log('Got cover:', self.cover_url) - - feeds_dict = defaultdict(list) - for part in safe_dict(data, "props", "pageProps", "content", "hasPart", "parts"): - section = safe_dict(part, "print", "section", "headline") or '' - title = safe_dict(part, "headline") or '' - url = safe_dict(part, "url", "canonical") or '' - if not section or not title or not url: - continue - desc = safe_dict(part, "description") or '' - sub = safe_dict(part, "subheadline") or '' - if sub and section != sub: - desc = sub + ' :: ' + desc - feeds_dict[section].append({"title": title, "url": url, "description": desc}) - self.log(' ', title, url, '\n ', desc) - return [(section, articles) for section, articles in feeds_dict.items()] - else: - return [] - def eco_find_image_tables(self, soup): for x in soup.findAll('table', align=['right', 'center']): if len(x.findAll('font')) in (1, 2) and len(x.findAll('img')) == 1: @@ -330,3 +455,12 @@ class Economist(BasicNewsRecipe): if url.endswith('/print'): url = url.rpartition('/')[0] return BasicNewsRecipe.canonicalize_internal_url(self, url, is_link=is_link) + + +def get_login_cookies(username, password): + print(33333333333, username, 
password) + + +if __name__ == '__main__': + import sys + get_login_cookies(sys.argv[-2], sys.argv[-1]) diff --git a/recipes/economist_espresso.recipe b/recipes/economist_espresso.recipe index 512bd7dfcfb8..522fe4a715f1 100644 --- a/recipes/economist_espresso.recipe +++ b/recipes/economist_espresso.recipe @@ -56,6 +56,9 @@ class Espresso(BasicNewsRecipe): ), ] + def print_version(self, url): + return 'https://webcache.googleusercontent.com/search?q=cache:' + url + def preprocess_html(self, soup): if h1 := soup.find('h1'): if p := h1.find_next_sibling('p'): diff --git a/recipes/economist_free.recipe b/recipes/economist_free.recipe index e6dcd3acb851..960df8f62422 100644 --- a/recipes/economist_free.recipe +++ b/recipes/economist_free.recipe @@ -2,17 +2,22 @@ # License: GPLv3 Copyright: 2008, Kovid Goyal import json +import time from collections import defaultdict +from datetime import datetime, timedelta +from urllib.parse import quote, urlencode from calibre import replace_entities from calibre.ebooks.BeautifulSoup import NavigableString, Tag +from calibre.ptempfile import PersistentTemporaryFile from calibre.utils.date import parse_only_date from calibre.web.feeds.news import BasicNewsRecipe from html5_parser import parse from lxml import etree -# For past editions, set date to, for example, '2020-11-28' +# For past editions, set date to, for example, '2020-11-28'. edition_date = None +use_archive = True def E(parent, name, text='', **attrs): @@ -52,31 +57,63 @@ class JSONHasNoContent(ValueError): pass -def load_article_from_json(raw, root): - # open('/t/raw.json', 'w').write(raw) - try: - data = json.loads(raw)['props']['pageProps']['content'] - except KeyError as e: - raise JSONHasNoContent(e) - if isinstance(data, list): - data = data[0] - body = root.xpath('//body')[0] - for child in tuple(body): - body.remove(child) - article = E(body, 'article') - E(article, 'h4', data['subheadline'], style='color: red; margin: 0') - E(article, 'h1', data['headline'], style='font-size: x-large') - E(article, 'div', data['description'], style='font-style: italic') - E(article, 'div', (data['datePublishedString'] or '') + ' | ' + (data['dateline'] or ''), style='color: gray; margin: 1em') - main_image_url = safe_dict(data, 'image', 'main', 'url').get('canonical') - if main_image_url: - div = E(article, 'div') +if use_archive: + def load_article_from_json(raw, root): + # open('/t/raw.json', 'w').write(raw) + data = json.loads(raw) + body = root.xpath('//body')[0] + article = E(body, 'article') + E(article, 'div', data['flyTitle'] , style='color: red; font-size:small; font-weight:bold;') + E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '') + E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;') try: - E(div, 'img', src=main_image_url) + date = data['dateModified'] except Exception: - pass - for node in data.get('text') or (): - process_node(node, article) + date = data['datePublished'] + dt = datetime.fromisoformat(date[:-1]) + timedelta(seconds=time.timezone) + dt = dt.strftime('%b %d, %Y, %I:%M %p') + if data['dateline'] is None: + E(article, 'p', dt, style='color: gray; font-size:small;') + else: + E(article, 'p', dt + ' | ' + (data['dateline']), style='color: gray; font-size:small;') + main_image_url = safe_dict(data, 'image', 'main', 'url').get('canonical') + if main_image_url: + div = E(article, 'div') + try: + E(div, 'img', src=main_image_url) + except Exception: + pass + for node in data.get('text') or (): + process_node(node, article) +else: 
+ def load_article_from_json(raw, root): + # open('/t/raw.json', 'w').write(raw) + try: + data = json.loads(raw)['props']['pageProps']['content'] + except KeyError as e: + raise JSONHasNoContent(e) + if isinstance(data, list): + data = data[0] + body = root.xpath('//body')[0] + for child in tuple(body): + body.remove(child) + article = E(body, 'article') + E(article, 'div', replace_entities(data['subheadline']) , style='color: red; font-size:small; font-weight:bold;') + E(article, 'h1', replace_entities(data['headline'])) + E(article, 'div', replace_entities(data['description']), style='font-style: italic; color:#202020;') + if data['dateline'] is None: + E(article, 'p', (data['datePublishedString'] or ''), style='color: gray; font-size:small;') + else: + E(article, 'p', (data['datePublishedString'] or '') + ' | ' + (data['dateline']), style='color: gray; font-size:small;') + main_image_url = safe_dict(data, 'image', 'main', 'url').get('canonical') + if main_image_url: + div = E(article, 'div') + try: + E(div, 'img', src=main_image_url) + except Exception: + pass + for node in data.get('text') or (): + process_node(node, article) def cleanup_html_article(root): @@ -129,31 +166,9 @@ class Economist(BasicNewsRecipe): ' perspective. Best downloaded on Friday mornings (GMT)' ) extra_css = ''' - .headline {font-size: x-large;} - h2 { font-size: small; } - h1 { font-size: medium; } - em.Bold {font-weight:bold;font-style:normal;} - em.Italic {font-style:italic;} - p.xhead {font-weight:bold;} - .pullquote { - float: right; - font-size: larger; - font-weight: bold; - font-style: italic; - page-break-inside:avoid; - border-bottom: 3px solid black; - border-top: 3px solid black; - width: 228px; - margin: 0px 0px 10px 15px; - padding: 7px 0px 9px; - } - .flytitle-and-title__flytitle { - display: block; - font-size: smaller; - color: red; - } + em { color:#202020; } img {display:block; margin:0 auto;} - ''' + ''' oldest_article = 7.0 resolve_internal_links = True remove_tags = [ @@ -186,15 +201,6 @@ class Economist(BasicNewsRecipe): needs_subscription = False - def __init__(self, *args, **kwargs): - BasicNewsRecipe.__init__(self, *args, **kwargs) - if self.output_profile.short_name.startswith('kindle'): - # Reduce image sizes to get file size below amazon's email - # sending threshold - self.web2disk_options.compress_news_images = True - self.web2disk_options.compress_news_images_auto_size = 5 - self.log.warn('Kindle Output profile being used, reducing image quality to keep file size below amazon email threshold') - def get_browser(self, *args, **kwargs): # Needed to bypass cloudflare kwargs['user_agent'] = 'common_words/based' @@ -202,19 +208,170 @@ class Economist(BasicNewsRecipe): br.addheaders += [('Accept-Language', 'en-GB,en-US;q=0.9,en;q=0.8')] return br + def publication_date(self): + if edition_date: + return parse_only_date(edition_date, as_utc=False) + url = self.browser.open("https://www.economist.com/printedition").geturl() + return parse_only_date(url.split("/")[-1], as_utc=False) + + def economist_test_article(self): + return [('Articles', [{'title':'test', + 'url':'https://www.economist.com/the-americas/2024/04/14/elon-musk-is-feuding-with-brazils-powerful-supreme-court' + }])] + + def economist_return_index(self, ans): + if not ans: + raise NoArticles( + 'Could not find any articles, either the ' + 'economist.com server is having trouble and you should ' + 'try later or the website format has changed and the ' + 'recipe needs to be updated.' 
+ ) + return ans + + if use_archive: + def parse_index(self): + # return self.economist_test_article() + url = 'https://www.economist.com/weeklyedition/archive' + if edition_date: + url = 'https://www.economist.com/weeklyedition/' + edition_date + soup = self.index_to_soup(url) + script_tag = soup.find("script", id="__NEXT_DATA__") + if script_tag is None: + raise ValueError('No script tag with JSON data found in the weeklyedition archive') + data = json.loads(script_tag.string) + content_id = data['props']['pageProps']['content']['id'].split('/')[-1] + query = { + 'query': 'query LatestWeeklyAutoEditionQuery($ref:String!){canonical(ref:$ref){hasPart(from:0 size:1 sort:"datePublished:desc"){parts{...WeeklyEditionFragment __typename}__typename}__typename}}fragment WeeklyEditionFragment on Content{id type datePublished image{...ImageCoverFragment __typename}url{canonical __typename}hasPart(size:100 sort:"publication.context.position"){parts{...ArticleFragment __typename}__typename}__typename}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}fragment ImageCoverFragment on Media{cover{headline width height url{canonical __typename}regionsAllowed __typename}__typename}', # noqa + 'operationName': 'LatestWeeklyAutoEditionQuery', + 'variables': '{{"ref":"/content/{}"}}'.format(content_id), + } + if edition_date: + query = { + 'query': 'query SpecificWeeklyEditionQuery($path:String!){section:canonical(ref:$path){...WeeklyEditionFragment __typename}}fragment WeeklyEditionFragment on Content{id type datePublished image{...ImageCoverFragment __typename}url{canonical __typename}hasPart(size:100 sort:"publication.context.position"){parts{...ArticleFragment __typename}__typename}__typename}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id 
__typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}fragment ImageCoverFragment on Media{cover{headline width height url{canonical __typename}regionsAllowed __typename}__typename}', # noqa + 'operationName': 'SpecificWeeklyEditionQuery', + 'variables': '{{"path":"/content/{}"}}'.format(content_id), + } + url = 'https://cp2-graphql-gateway.p.aws.economist.com/graphql?' + urlencode(query, safe='()!', quote_via=quote) + raw = self.index_to_soup(url, raw=True) + ans = self.economist_parse_index(raw) + return self.economist_return_index(ans) + + def economist_parse_index(self, raw): + if edition_date: + data = json.loads(raw)['data']['section'] + else: + data = json.loads(raw)['data']['canonical']['hasPart']['parts'][0] + self.description = data['image']['cover'][0]['headline'] + dt = datetime.fromisoformat(data['datePublished'][:-1]) + timedelta(seconds=time.timezone) + dt = dt.strftime('%b %d, %Y') + self.timefmt = ' [' + dt + ']' + self.cover_url = data['image']['cover'][0]['url']['canonical'].replace('economist.com/', + 'economist.com/cdn-cgi/image/width=960,quality=80,format=auto/') + self.log('Got cover:', self.cover_url) + + feeds_dict = defaultdict(list) + for part in safe_dict(data, "hasPart", "parts"): + try: + section = part['articleSection']['internal'][0]['title'] + except Exception: + section = safe_dict(part, 'print', 'section', 'title') or 'section' + if section not in feeds_dict: + self.log(section) + title = safe_dict(part, "title") + desc = safe_dict(part, "rubric") or '' + sub = safe_dict(part, "flyTitle") or '' + if sub and section != sub: + desc = sub + ' :: ' + desc + pt = PersistentTemporaryFile('.html') + pt.write(json.dumps(part).encode('utf-8')) + pt.close() + url = 'file:///' + pt.name + feeds_dict[section].append({"title": title, "url": url, "description": desc}) + self.log('\t', title, '\n\t\t', desc) + return [(section, articles) for section, articles in feeds_dict.items()] + + def populate_article_metadata(self, article, soup, first): + article.url = soup.find('h1')['title'] + + def preprocess_html(self, soup): + for img in soup.findAll('img', src=True): + img['src'] = img['src'].replace('economist.com/', + 'economist.com/cdn-cgi/image/width=600,quality=80,format=auto/') + return soup + + else: # Load articles from individual article pages {{{ + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + if self.output_profile.short_name.startswith('kindle'): + # Reduce image sizes to get file size below amazon's email + # sending threshold + self.web2disk_options.compress_news_images = True + self.web2disk_options.compress_news_images_auto_size = 5 + self.log.warn('Kindle Output profile being used, reducing image quality to keep file size below amazon email threshold') + + def parse_index(self): + # return self.economist_test_article() + if edition_date: + url = 'https://www.economist.com/weeklyedition/' + edition_date + self.timefmt = ' [' + edition_date + ']' + else: + url = 'https://www.economist.com/weeklyedition' + soup = self.index_to_soup(url) + ans = self.economist_parse_index(soup) + return 
self.economist_return_index(ans) + + def economist_parse_index(self, soup): + script_tag = soup.find("script", id="__NEXT_DATA__") + if script_tag is not None: + data = json.loads(script_tag.string) + # open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True)) + self.description = safe_dict(data, "props", "pageProps", "content", "image", "main", "headline") + self.timefmt = ' [' + safe_dict(data, "props", "pageProps", "content", "datePublishedString") + ']' + self.cover_url = safe_dict(data, "props", "pageProps", "content", "image", "main", "url", "canonical") + self.log('Got cover:', self.cover_url) + + feeds_dict = defaultdict(list) + for part in safe_dict(data, "props", "pageProps", "content", "hasPart", "parts"): + section = safe_dict(part, "print", "section", "headline") or '' + title = safe_dict(part, "headline") or '' + url = safe_dict(part, "url", "canonical") or '' + if not section or not title or not url: + continue + desc = safe_dict(part, "description") or '' + sub = safe_dict(part, "subheadline") or '' + if sub and section != sub: + desc = sub + ' :: ' + desc + feeds_dict[section].append({"title": title, "url": url, "description": desc}) + self.log(' ', title, url, '\n ', desc) + return [(section, articles) for section, articles in feeds_dict.items()] + else: + return [] + + # }}} + + def preprocess_raw_html(self, raw, url): # open('/t/raw.html', 'wb').write(raw.encode('utf-8')) - root = parse(raw) + if use_archive: + body = '
<html><body><article></article></body></html>'
+            root = parse(body)
+            load_article_from_json(raw, root)
+        else:
+            root = parse(raw)
+            script = root.xpath('//script[@id="__NEXT_DATA__"]')
+            if script:
+                try:
+                    load_article_from_json(script[0].text, root)
+                except JSONHasNoContent:
+                    cleanup_html_article(root)
+
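+        # Interactive pieces are rendered client-side in the browser; return a short placeholder page.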
         if '/interactive/' in url:
-            return '<html><body><article><h1>' + root.xpath('//h1')[0].text + '</h1><em>' \
+            return '<html><body><article><h1>' + root.xpath('//h1')[0].text + '</h1><em>' \
                 + 'This article is supposed to be read in a browser' \
                 + '</em></article></body></html>
' - script = root.xpath('//script[@id="__NEXT_DATA__"]') - if script: - try: - load_article_from_json(script[0].text, root) - except JSONHasNoContent: - cleanup_html_article(root) + for div in root.xpath('//div[@class="lazy-image"]'): noscript = list(div.iter('noscript')) if noscript and noscript[0].text: @@ -227,11 +384,15 @@ class Economist(BasicNewsRecipe): for x in root.xpath('//*[name()="script" or name()="style" or name()="source" or name()="meta"]'): x.getparent().remove(x) # the economist uses for small caps with a custom font + for init in root.xpath('//span[@data-caps="initial"]'): + init.set('style', 'font-weight:bold;') for x in root.xpath('//small'): if x.text and len(x) == 0: x.text = x.text.upper() x.tag = 'span' x.set('style', 'font-variant: small-caps') + for h2 in root.xpath('//h2'): + h2.tag = 'h4' for x in root.xpath('//figcaption'): x.set('style', 'text-align:center; font-size:small;') for x in root.xpath('//cite'): @@ -239,17 +400,8 @@ class Economist(BasicNewsRecipe): x.set('style', 'color:#404040;') raw = etree.tostring(root, encoding='unicode') return raw - - def publication_date(self): - if edition_date: - return parse_only_date(edition_date, as_utc=False) - url = self.browser.open("https://www.economist.com/printedition").geturl() - return parse_only_date(url.split("/")[-1], as_utc=False) - - def parse_index(self): - # return [('Articles', [{'title':'test', - # 'url':'https://www.economist.com/interactive/briefing/2022/06/11/huge-foundation-models-are-turbo-charging-ai-progress' - # }])] + def parse_index_from_printedition(self): + # return self.economist_test_article() if edition_date: url = 'https://www.economist.com/weeklyedition/' + edition_date self.timefmt = ' [' + edition_date + ']' @@ -276,33 +428,6 @@ class Economist(BasicNewsRecipe): ) return ans - def economist_parse_index(self, soup): - script_tag = soup.find("script", id="__NEXT_DATA__") - if script_tag is not None: - data = json.loads(script_tag.string) - # open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True)) - # self.title = 'The Economist | ' + safe_dict(data, "props", "pageProps", "content", "image", "main", "headline") - self.timefmt = ' [' + safe_dict(data, "props", "pageProps", "content", "datePublishedString") + ']' - self.cover_url = safe_dict(data, "props", "pageProps", "content", "image", "main", "url", "canonical") - self.log('Got cover:', self.cover_url) - - feeds_dict = defaultdict(list) - for part in safe_dict(data, "props", "pageProps", "content", "hasPart", "parts"): - section = safe_dict(part, "print", "section", "headline") or '' - title = safe_dict(part, "headline") or '' - url = safe_dict(part, "url", "canonical") or '' - if not section or not title or not url: - continue - desc = safe_dict(part, "description") or '' - sub = safe_dict(part, "subheadline") or '' - if sub and section != sub: - desc = sub + ' :: ' + desc - feeds_dict[section].append({"title": title, "url": url, "description": desc}) - self.log(' ', title, url, '\n ', desc) - return [(section, articles) for section, articles in feeds_dict.items()] - else: - return [] - def eco_find_image_tables(self, soup): for x in soup.findAll('table', align=['right', 'center']): if len(x.findAll('font')) in (1, 2) and len(x.findAll('img')) == 1: @@ -330,3 +455,12 @@ class Economist(BasicNewsRecipe): if url.endswith('/print'): url = url.rpartition('/')[0] return BasicNewsRecipe.canonicalize_internal_url(self, url, is_link=is_link) + + +def get_login_cookies(username, password): + print(33333333333, username, 
password) + + +if __name__ == '__main__': + import sys + get_login_cookies(sys.argv[-2], sys.argv[-1]) diff --git a/recipes/el_correo.recipe b/recipes/el_correo.recipe index 4f03835d4684..f83ee410df07 100644 --- a/recipes/el_correo.recipe +++ b/recipes/el_correo.recipe @@ -19,6 +19,8 @@ class elcorreo(BasicNewsRecipe): encoding = 'utf-8' remove_empty_feeds = True resolve_internal_links = True + max_articles_per_feed = 25 # articles + compress_news_images = True extra_css = ''' .v-mdl-ath__inf, .v-mdl-ath__p--2, .v-mdl-ath__p {font-size:small; color:#404040;} diff --git a/recipes/financial_times.recipe b/recipes/financial_times.recipe index a4c6dfc507d7..62384d6aedcb 100644 --- a/recipes/financial_times.recipe +++ b/recipes/financial_times.recipe @@ -146,6 +146,13 @@ class ft(BasicNewsRecipe): return html def preprocess_html(self, soup): + p = soup.find(**classes('o-topper__standfirst')) + if p: + p.name = 'p' + for table in soup.findAll('table'): + if len(table.find('tbody').findAll('tr')) > 20: + table.find('tbody').decompose() + table.string = '** a table that was supposed to be here has been removed.' for con in soup.findAll(attrs={'class':'n-content-layout__slot'}): if con.find('figure'): con['id'] = 'fig' diff --git a/recipes/icons/dilema.png b/recipes/icons/dilema.png new file mode 100644 index 000000000000..86df3de293df Binary files /dev/null and b/recipes/icons/dilema.png differ diff --git a/recipes/icons/internazionale.png b/recipes/icons/internazionale.png new file mode 100644 index 000000000000..09447924625d Binary files /dev/null and b/recipes/icons/internazionale.png differ diff --git a/recipes/icons/parool.png b/recipes/icons/parool.png new file mode 100644 index 000000000000..27af33f1ddd2 Binary files /dev/null and b/recipes/icons/parool.png differ diff --git a/recipes/icons/revista22.png b/recipes/icons/revista22.png new file mode 100644 index 000000000000..41b27a753353 Binary files /dev/null and b/recipes/icons/revista22.png differ diff --git a/recipes/icons/volksrant.png b/recipes/icons/volksrant.png index 57349203ab8e..ec3d3c8a0f1a 100644 Binary files a/recipes/icons/volksrant.png and b/recipes/icons/volksrant.png differ diff --git a/recipes/internazionale.recipe b/recipes/internazionale.recipe new file mode 100644 index 000000000000..d2c1a0bbfc07 --- /dev/null +++ b/recipes/internazionale.recipe @@ -0,0 +1,121 @@ +#!/usr/bin/env python +from calibre.web.feeds.recipes import BasicNewsRecipe + + +class Volkskrant(BasicNewsRecipe): + title = 'Internazionale' + __author__ = 'Cristi Ghera' + max_articles_per_feed = 100 + description = 'Internazionale - Notizie dall’Italia e dal mondo' + needs_subscription = False + language = 'it' + country = 'IT' + category = 'news, politics, Italy, world' + resolve_internal_links = True + remove_tags_before = { 'name': 'article' } + remove_tags_after = { 'name': 'article' } + remove_tags = [ + dict( + attrs={ + 'class': [ + 'item-banner', + 'hentryfeed__side', + 'magazine-article-share-tools', + 'magazine-article-share-popup', + 'article_next', + 'cta_nl_ext_container', + ] + } + ), + dict(name=['script', 'style']), + ] + remove_attributes = ["class", "id", "name", "style"] + encoding = 'utf-8' + no_stylesheets = True + ignore_duplicate_articles = {'url'} + + current_number_url = "https://www.internazionale.it/sommario" + home_url = "https://www.internazionale.it" + cover_url = None + + def extract_article(self, article): + url = article.find('a')['href'] + if url[0] == '/': + url = self.home_url + url + title_parts = [] + tag = 
article.find('div', {'class': 'abstract-article__tag'}) + if tag: + title_parts.append(self.tag_to_string(tag).upper()) + title_parts.append(self.tag_to_string(article.find('div', {'class': 'abstract-article__title'}))) + article_title = ' \u2022 '.join(title_parts) + pubdate='' + description_parts = [] + author = article.find('div', {'class': 'abstract-article__author'}) + if author: + description_parts.append(self.tag_to_string(author)) + summary = article.find('div', {'class': 'abstract-article__content'}) + if summary: + description_parts.append(self.tag_to_string(summary)) + description = ' \u2022 '.join(description_parts) + return dict( + title=article_title, + url=url, + date=pubdate, + description=description, + content='' + ) + + def parse_index(self): + soup = self.index_to_soup(self.current_number_url) + self.cover_url = soup.find('span', { 'class': 'img_expand' })['data-src'] + main_container = soup.find('div', { 'class': 'content_data' }) + children = main_container.findAll('div', recursive=False) + sections = [] + current_section = None + for container in children: + if 'abstract-testatina' in container['class'] or 'abstract-testatina-cultura' in container['class']: + if current_section: + sections.append(current_section) + current_section = (self.tag_to_string(container), []) + continue + + if 'masonry-items' in container['class']: + for article in container.findAll('div', {'class': 'abstract-article'}): + current_section[1].append(self.extract_article(article)) + continue + + if 'abstract-article' in container['class']: + current_section[1].append(self.extract_article(container)) + continue + + # print(container['class']) + if current_section: + sections.append(current_section) + return sections + + def preprocess_html(self, soup): + for node in soup.findAll('figure'): + img_src = None + image_attributes = [ + 'data-media1024', + 'data-media1025', + 'data-media641', + 'data-media321', + 'data-media', + ] + for attr in image_attributes: + if node.has_attr(attr): + img_src = node[attr] + break + node.name = 'div' + if img_src: + img = soup.new_tag('img', src=img_src) + node.insert(0, img) + for node in soup.findAll('figcaption'): + node.name = 'div' + # if self.browser.cookiejar: + # self.browser.cookiejar.clear() + return soup + + def get_cover_url(self): + return self.cover_url diff --git a/recipes/mit_technology_review.recipe b/recipes/mit_technology_review.recipe index 709962c0ba8d..08bbd0b9fc6a 100644 --- a/recipes/mit_technology_review.recipe +++ b/recipes/mit_technology_review.recipe @@ -76,7 +76,9 @@ class MitTechnologyReview(BasicNewsRecipe): soup = self.index_to_soup(self.INDEX) issue = soup.find(attrs={'class':lambda x: x and x.startswith('magazineHero__title')}) time = soup.find(attrs={'class': lambda x: x and x.startswith('magazineHero__date')}) + desc = soup.find(attrs={'class': lambda x: x and x.startswith('magazineHero__description')}) self.title = 'MIT Tech Review ' + self.tag_to_string(issue) + self.description = self.tag_to_string(desc) self.timefmt = ' [' + self.tag_to_string(time) + ']' self.log('Downloading issue: ', self.timefmt) diff --git a/recipes/parool.recipe b/recipes/parool.recipe new file mode 100644 index 000000000000..4f19e35d3975 --- /dev/null +++ b/recipes/parool.recipe @@ -0,0 +1,99 @@ +#!/usr/bin/env python +import json +import uuid +from contextlib import closing + +from calibre.web.feeds.recipes import BasicNewsRecipe +from mechanize import Request + + +class Parool(BasicNewsRecipe): + title = 'Het Parool' + __author__ = 'Cristi Ghera' + 
max_articles_per_feed = 100
+    description = 'Het Parool - Vrij, Onverveerd'
+    needs_subscription = False
+    language = 'nl'
+    country = 'NL'
+    category = 'news, politics, Netherlands'
+    resolve_internal_links = True
+    remove_tags_before = dict(id='main-content')
+    remove_tags_after = dict(id='main-content')
+    remove_tags = [
+        dict(attrs={'class':['article-footer__sharing', 'artstyle__editorial-tips', 'artstyle__advertisement',
+                             'artstyle__container__icon','artstyle__disabled-embed','container__title__icon',]}),
+        dict(attrs={'data-element-id': ['article-element-authors']}),
+        dict(name=['script', 'noscript', 'style']),
+    ]
+    remove_attributes = ["class", "id", "name", "style"]
+    encoding = 'utf-8'
+    no_stylesheets = True
+    ignore_duplicate_articles = {'url'}
+
+    def parse_index(self):
+        soup = self.index_to_soup('https://www.parool.nl/privacy-wall/accept?redirectUri=%2Feditie%2Fvandaag%2F&authId=' + str(uuid.uuid4()))
+        containers = soup.findAll('section', attrs={'class': 'section--horizontal'})
+        sections = []
+        for container in containers:
+            section_title = self.tag_to_string(container.find('h2')).strip()
+            articles = []
+
+            for art in container.findAll('article'):
+                a = art.find('a')
+                url = a['href']
+                if url[0] == '/':
+                    url = 'https://www.parool.nl' + url
+                if '/editie/' not in url:
+                    continue
+                header = a.find('header')
+                teaser_label = self.tag_to_string(header.find('h4').find('span', attrs={'class': 'teaser__label'})).strip()
+                teaser_sublabel = self.tag_to_string(header.find('h4').find('span', attrs={'class': 'teaser__sublabel'})).strip()
+                teaser_title = self.tag_to_string(header.find('h3').find('span', attrs={'class': 'teaser__title__value--short'})).strip()
+                ignore = {"dirkjan", "s1ngle", "pukkels", "hein de kort"}
+                if teaser_label.lower() in ignore:
+                    continue
+                parts = []
+                if teaser_label:
+                    parts.append(teaser_label.upper())
+                if teaser_sublabel:
+                    parts.append(teaser_sublabel)
+                if teaser_title:
+                    parts.append(teaser_title)
+                article_title = ' \u2022 '.join(parts)
+                articles.append(dict(title=article_title,
+                                     url=url,
+                                     content=''))
+
+            sections.append((section_title, articles))
+        return sections
+
+    def preprocess_html(self, soup):
+        for tag in soup():
+            if tag.name == 'img':
+                if tag['src'][0] == '/':
+                    tag['src'] = 'https://www.parool.nl' + tag['src']
+        for tag in soup():
+            if tag.name == "picture":
+                tag.replaceWith(tag.find("img"))
+        comic_articles = {
+            "Alle strips van Dirkjan",
+            "S1NGLE",
+            "Pukkels",
+            "Bekijk hier alle cartoons van Hein de Kort",
+        }
+        if self.tag_to_string(soup.find('h1')).strip() in comic_articles:
+            for node in soup.find('figure').find_next_siblings():
+                node.extract()
+        return soup
+
+    def get_cover_url(self):
+        headers = {
+            'X-Requested-With': 'XMLHttpRequest',
+            'Accept': 'application/json, text/javascript, */*; q=0.01',
+            'DNT': '1',
+        }
+        url = "https://login-api.e-pages.dk/v1/krant.parool.nl/folders"
+        with closing(self.browser.open(Request(url, None, headers))) as r:
+            folders = json.loads(r.read())
+            return folders["objects"][0]["teaser_medium"]
+        return None
diff --git a/recipes/revista22.recipe b/recipes/revista22.recipe
new file mode 100644
index 000000000000..7d2a55b2f1ab
--- /dev/null
+++ b/recipes/revista22.recipe
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+
+class Revista22(BasicNewsRecipe):
+    title = 'Revista 22'
+    __author__ = 'Cristi Ghera'
+    max_articles_per_feed = 100
+    description = 'Revista 22'
+    needs_subscription = False
+    language = 'ro'
+    country = 'RO'
+
category = 'news, politics, Romania' + resolve_internal_links = True + remove_tags_before = { 'class': 'col-span-8' } + remove_tags_after = { 'class': 'col-span-8' } + remove_tags = [ + dict( + attrs={ + 'class': [ + 'icons', + 'float-left', + 'samesection', + ] + } + ), + dict( + name=['div'], + attrs={ + 'class': ['mb-2'] + } + ), + dict(id=['comments']), + dict(name=['script', 'noscript', 'style']), + ] + remove_attributes = ["class", "id", "name", "style"] + encoding = 'utf-8' + no_stylesheets = True + ignore_duplicate_articles = {'url'} + + def parse_index(self): + soup = self.index_to_soup('https://revista22.ro') + url = soup.find('div', attrs={'class': 'uppercase'}).find('a').attrs['href'] + if url[0] == '/': + url = 'https://revista22.ro' + url + soup = self.index_to_soup(url) + main_container = soup.find('div', attrs={'class': 'col-span-8'}) + containers = main_container.findAll(attrs={'class': 'mb-4'}) + articles = [] + for container in containers: + if 'pb-4' not in container.attrs['class']: + continue + a = container.find('a') + url = a['href'] + if url[0] == '/': + url = 'https://revista22.ro' + url + article_title = self.tag_to_string(a.find('h3')).strip() + author = self.tag_to_string( + container.find('span', attrs={'class': 'text-red'}) + ).strip() + summary = self.tag_to_string(container.find('p')).strip() + pubdate = self.tag_to_string(a.find('span')) + description = author + ' - ' + summary + articles.append( + dict( + title=article_title, + url=url, + date=pubdate, + description=description, + content='' + ) + ) + + sections = [('Numărul curent', articles)] + return sections diff --git a/recipes/the_week_magazine_free.recipe b/recipes/the_week_magazine_free.recipe index c2d3717c3058..3f1f0b2dc2d1 100644 --- a/recipes/the_week_magazine_free.recipe +++ b/recipes/the_week_magazine_free.recipe @@ -65,7 +65,7 @@ class TheWeek(BasicNewsRecipe): ] remove_tags = [ - dict(name='aside'), + dict(name=['aside', 'source']), classes( 'blueconic-article__wrapper ad-unit van_vid_carousel tag-links' ) diff --git a/recipes/the_week_uk.recipe b/recipes/the_week_uk.recipe index 0ccf8b9902a9..7652ce29bfdd 100644 --- a/recipes/the_week_uk.recipe +++ b/recipes/the_week_uk.recipe @@ -65,7 +65,7 @@ class TheWeek(BasicNewsRecipe): ] remove_tags = [ - dict(name='aside'), + dict(name=['aside', 'source']), classes( 'blueconic-article__wrapper ad-unit van_vid_carousel tag-links' ) diff --git a/recipes/volksrant.recipe b/recipes/volksrant.recipe index 9a116aa7cee8..da3b850ec483 100644 --- a/recipes/volksrant.recipe +++ b/recipes/volksrant.recipe @@ -1,7 +1,10 @@ #!/usr/bin/env python +import json import uuid +from contextlib import closing from calibre.web.feeds.recipes import BasicNewsRecipe +from mechanize import Request class Volkskrant(BasicNewsRecipe): @@ -95,4 +98,25 @@ class Volkskrant(BasicNewsRecipe): if tag.name == 'img': if tag['src'][0] == '/': tag['src'] = 'https://www.volkskrant.nl' + tag['src'] + + for tag in soup(): + if tag.name == "picture": + tag.replaceWith(tag.find("img")) + + comic_articles = { "Bas van der Schot", "Poldermodellen", "Gummbah", "Sigmund" } + if self.tag_to_string(soup.find('h1')).strip() in comic_articles: + for node in soup.find('figure').find_next_siblings(): + node.extract() return soup + + def get_cover_url(self): + headers = { + 'X-Requested-With': 'XMLHttpRequest', + 'Accept': 'application/json, text/javascript, */*; q=0.01', + 'DNT': '1', + } + url = "https://login-api.e-pages.dk/v1/krant.volkskrant.nl/folders" + with 
closing(self.browser.open(Request(url, None, headers))) as r:
+            folders = json.loads(r.read())
+            return folders["objects"][0]["teaser_medium"]
+        return None
diff --git a/setup/polib.py b/setup/polib.py
new file mode 100644
index 000000000000..f45caa2e1e60
--- /dev/null
+++ b/setup/polib.py
@@ -0,0 +1,1821 @@
+# -*- coding: utf-8 -*-
+#
+# License: MIT (see LICENSE file provided)
+# vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
+
+"""
+**polib** allows you to manipulate, create, modify gettext files (pot, po and
+mo files). You can load existing files, iterate through its entries, add,
+modify entries, comments or metadata, etc. or create new po files from scratch.
+
+**polib** provides a simple and pythonic API via the :func:`~polib.pofile` and
+:func:`~polib.mofile` convenience functions.
+"""
+
+import array
+import codecs
+import os
+import re
+import struct
+import sys
+import textwrap
+import io
+
+
+__author__ = 'David Jean Louis '
+__version__ = '1.2.0'
+__all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
+           'default_encoding', 'escape', 'unescape', 'detect_encoding', ]
+
+
+# the default encoding to use when encoding cannot be detected
+default_encoding = 'utf-8'
+
+# python 2/3 compatibility helpers {{{
+
+
+if sys.version_info < (3,):
+    PY3 = False
+    text_type = unicode
+
+    def b(s):
+        return s
+
+    def u(s):
+        return unicode(s, "unicode_escape")
+
+else:
+    PY3 = True
+    text_type = str
+
+    def b(s):
+        return s.encode("latin-1")
+
+    def u(s):
+        return s
+# }}}
+# _pofile_or_mofile {{{
+
+
+def _pofile_or_mofile(f, type, **kwargs):
+    """
+    Internal function used by :func:`polib.pofile` and :func:`polib.mofile` to
+    honor the DRY concept.
+    """
+    # get the file encoding
+    enc = kwargs.get('encoding')
+    if enc is None:
+        enc = detect_encoding(f, type == 'mofile')
+
+    # parse the file
+    kls = type == 'pofile' and _POFileParser or _MOFileParser
+    parser = kls(
+        f,
+        encoding=enc,
+        check_for_duplicates=kwargs.get('check_for_duplicates', False),
+        klass=kwargs.get('klass')
+    )
+    instance = parser.parse()
+    instance.wrapwidth = kwargs.get('wrapwidth', 78)
+    return instance
+# }}}
+# _is_file {{{
+
+
+def _is_file(filename_or_contents):
+    """
+    Safely returns the value of os.path.isfile(filename_or_contents).
+
+    Arguments:
+
+    ``filename_or_contents``
+        either a filename, or a string holding the contents of some file.
+        In the latter case, this function will always return False.
+    """
+    try:
+        return os.path.isfile(filename_or_contents)
+    except (TypeError, ValueError, UnicodeEncodeError):
+        return False
+# }}}
+# function pofile() {{{
+
+
+def pofile(pofile, **kwargs):
+    """
+    Convenience function that parses the po or pot file ``pofile`` and returns
+    a :class:`~polib.POFile` instance.
+
+    Arguments:
+
+    ``pofile``
+        string, full or relative path to the po/pot file or its content (data).
+
+    ``wrapwidth``
+        integer, the wrap width, only useful when the ``-w`` option was passed
+        to xgettext (optional, default: ``78``).
+
+    ``encoding``
+        string, the encoding to use (e.g. "utf-8") (default: ``None``, the
+        encoding will be auto-detected).
+
+    ``check_for_duplicates``
+        whether to check for duplicate entries when adding entries to the
+        file (optional, default: ``False``).
+
+    ``klass``
+        class which is used to instantiate the return value (optional,
+        default: ``None``, the return value will be a :class:`~polib.POFile`
+        instance).
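+
+    A minimal usage sketch (the path ``locale/app.po`` is a hypothetical
+    example)::
+
+        import polib
+        po = polib.pofile('locale/app.po')
+        for entry in po.translated_entries():
+            print(entry.msgid, '->', entry.msgstr)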
+    """
+    return _pofile_or_mofile(pofile, 'pofile', **kwargs)
+# }}}
+# function mofile() {{{
+
+
+def mofile(mofile, **kwargs):
+    """
+    Convenience function that parses the mo file ``mofile`` and returns a
+    :class:`~polib.MOFile` instance.
+
+    Arguments:
+
+    ``mofile``
+        string, full or relative path to the mo file or its content (string
+        or bytes).
+
+    ``wrapwidth``
+        integer, the wrap width, only useful when the ``-w`` option was passed
+        to xgettext to generate the po file that was used to format the mo file
+        (optional, default: ``78``).
+
+    ``encoding``
+        string, the encoding to use (e.g. "utf-8") (default: ``None``, the
+        encoding will be auto-detected).
+
+    ``check_for_duplicates``
+        whether to check for duplicate entries when adding entries to the
+        file (optional, default: ``False``).
+
+    ``klass``
+        class which is used to instantiate the return value (optional,
+        default: ``None``, the return value will be a :class:`~polib.MOFile`
+        instance).
+    """
+    return _pofile_or_mofile(mofile, 'mofile', **kwargs)
+# }}}
+# function detect_encoding() {{{
+
+
+def detect_encoding(file, binary_mode=False):
+    """
+    Try to detect the encoding used by the ``file``. The ``file`` argument can
+    be a PO or MO file path or a string containing the contents of the file.
+    If the encoding cannot be detected, the function will return the value of
+    ``default_encoding``.
+
+    Arguments:
+
+    ``file``
+        string, full or relative path to the po/mo file or its content.
+
+    ``binary_mode``
+        boolean, set this to True if ``file`` is a mo file.
+    """
+    PATTERN = r'"?Content-Type:.+? charset=([\w_\-:\.]+)'
+    rxt = re.compile(u(PATTERN))
+    rxb = re.compile(b(PATTERN))
+
+    def charset_exists(charset):
+        """Check whether ``charset`` is valid or not."""
+        try:
+            codecs.lookup(charset)
+        except LookupError:
+            return False
+        return True
+
+    if not _is_file(file):
+        try:
+            match = rxt.search(file)
+        except TypeError:
+            match = rxb.search(file)
+        if match:
+            enc = match.group(1).strip()
+            if not isinstance(enc, text_type):
+                enc = enc.decode('utf-8')
+            if charset_exists(enc):
+                return enc
+    else:
+        # For PY3, always treat as binary
+        if binary_mode or PY3:
+            mode = 'rb'
+            rx = rxb
+        else:
+            mode = 'r'
+            rx = rxt
+        with open(file, mode) as f:
+            for line in f.readlines():
+                match = rx.search(line)
+                if match:
+                    f.close()
+                    enc = match.group(1).strip()
+                    if not isinstance(enc, text_type):
+                        enc = enc.decode('utf-8')
+                    if charset_exists(enc):
+                        return enc
+    return default_encoding
+# }}}
+# function escape() {{{
+
+
+def escape(st):
+    """
+    Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r``, ``\\v``,
+    ``\\b``, ``\\f`` and ``"`` in the given string ``st`` and returns it.
+    """
+    return st.replace('\\', r'\\')\
+             .replace('\t', r'\t')\
+             .replace('\r', r'\r')\
+             .replace('\n', r'\n')\
+             .replace('\v', r'\v')\
+             .replace('\b', r'\b')\
+             .replace('\f', r'\f')\
+             .replace('\"', r'\"')
+# }}}
+# function unescape() {{{
+
+
+def unescape(st):
+    """
+    Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r``, ``\\v``,
+    ``\\b``, ``\\f`` and ``"`` in the given string ``st`` and returns it.
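+
+    For example (illustrative)::
+
+        >>> unescape('double quote: \\"')
+        'double quote: "'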
+ """ + def unescape_repl(m): + m = m.group(1) + if m == 'n': + return '\n' + if m == 't': + return '\t' + if m == 'r': + return '\r' + if m == 'v': + return '\v' + if m == 'b': + return '\b' + if m == 'f': + return '\f' + if m == '\\': + return '\\' + return m # handles escaped double quote + return re.sub(r'\\(\\|n|t|r|v|b|f|")', unescape_repl, st) +# }}} +# function natural_sort() {{{ + + +def natural_sort(lst): + """ + Sort naturally the given list. + Credits: http://stackoverflow.com/a/4836734 + """ + def convert(text): + return int(text) if text.isdigit() else text.lower() + + def alphanum_key(key): + return [convert(c) for c in re.split('([0-9]+)', key)] + + return sorted(lst, key=alphanum_key) + +# }}} +# class _BaseFile {{{ + + +class _BaseFile(list): + """ + Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile` + classes. This class should **not** be instantiated directly. + """ + + def __init__(self, *args, **kwargs): + """ + Constructor, accepts the following keyword arguments: + + ``pofile`` + string, the path to the po or mo file, or its content as a string. + + ``wrapwidth`` + integer, the wrap width, only useful when the ``-w`` option was + passed to xgettext (optional, default: ``78``). + + ``encoding`` + string, the encoding to use, defaults to ``default_encoding`` + global variable (optional). + + ``check_for_duplicates`` + whether to check for duplicate entries when adding entries to the + file, (optional, default: ``False``). + """ + list.__init__(self) + # the opened file handle + pofile = kwargs.get('pofile', None) + if pofile and _is_file(pofile): + self.fpath = pofile + else: + self.fpath = kwargs.get('fpath') + # the width at which lines should be wrapped + self.wrapwidth = kwargs.get('wrapwidth', 78) + # the file encoding + self.encoding = kwargs.get('encoding', default_encoding) + # whether to check for duplicate entries or not + self.check_for_duplicates = kwargs.get('check_for_duplicates', False) + # header + self.header = '' + # both po and mo files have metadata + self.metadata = {} + self.metadata_is_fuzzy = 0 + + def __unicode__(self): + """ + Returns the unicode representation of the file. + """ + ret = [] + entries = [self.metadata_as_entry()] + \ + [e for e in self if not e.obsolete] + for entry in entries: + ret.append(entry.__unicode__(self.wrapwidth)) + for entry in self.obsolete_entries(): + ret.append(entry.__unicode__(self.wrapwidth)) + ret = u('\n').join(ret) + return ret + + if PY3: + def __str__(self): + return self.__unicode__() + else: + def __str__(self): + """ + Returns the string representation of the file. + """ + return unicode(self).encode(self.encoding) + + def __contains__(self, entry): + """ + Overridden ``list`` method to implement the membership test (in and + not in). + The method considers that an entry is in the file if it finds an entry + that has the same msgid (the test is **case sensitive**) and the same + msgctxt (or none for both entries). + + Argument: + + ``entry`` + an instance of :class:`~polib._BaseEntry`. + """ + return self.find(entry.msgid, by='msgid', msgctxt=entry.msgctxt) \ + is not None + + def __eq__(self, other): + return str(self) == str(other) + + def append(self, entry): + """ + Overridden method to check for duplicates entries, if a user tries to + add an entry that is already in the file, the method will raise a + ``ValueError`` exception. + + Argument: + + ``entry`` + an instance of :class:`~polib._BaseEntry`. + """ + # check_for_duplicates may not be defined (yet) when unpickling. 
+ # But if pickling, we never want to check for duplicates anyway. + if getattr(self, 'check_for_duplicates', False) and entry in self: + raise ValueError('Entry "%s" already exists' % entry.msgid) + super(_BaseFile, self).append(entry) + + def insert(self, index, entry): + """ + Overridden method to check for duplicates entries, if a user tries to + add an entry that is already in the file, the method will raise a + ``ValueError`` exception. + + Arguments: + + ``index`` + index at which the entry should be inserted. + + ``entry`` + an instance of :class:`~polib._BaseEntry`. + """ + if self.check_for_duplicates and entry in self: + raise ValueError('Entry "%s" already exists' % entry.msgid) + super(_BaseFile, self).insert(index, entry) + + def metadata_as_entry(self): + """ + Returns the file metadata as a :class:`~polib.POFile` instance. + """ + e = POEntry(msgid='') + mdata = self.ordered_metadata() + if mdata: + strs = [] + for name, value in mdata: + # Strip whitespace off each line in a multi-line entry + strs.append('%s: %s' % (name, value)) + e.msgstr = '\n'.join(strs) + '\n' + if self.metadata_is_fuzzy: + e.flags.append('fuzzy') + return e + + def save(self, fpath=None, repr_method='__unicode__', newline=None): + """ + Saves the po file to ``fpath``. + If it is an existing file and no ``fpath`` is provided, then the + existing file is rewritten with the modified data. + + Keyword arguments: + + ``fpath`` + string, full or relative path to the file. + + ``repr_method`` + string, the method to use for output. + + ``newline`` + string, controls how universal newlines works + """ + if self.fpath is None and fpath is None: + raise IOError('You must provide a file path to save() method') + contents = getattr(self, repr_method)() + if fpath is None: + fpath = self.fpath + if repr_method == 'to_binary': + with open(fpath, 'wb') as fhandle: + fhandle.write(contents) + else: + with io.open( + fpath, + 'w', + encoding=self.encoding, + newline=newline + ) as fhandle: + if not isinstance(contents, text_type): + contents = contents.decode(self.encoding) + fhandle.write(contents) + + # set the file path if not set + if self.fpath is None and fpath: + self.fpath = fpath + + def find(self, st, by='msgid', include_obsolete_entries=False, + msgctxt=False): + """ + Find the entry which msgid (or property identified by the ``by`` + argument) matches the string ``st``. + + Keyword arguments: + + ``st`` + string, the string to search for. + + ``by`` + string, the property to use for comparison (default: ``msgid``). + + ``include_obsolete_entries`` + boolean, whether to also search in entries that are obsolete. + + ``msgctxt`` + string, allows specifying a specific message context for the + search. + """ + if include_obsolete_entries: + entries = self[:] + else: + entries = [e for e in self if not e.obsolete] + matches = [] + for e in entries: + if getattr(e, by) == st: + if msgctxt is not False and e.msgctxt != msgctxt: + continue + matches.append(e) + if len(matches) == 1: + return matches[0] + elif len(matches) > 1: + if not msgctxt: + # find the entry with no msgctx + e = None + for m in matches: + if not m.msgctxt: + e = m + if e: + return e + # fallback to the first entry found + return matches[0] + return None + + def ordered_metadata(self): + """ + Convenience method that returns an ordered version of the metadata + dictionary. The return value is list of tuples (metadata name, + metadata_value). 
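+
+        Well-known header keys such as ``Project-Id-Version`` and
+        ``Content-Type`` come first, in canonical PO header order; any
+        remaining keys follow, sorted naturally.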
+ """ + # copy the dict first + metadata = self.metadata.copy() + data_order = [ + 'Project-Id-Version', + 'Report-Msgid-Bugs-To', + 'POT-Creation-Date', + 'PO-Revision-Date', + 'Last-Translator', + 'Language-Team', + 'Language', + 'MIME-Version', + 'Content-Type', + 'Content-Transfer-Encoding', + 'Plural-Forms' + ] + ordered_data = [] + for data in data_order: + try: + value = metadata.pop(data) + ordered_data.append((data, value)) + except KeyError: + pass + # the rest of the metadata will be alphabetically ordered since there + # are no specs for this AFAIK + for data in natural_sort(metadata.keys()): + value = metadata[data] + ordered_data.append((data, value)) + return ordered_data + + def to_binary(self): + """ + Return the binary representation of the file. + """ + offsets = [] + entries = self.translated_entries() + + # the keys are sorted in the .mo file + def cmp(_self, other): + # msgfmt compares entries with msgctxt if it exists + self_msgid = _self.msgctxt and _self.msgctxt or _self.msgid + other_msgid = other.msgctxt and other.msgctxt or other.msgid + if self_msgid > other_msgid: + return 1 + elif self_msgid < other_msgid: + return -1 + else: + return 0 + # add metadata entry + entries.sort(key=lambda o: o.msgid_with_context.encode('utf-8')) + mentry = self.metadata_as_entry() + entries = [mentry] + entries + entries_len = len(entries) + ids, strs = b(''), b('') + for e in entries: + # For each string, we need size and file offset. Each string is + # NUL terminated; the NUL does not count into the size. + msgid = b('') + if e.msgctxt: + # Contexts are stored by storing the concatenation of the + # context, a byte, and the original string + msgid = self._encode(e.msgctxt + '\4') + if e.msgid_plural: + msgstr = [] + for index in sorted(e.msgstr_plural.keys()): + msgstr.append(e.msgstr_plural[index]) + msgid += self._encode(e.msgid + '\0' + e.msgid_plural) + msgstr = self._encode('\0'.join(msgstr)) + else: + msgid += self._encode(e.msgid) + msgstr = self._encode(e.msgstr) + offsets.append((len(ids), len(msgid), len(strs), len(msgstr))) + ids += msgid + b('\0') + strs += msgstr + b('\0') + + # The header is 7 32-bit unsigned integers. + keystart = 7 * 4 + 16 * entries_len + # and the values start after the keys + valuestart = keystart + len(ids) + koffsets = [] + voffsets = [] + # The string table first has the list of keys, then the list of values. + # Each entry has first the size of the string, then the file offset. + for o1, l1, o2, l2 in offsets: + koffsets += [l1, o1 + keystart] + voffsets += [l2, o2 + valuestart] + offsets = koffsets + voffsets + + output = struct.pack( + "Iiiiiii", + # Magic number + MOFile.MAGIC, + # Version + 0, + # number of entries + entries_len, + # start of key index + 7 * 4, + # start of value index + 7 * 4 + entries_len * 8, + # size and offset of hash table, we don't use hash tables + 0, keystart + + ) + if PY3 and sys.version_info.minor > 1: # python 3.2 or superior + output += array.array("i", offsets).tobytes() + else: + output += array.array("i", offsets).tostring() + output += ids + output += strs + return output + + def _encode(self, mixed): + """ + Encodes the given ``mixed`` argument with the file encoding if and + only if it's an unicode string and returns the encoded string. + """ + if isinstance(mixed, text_type): + mixed = mixed.encode(self.encoding) + return mixed +# }}} +# class POFile {{{ + + +class POFile(_BaseFile): + """ + Po (or Pot) file reader/writer. 
+ This class inherits the :class:`~polib._BaseFile` class and, by extension, + the python ``list`` type. + """ + + def __unicode__(self): + """ + Returns the unicode representation of the po file. + """ + ret, headers = '', self.header.split('\n') + for header in headers: + if not len(header): + ret += "#\n" + elif header[:1] in [',', ':']: + ret += '#%s\n' % header + else: + ret += '# %s\n' % header + + if not isinstance(ret, text_type): + ret = ret.decode(self.encoding) + + return ret + _BaseFile.__unicode__(self) + + def save_as_mofile(self, fpath): + """ + Saves the binary representation of the file to given ``fpath``. + + Keyword argument: + + ``fpath`` + string, full or relative path to the mo file. + """ + _BaseFile.save(self, fpath, 'to_binary') + + def percent_translated(self): + """ + Convenience method that returns the percentage of translated + messages. + """ + total = len([e for e in self if not e.obsolete]) + if total == 0: + return 100 + translated = len(self.translated_entries()) + return int(translated * 100 / float(total)) + + def translated_entries(self): + """ + Convenience method that returns the list of translated entries. + """ + return [e for e in self if e.translated()] + + def untranslated_entries(self): + """ + Convenience method that returns the list of untranslated entries. + """ + return [e for e in self if not e.translated() and not e.obsolete + and not e.fuzzy] + + def fuzzy_entries(self): + """ + Convenience method that returns the list of fuzzy entries. + """ + return [e for e in self if e.fuzzy and not e.obsolete] + + def obsolete_entries(self): + """ + Convenience method that returns the list of obsolete entries. + """ + return [e for e in self if e.obsolete] + + def merge(self, refpot): + """ + Convenience method that merges the current pofile with the pot file + provided. It behaves exactly as the gettext msgmerge utility: + + * comments of this file will be preserved, but extracted comments and + occurrences will be discarded; + * any translations or comments in the file will be discarded, however, + dot comments and file positions will be preserved; + * the fuzzy flags are preserved. + + Keyword argument: + + ``refpot`` + object POFile, the reference catalog. + """ + # Store entries in dict/set for faster access + self_entries = dict( + (entry.msgid_with_context, entry) for entry in self + ) + refpot_msgids = set(entry.msgid_with_context for entry in refpot) + # Merge entries that are in the refpot + for entry in refpot: + e = self_entries.get(entry.msgid_with_context) + if e is None: + e = POEntry() + self.append(e) + e.merge(entry) + # ok, now we must "obsolete" entries that are not in the refpot anymore + for entry in self: + if entry.msgid_with_context not in refpot_msgids: + entry.obsolete = True +# }}} +# class MOFile {{{ + + +class MOFile(_BaseFile): + """ + Mo file reader/writer. + This class inherits the :class:`~polib._BaseFile` class and, by + extension, the python ``list`` type. + """ + MAGIC = 0x950412de + MAGIC_SWAPPED = 0xde120495 + + def __init__(self, *args, **kwargs): + """ + Constructor, accepts all keywords arguments accepted by + :class:`~polib._BaseFile` class. + """ + _BaseFile.__init__(self, *args, **kwargs) + self.magic_number = None + self.version = 0 + + def save_as_pofile(self, fpath): + """ + Saves the mofile as a pofile to ``fpath``. + + Keyword argument: + + ``fpath`` + string, full or relative path to the file. + """ + _BaseFile.save(self, fpath) + + def save(self, fpath=None): + """ + Saves the mofile to ``fpath``. 
+ + Keyword argument: + + ``fpath`` + string, full or relative path to the file. + """ + _BaseFile.save(self, fpath, 'to_binary') + + def percent_translated(self): + """ + Convenience method to keep the same interface with POFile instances. + """ + return 100 + + def translated_entries(self): + """ + Convenience method to keep the same interface with POFile instances. + """ + return self + + def untranslated_entries(self): + """ + Convenience method to keep the same interface with POFile instances. + """ + return [] + + def fuzzy_entries(self): + """ + Convenience method to keep the same interface with POFile instances. + """ + return [] + + def obsolete_entries(self): + """ + Convenience method to keep the same interface with POFile instances. + """ + return [] +# }}} +# class _BaseEntry {{{ + + +class _BaseEntry(object): + """ + Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes. + This class should **not** be instantiated directly. + """ + + def __init__(self, *args, **kwargs): + """ + Constructor, accepts the following keyword arguments: + + ``msgid`` + string, the entry msgid. + + ``msgstr`` + string, the entry msgstr. + + ``msgid_plural`` + string, the entry msgid_plural. + + ``msgstr_plural`` + dict, the entry msgstr_plural lines. + + ``msgctxt`` + string, the entry context (msgctxt). + + ``obsolete`` + bool, whether the entry is "obsolete" or not. + + ``encoding`` + string, the encoding to use, defaults to ``default_encoding`` + global variable (optional). + """ + self.msgid = kwargs.get('msgid', '') + self.msgstr = kwargs.get('msgstr', '') + self.msgid_plural = kwargs.get('msgid_plural', '') + self.msgstr_plural = kwargs.get('msgstr_plural', {}) + self.msgctxt = kwargs.get('msgctxt', None) + self.obsolete = kwargs.get('obsolete', False) + self.encoding = kwargs.get('encoding', default_encoding) + + def __unicode__(self, wrapwidth=78): + """ + Returns the unicode representation of the entry. + """ + if self.obsolete: + delflag = '#~ ' + else: + delflag = '' + ret = [] + # write the msgctxt if any + if self.msgctxt is not None: + ret += self._str_field("msgctxt", delflag, "", self.msgctxt, + wrapwidth) + # write the msgid + ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth) + # write the msgid_plural if any + if self.msgid_plural: + ret += self._str_field("msgid_plural", delflag, "", + self.msgid_plural, wrapwidth) + if self.msgstr_plural: + # write the msgstr_plural if any + msgstrs = self.msgstr_plural + keys = list(msgstrs) + keys.sort() + for index in keys: + msgstr = msgstrs[index] + plural_index = '[%s]' % index + ret += self._str_field("msgstr", delflag, plural_index, msgstr, + wrapwidth) + else: + # otherwise write the msgstr + ret += self._str_field("msgstr", delflag, "", self.msgstr, + wrapwidth) + ret.append('') + ret = u('\n').join(ret) + return ret + + if PY3: + def __str__(self): + return self.__unicode__() + else: + def __str__(self): + """ + Returns the string representation of the entry. 
+ """ + return unicode(self).encode(self.encoding) + + def __eq__(self, other): + return str(self) == str(other) + + def _str_field(self, fieldname, delflag, plural_index, field, + wrapwidth=78): + lines = field.splitlines(True) + if len(lines) > 1: + lines = [''] + lines # start with initial empty line + else: + escaped_field = escape(field) + specialchars_count = 0 + for c in ['\\', '\n', '\r', '\t', '\v', '\b', '\f', '"']: + specialchars_count += field.count(c) + # comparison must take into account fieldname length + one space + # + 2 quotes (eg. msgid "") + flength = len(fieldname) + 3 + if plural_index: + flength += len(plural_index) + real_wrapwidth = wrapwidth - flength + specialchars_count + if wrapwidth > 0 and len(field) > real_wrapwidth: + # Wrap the line but take field name into account + lines = [''] + [unescape(item) for item in textwrap.wrap( + escaped_field, + wrapwidth - 2, # 2 for quotes "" + drop_whitespace=False, + break_long_words=False + )] + else: + lines = [field] + if fieldname.startswith('previous_'): + # quick and dirty trick to get the real field name + fieldname = fieldname[9:] + + ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index, + escape(lines.pop(0)))] + for line in lines: + ret.append('%s"%s"' % (delflag, escape(line))) + return ret + + @property + def msgid_with_context(self): + if self.msgctxt: + return '%s%s%s' % (self.msgctxt, "\x04", self.msgid) + return self.msgid +# }}} +# class POEntry {{{ + + +class POEntry(_BaseEntry): + """ + Represents a po file entry. + """ + + def __init__(self, *args, **kwargs): + """ + Constructor, accepts the following keyword arguments: + + ``comment`` + string, the entry comment. + + ``tcomment`` + string, the entry translator comment. + + ``occurrences`` + list, the entry occurrences. + + ``flags`` + list, the entry flags. + + ``previous_msgctxt`` + string, the entry previous context. + + ``previous_msgid`` + string, the entry previous msgid. + + ``previous_msgid_plural`` + string, the entry previous msgid_plural. + + ``linenum`` + integer, the line number of the entry + """ + _BaseEntry.__init__(self, *args, **kwargs) + self.comment = kwargs.get('comment', '') + self.tcomment = kwargs.get('tcomment', '') + self.occurrences = kwargs.get('occurrences', []) + self.flags = kwargs.get('flags', []) + self.previous_msgctxt = kwargs.get('previous_msgctxt', None) + self.previous_msgid = kwargs.get('previous_msgid', None) + self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None) + self.linenum = kwargs.get('linenum', None) + + def __unicode__(self, wrapwidth=78): + """ + Returns the unicode representation of the entry. + """ + ret = [] + # comments first, if any (with text wrapping as xgettext does) + if self.obsolete: + comments = [('tcomment', '# ')] + else: + comments = [('tcomment', '# '), ('comment', '#. 
')] + for c in comments: + val = getattr(self, c[0]) + if val: + for comment in val.split('\n'): + if wrapwidth > 0 and len(comment) + len(c[1]) > wrapwidth: + ret += textwrap.wrap( + comment, + wrapwidth, + initial_indent=c[1], + subsequent_indent=c[1], + break_long_words=False + ) + else: + ret.append('%s%s' % (c[1], comment)) + + # occurrences (with text wrapping as xgettext does) + if not self.obsolete and self.occurrences: + filelist = [] + for fpath, lineno in self.occurrences: + if lineno: + filelist.append('%s:%s' % (fpath, lineno)) + else: + filelist.append(fpath) + filestr = ' '.join(filelist) + if wrapwidth > 0 and len(filestr) + 3 > wrapwidth: + # textwrap split words that contain hyphen, this is not + # what we want for filenames, so the dirty hack is to + # temporally replace hyphens with a char that a file cannot + # contain, like "*" + ret += [line.replace('*', '-') for line in textwrap.wrap( + filestr.replace('-', '*'), + wrapwidth, + initial_indent='#: ', + subsequent_indent='#: ', + break_long_words=False + )] + else: + ret.append('#: ' + filestr) + + # flags (TODO: wrapping ?) + if self.flags: + ret.append('#, %s' % ', '.join(self.flags)) + + # previous context and previous msgid/msgid_plural + fields = ['previous_msgctxt', 'previous_msgid', + 'previous_msgid_plural'] + if self.obsolete: + prefix = "#~| " + else: + prefix = "#| " + for f in fields: + val = getattr(self, f) + if val is not None: + ret += self._str_field(f, prefix, "", val, wrapwidth) + + ret.append(_BaseEntry.__unicode__(self, wrapwidth)) + ret = u('\n').join(ret) + return ret + + def __cmp__(self, other): + """ + Called by comparison operations if rich comparison is not defined. + """ + # First: Obsolete test + if self.obsolete != other.obsolete: + if self.obsolete: + return -1 + else: + return 1 + # Work on a copy to protect original + occ1 = sorted(self.occurrences[:]) + occ2 = sorted(other.occurrences[:]) + if occ1 > occ2: + return 1 + if occ1 < occ2: + return -1 + # Compare context + msgctxt = self.msgctxt or '0' + othermsgctxt = other.msgctxt or '0' + if msgctxt > othermsgctxt: + return 1 + elif msgctxt < othermsgctxt: + return -1 + # Compare msgid_plural + msgid_plural = self.msgid_plural or '0' + othermsgid_plural = other.msgid_plural or '0' + if msgid_plural > othermsgid_plural: + return 1 + elif msgid_plural < othermsgid_plural: + return -1 + # Compare msgstr_plural + if self.msgstr_plural and isinstance(self.msgstr_plural, dict): + msgstr_plural = list(self.msgstr_plural.values()) + else: + msgstr_plural = [] + if other.msgstr_plural and isinstance(other.msgstr_plural, dict): + othermsgstr_plural = list(other.msgstr_plural.values()) + else: + othermsgstr_plural = [] + if msgstr_plural > othermsgstr_plural: + return 1 + elif msgstr_plural < othermsgstr_plural: + return -1 + # Compare msgid + if self.msgid > other.msgid: + return 1 + elif self.msgid < other.msgid: + return -1 + # Compare msgstr + if self.msgstr > other.msgstr: + return 1 + elif self.msgstr < other.msgstr: + return -1 + return 0 + + def __gt__(self, other): + return self.__cmp__(other) > 0 + + def __lt__(self, other): + return self.__cmp__(other) < 0 + + def __ge__(self, other): + return self.__cmp__(other) >= 0 + + def __le__(self, other): + return self.__cmp__(other) <= 0 + + def __eq__(self, other): + return self.__cmp__(other) == 0 + + def __ne__(self, other): + return self.__cmp__(other) != 0 + + def translated(self): + """ + Returns ``True`` if the entry has been translated or ``False`` + otherwise. 
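+
+        An entry counts as translated when its ``msgstr`` (or every plural
+        form, for plural entries) is non-empty and it is neither fuzzy nor
+        obsolete.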
+ """ + if self.obsolete or self.fuzzy: + return False + if self.msgstr != '': + return True + if self.msgstr_plural: + for pos in self.msgstr_plural: + if self.msgstr_plural[pos] == '': + return False + return True + return False + + def merge(self, other): + """ + Merge the current entry with the given pot entry. + """ + self.msgid = other.msgid + self.msgctxt = other.msgctxt + self.occurrences = other.occurrences + self.comment = other.comment + fuzzy = self.fuzzy + self.flags = other.flags[:] # clone flags + if fuzzy: + self.flags.append('fuzzy') + self.msgid_plural = other.msgid_plural + self.obsolete = other.obsolete + self.previous_msgctxt = other.previous_msgctxt + self.previous_msgid = other.previous_msgid + self.previous_msgid_plural = other.previous_msgid_plural + if other.msgstr_plural: + for pos in other.msgstr_plural: + try: + # keep existing translation at pos if any + self.msgstr_plural[pos] + except KeyError: + self.msgstr_plural[pos] = '' + + @property + def fuzzy(self): + return 'fuzzy' in self.flags + + @fuzzy.setter + def fuzzy(self, value): + if value and not self.fuzzy: + self.flags.insert(0, 'fuzzy') + elif not value and self.fuzzy: + self.flags.remove('fuzzy') + + def __hash__(self): + return hash((self.msgid, self.msgstr)) +# }}} +# class MOEntry {{{ + + +class MOEntry(_BaseEntry): + """ + Represents a mo file entry. + """ + def __init__(self, *args, **kwargs): + """ + Constructor, accepts the following keyword arguments, + for consistency with :class:`~polib.POEntry`: + + ``comment`` + ``tcomment`` + ``occurrences`` + ``flags`` + ``previous_msgctxt`` + ``previous_msgid`` + ``previous_msgid_plural`` + + Note: even though these keyword arguments are accepted, + they hold no real meaning in the context of MO files + and are simply ignored. + """ + _BaseEntry.__init__(self, *args, **kwargs) + self.comment = '' + self.tcomment = '' + self.occurrences = [] + self.flags = [] + self.previous_msgctxt = None + self.previous_msgid = None + self.previous_msgid_plural = None + + def __hash__(self): + return hash((self.msgid, self.msgstr)) + +# }}} +# class _POFileParser {{{ + + +class _POFileParser(object): + """ + A finite state machine to efficiently and correctly parse po + file format. + """ + + def __init__(self, pofile, *args, **kwargs): + """ + Constructor. + + Keyword arguments: + + ``pofile`` + string, path to the po file or its content + + ``encoding`` + string, the encoding to use, defaults to ``default_encoding`` + global variable (optional). + + ``check_for_duplicates`` + whether to check for duplicate entries when adding entries to the + file (optional, default: ``False``). + """ + enc = kwargs.get('encoding', default_encoding) + if _is_file(pofile): + try: + self.fhandle = io.open(pofile, 'rt', encoding=enc) + except LookupError: + enc = default_encoding + self.fhandle = io.open(pofile, 'rt', encoding=enc) + else: + self.fhandle = pofile.splitlines() + + klass = kwargs.get('klass') + if klass is None: + klass = POFile + self.instance = klass( + pofile=pofile, + encoding=enc, + check_for_duplicates=kwargs.get('check_for_duplicates', False) + ) + self.transitions = {} + self.current_line = 0 + self.current_entry = POEntry(linenum=self.current_line) + self.current_state = 'st' + self.current_token = None + # two memo flags used in handlers + self.msgstr_index = 0 + self.entry_obsolete = 0 + # Configure the state machine, by adding transitions. 
+ # Signification of symbols: + # * ST: Beginning of the file (start) + # * HE: Header + # * TC: a translation comment + # * GC: a generated comment + # * OC: a file/line occurrence + # * FL: a flags line + # * CT: a message context + # * PC: a previous msgctxt + # * PM: a previous msgid + # * PP: a previous msgid_plural + # * MI: a msgid + # * MP: a msgid plural + # * MS: a msgstr + # * MX: a msgstr plural + # * MC: a msgid or msgstr continuation line + all = ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'pc', 'pm', 'pp', 'tc', + 'ms', 'mp', 'mx', 'mi'] + + self.add('tc', ['st', 'he'], 'he') + self.add('tc', ['gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms', + 'mp', 'mx', 'mi'], 'tc') + self.add('gc', all, 'gc') + self.add('oc', all, 'oc') + self.add('fl', all, 'fl') + self.add('pc', all, 'pc') + self.add('pm', all, 'pm') + self.add('pp', all, 'pp') + self.add('ct', ['st', 'he', 'gc', 'oc', 'fl', 'tc', 'pc', 'pm', + 'pp', 'ms', 'mx'], 'ct') + self.add('mi', ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'tc', 'pc', + 'pm', 'pp', 'ms', 'mx'], 'mi') + self.add('mp', ['tc', 'gc', 'pc', 'pm', 'pp', 'mi'], 'mp') + self.add('ms', ['mi', 'mp', 'tc'], 'ms') + self.add('mx', ['mi', 'mx', 'mp', 'tc'], 'mx') + self.add('mc', ['ct', 'mi', 'mp', 'ms', 'mx', 'pm', 'pp', 'pc'], 'mc') + + def parse(self): + """ + Run the state machine, parse the file line by line and call process() + with the current matched symbol. + """ + try: + keywords = { + 'msgctxt': 'ct', + 'msgid': 'mi', + 'msgstr': 'ms', + 'msgid_plural': 'mp', + } + prev_keywords = { + 'msgid_plural': 'pp', + 'msgid': 'pm', + 'msgctxt': 'pc', + } + tokens = [] + fpath = '%s ' % self.instance.fpath if self.instance.fpath else '' + for line in self.fhandle: + self.current_line += 1 + if self.current_line == 1: + BOM = codecs.BOM_UTF8.decode('utf-8') + if line.startswith(BOM): + line = line[len(BOM):] + line = line.strip() + if line == '': + continue + + tokens = line.split(None, 2) + nb_tokens = len(tokens) + + if tokens[0] == '#~|': + continue + + if tokens[0] == '#~' and nb_tokens > 1: + line = line[3:].strip() + tokens = tokens[1:] + nb_tokens -= 1 + self.entry_obsolete = 1 + else: + self.entry_obsolete = 0 + + # Take care of keywords like + # msgid, msgid_plural, msgctxt & msgstr. 
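+                # A line like: msgid "Some text"
+                # splits into the keyword token and the quoted string that
+                # follows it; the string becomes the current token below.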
+ if tokens[0] in keywords and nb_tokens > 1: + line = line[len(tokens[0]):].lstrip() + if re.search(r'([^\\]|^)"', line[1:-1]): + raise IOError('Syntax error in po file %s(line %s): ' + 'unescaped double quote found' % + (fpath, self.current_line)) + self.current_token = line + self.process(keywords[tokens[0]]) + continue + + self.current_token = line + + if tokens[0] == '#:': + if nb_tokens <= 1: + continue + # we are on a occurrences line + self.process('oc') + + elif line[:1] == '"': + # we are on a continuation line + if re.search(r'([^\\]|^)"', line[1:-1]): + raise IOError('Syntax error in po file %s(line %s): ' + 'unescaped double quote found' % + (fpath, self.current_line)) + self.process('mc') + + elif line[:7] == 'msgstr[': + # we are on a msgstr plural + self.process('mx') + + elif tokens[0] == '#,': + if nb_tokens <= 1: + continue + # we are on a flags line + self.process('fl') + + elif tokens[0] == '#' or tokens[0].startswith('##'): + if line == '#': + line += ' ' + # we are on a translator comment line + self.process('tc') + + elif tokens[0] == '#.': + if nb_tokens <= 1: + continue + # we are on a generated comment line + self.process('gc') + + elif tokens[0] == '#|': + if nb_tokens <= 1: + raise IOError('Syntax error in po file %s(line %s)' % + (fpath, self.current_line)) + + # Remove the marker and any whitespace right after that. + line = line[2:].lstrip() + self.current_token = line + + if tokens[1].startswith('"'): + # Continuation of previous metadata. + self.process('mc') + continue + + if nb_tokens == 2: + # Invalid continuation line. + raise IOError('Syntax error in po file %s(line %s): ' + 'invalid continuation line' % + (fpath, self.current_line)) + + # we are on a "previous translation" comment line, + if tokens[1] not in prev_keywords: + # Unknown keyword in previous translation comment. + raise IOError('Syntax error in po file %s(line %s): ' + 'unknown keyword %s' % + (fpath, self.current_line, + tokens[1])) + + # Remove the keyword and any whitespace + # between it and the starting quote. + line = line[len(tokens[1]):].lstrip() + self.current_token = line + self.process(prev_keywords[tokens[1]]) + + else: + raise IOError('Syntax error in po file %s(line %s)' % + (fpath, self.current_line)) + + if self.current_entry and len(tokens) > 0 and \ + not tokens[0].startswith('#'): + # since entries are added when another entry is found, we must + # add the last entry here (only if there are lines). Trailing + # comments are ignored + self.instance.append(self.current_entry) + + # before returning the instance, check if there's metadata and if + # so extract it in a dict + metadataentry = self.instance.find('') + if metadataentry: # metadata found + # remove the entry + self.instance.remove(metadataentry) + self.instance.metadata_is_fuzzy = metadataentry.flags + key = None + for msg in metadataentry.msgstr.splitlines(): + try: + key, val = msg.split(':', 1) + self.instance.metadata[key] = val.strip() + except (ValueError, KeyError): + if key is not None: + self.instance.metadata[key] += '\n' + msg.strip() + finally: + # close opened file + if not isinstance(self.fhandle, list): # must be file + self.fhandle.close() + return self.instance + + def add(self, symbol, states, next_state): + """ + Add a transition to the state machine. + + Keywords arguments: + + ``symbol`` + string, the matched token (two chars symbol). + + ``states`` + list, a list of states (two chars symbols). + + ``next_state`` + the next state the fsm will have after the action. 
+ """ + for state in states: + action = getattr(self, 'handle_%s' % next_state) + self.transitions[(symbol, state)] = (action, next_state) + + def process(self, symbol): + """ + Process the transition corresponding to the current state and the + symbol provided. + + Keywords arguments: + + ``symbol`` + string, the matched token (two chars symbol). + + ``linenum`` + integer, the current line number of the parsed file. + """ + try: + (action, state) = self.transitions[(symbol, self.current_state)] + if action(): + self.current_state = state + except Exception: + fpath = '%s ' % self.instance.fpath if self.instance.fpath else '' + if hasattr(self.fhandle, 'close'): + self.fhandle.close() + raise IOError('Syntax error in po file %s(line %s)' % + (fpath, self.current_line)) + + # state handlers + + def handle_he(self): + """Handle a header comment.""" + if self.instance.header != '': + self.instance.header += '\n' + self.instance.header += self.current_token[2:] + return 1 + + def handle_tc(self): + """Handle a translator comment.""" + if self.current_state in ['mc', 'ms', 'mx']: + self.instance.append(self.current_entry) + self.current_entry = POEntry(linenum=self.current_line) + if self.current_entry.tcomment != '': + self.current_entry.tcomment += '\n' + tcomment = self.current_token.lstrip('#') + if tcomment.startswith(' '): + tcomment = tcomment[1:] + self.current_entry.tcomment += tcomment + return True + + def handle_gc(self): + """Handle a generated comment.""" + if self.current_state in ['mc', 'ms', 'mx']: + self.instance.append(self.current_entry) + self.current_entry = POEntry(linenum=self.current_line) + if self.current_entry.comment != '': + self.current_entry.comment += '\n' + self.current_entry.comment += self.current_token[3:] + return True + + def handle_oc(self): + """Handle a file:num occurrence.""" + if self.current_state in ['mc', 'ms', 'mx']: + self.instance.append(self.current_entry) + self.current_entry = POEntry(linenum=self.current_line) + occurrences = self.current_token[3:].split() + for occurrence in occurrences: + if occurrence != '': + try: + fil, line = occurrence.rsplit(':', 1) + if not line.isdigit(): + fil = occurrence + line = '' + self.current_entry.occurrences.append((fil, line)) + except (ValueError, AttributeError): + self.current_entry.occurrences.append((occurrence, '')) + return True + + def handle_fl(self): + """Handle a flags line.""" + if self.current_state in ['mc', 'ms', 'mx']: + self.instance.append(self.current_entry) + self.current_entry = POEntry(linenum=self.current_line) + self.current_entry.flags += [c.strip() for c in + self.current_token[3:].split(',')] + return True + + def handle_pp(self): + """Handle a previous msgid_plural line.""" + if self.current_state in ['mc', 'ms', 'mx']: + self.instance.append(self.current_entry) + self.current_entry = POEntry(linenum=self.current_line) + self.current_entry.previous_msgid_plural = \ + unescape(self.current_token[1:-1]) + return True + + def handle_pm(self): + """Handle a previous msgid line.""" + if self.current_state in ['mc', 'ms', 'mx']: + self.instance.append(self.current_entry) + self.current_entry = POEntry(linenum=self.current_line) + self.current_entry.previous_msgid = \ + unescape(self.current_token[1:-1]) + return True + + def handle_pc(self): + """Handle a previous msgctxt line.""" + if self.current_state in ['mc', 'ms', 'mx']: + self.instance.append(self.current_entry) + self.current_entry = POEntry(linenum=self.current_line) + self.current_entry.previous_msgctxt = \ + 
unescape(self.current_token[1:-1]) + return True + + def handle_ct(self): + """Handle a msgctxt.""" + if self.current_state in ['mc', 'ms', 'mx']: + self.instance.append(self.current_entry) + self.current_entry = POEntry(linenum=self.current_line) + self.current_entry.msgctxt = unescape(self.current_token[1:-1]) + return True + + def handle_mi(self): + """Handle a msgid.""" + if self.current_state in ['mc', 'ms', 'mx']: + self.instance.append(self.current_entry) + self.current_entry = POEntry(linenum=self.current_line) + self.current_entry.obsolete = self.entry_obsolete + self.current_entry.msgid = unescape(self.current_token[1:-1]) + return True + + def handle_mp(self): + """Handle a msgid plural.""" + self.current_entry.msgid_plural = unescape(self.current_token[1:-1]) + return True + + def handle_ms(self): + """Handle a msgstr.""" + self.current_entry.msgstr = unescape(self.current_token[1:-1]) + return True + + def handle_mx(self): + """Handle a msgstr plural.""" + index = self.current_token[7] + value = self.current_token[self.current_token.find('"') + 1:-1] + self.current_entry.msgstr_plural[int(index)] = unescape(value) + self.msgstr_index = int(index) + return True + + def handle_mc(self): + """Handle a msgid or msgstr continuation line.""" + token = unescape(self.current_token[1:-1]) + if self.current_state == 'ct': + self.current_entry.msgctxt += token + elif self.current_state == 'mi': + self.current_entry.msgid += token + elif self.current_state == 'mp': + self.current_entry.msgid_plural += token + elif self.current_state == 'ms': + self.current_entry.msgstr += token + elif self.current_state == 'mx': + self.current_entry.msgstr_plural[self.msgstr_index] += token + elif self.current_state == 'pp': + self.current_entry.previous_msgid_plural += token + elif self.current_state == 'pm': + self.current_entry.previous_msgid += token + elif self.current_state == 'pc': + self.current_entry.previous_msgctxt += token + # don't change the current state + return False +# }}} +# class _MOFileParser {{{ + + +class _MOFileParser(object): + """ + A class to parse binary mo files. + """ + + def __init__(self, mofile, *args, **kwargs): + """ + Constructor. + + Keyword arguments: + + ``mofile`` + string, path to the mo file or its content + + ``encoding`` + string, the encoding to use, defaults to ``default_encoding`` + global variable (optional). + + ``check_for_duplicates`` + whether to check for duplicate entries when adding entries to the + file (optional, default: ``False``). + """ + if _is_file(mofile): + self.fhandle = open(mofile, 'rb') + else: + self.fhandle = io.BytesIO(mofile) + + klass = kwargs.get('klass') + if klass is None: + klass = MOFile + self.instance = klass( + fpath=mofile, + encoding=kwargs.get('encoding', default_encoding), + check_for_duplicates=kwargs.get('check_for_duplicates', False) + ) + + def __del__(self): + """ + Make sure the file is closed, this prevents warnings on unclosed file + when running tests with python >= 3.2. + """ + if self.fhandle and hasattr(self.fhandle, 'close'): + self.fhandle.close() + + def parse(self): + """ + Build the instance with the file handle provided in the + constructor. 
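+
+        The data read here follows the GNU gettext MO layout: a magic
+        number, 32-bit header fields, then offset tables for the original
+        and the translated strings.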
+        """
+        # parse magic number
+        magic_number = self._readbinary('<I', 4)
+        if magic_number == MOFile.MAGIC:
+            ii = '<II'
+        elif magic_number == MOFile.MAGIC_SWAPPED:
+            ii = '>II'
+        else:
+            raise IOError('Invalid mo file, magic number is incorrect !')
+        self.instance.magic_number = magic_number
+        # parse the version number and the number of strings
+        version, numofstrings = self._readbinary(ii, 8)
+        # from MO file format specs: "A program seeing an unexpected major
+        # revision number should stop reading the MO file entirely"
+        if version >> 16 not in (0, 1):
+            raise IOError('Invalid mo file, unexpected major revision number')
+        self.instance.version = version
+        # original strings and translation strings hash table offset
+        msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
+        # move to msgid hash table and read length and offset of msgids
+        self.fhandle.seek(msgids_hash_offset)
+        msgids_index = []
+        for i in range(numofstrings):
+            msgids_index.append(self._readbinary(ii, 8))
+        # move to msgstr hash table and read length and offset of msgstrs
+        self.fhandle.seek(msgstrs_hash_offset)
+        msgstrs_index = []
+        for i in range(numofstrings):
+            msgstrs_index.append(self._readbinary(ii, 8))
+        # build entries
+        encoding = self.instance.encoding
+        for i in range(numofstrings):
+            self.fhandle.seek(msgids_index[i][1])
+            msgid = self.fhandle.read(msgids_index[i][0])
+
+            self.fhandle.seek(msgstrs_index[i][1])
+            msgstr = self.fhandle.read(msgstrs_index[i][0])
+            if i == 0 and not msgid:  # metadata
+                raw_metadata, metadata = msgstr.split(b('\n')), {}
+                for line in raw_metadata:
+                    tokens = line.split(b(':'), 1)
+                    if tokens[0] != b(''):
+                        try:
+                            k = tokens[0].decode(encoding)
+                            v = tokens[1].decode(encoding)
+                            metadata[k] = v.strip()
+                        except IndexError:
+                            metadata[k] = u('')
+                self.instance.metadata = metadata
+                continue
+            # test if we have a plural entry
+            msgid_tokens = msgid.split(b('\0'))
+            if len(msgid_tokens) > 1:
+                entry = self._build_entry(
+                    msgid=msgid_tokens[0],
+                    msgid_plural=msgid_tokens[1],
+                    msgstr_plural=dict((k, v) for k, v in
+                                       enumerate(msgstr.split(b('\0'))))
+                )
+            else:
+                entry = self._build_entry(msgid=msgid, msgstr=msgstr)
+            self.instance.append(entry)
+        # close opened file
+        self.fhandle.close()
+        return self.instance
+
+    def _build_entry(self, msgid, msgstr=None, msgid_plural=None,
+                     msgstr_plural=None):
+        msgctxt_msgid = msgid.split(b('\x04'))
+        encoding = self.instance.encoding
+        if len(msgctxt_msgid) > 1:
+            kwargs = {
+                'msgctxt': msgctxt_msgid[0].decode(encoding),
+                'msgid': msgctxt_msgid[1].decode(encoding),
+            }
+        else:
+            kwargs = {'msgid': msgid.decode(encoding)}
+        if msgstr:
+            kwargs['msgstr'] = msgstr.decode(encoding)
+        if msgid_plural:
+            kwargs['msgid_plural'] = msgid_plural.decode(encoding)
+        if msgstr_plural:
+            for k in msgstr_plural:
+                msgstr_plural[k] = msgstr_plural[k].decode(encoding)
+            kwargs['msgstr_plural'] = msgstr_plural
+        return MOEntry(**kwargs)
+
+    def _readbinary(self, fmt, numbytes):
+        """
+        Private method that unpack n bytes of data using format <fmt>.
+        It returns a tuple or a mixed value if the tuple length is 1.
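+
+        For example, ``self._readbinary('<I', 4)`` reads a single
+        little-endian unsigned 32-bit integer from the file handle.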
+        """
+        bytes = self.fhandle.read(numbytes)
+        tup = struct.unpack(fmt, bytes)
+        if len(tup) == 1:
+            return tup[0]
+        return tup
+# }}}
diff --git a/setup/publish.py b/setup/publish.py
index eb831302437c..968860a8f558 100644
--- a/setup/publish.py
+++ b/setup/publish.py
@@ -110,7 +110,8 @@ def pre_sub_commands(self, opts):
         if 'PUBLISH_BUILD_DONE' not in os.environ:
             subprocess.check_call([sys.executable, 'setup.py', 'check'])
             subprocess.check_call([sys.executable, 'setup.py', 'build'])
-            subprocess.check_call([sys.executable, 'setup.py', 'test'])
+            if 'SKIP_CALIBRE_TESTS' not in os.environ:
+                subprocess.check_call([sys.executable, 'setup.py', 'test'])
             subprocess.check_call([sys.executable, 'setup.py', 'pot'])
             subprocess.check_call([sys.executable, 'setup.py', 'translations'])
             os.environ['PUBLISH_BUILD_DONE'] = '1'
@@ -194,7 +195,9 @@ def run(self, opts):
         languages = opts.language or list(
             json.load(open(self.j(base, 'locale', 'completed.json'), 'rb'))
         )
-        languages = ['en'] + list(set(languages) - {'en'})
+        languages = set(languages) - {'en'}
+        languages.discard('ta')  # Tamil translations break Sphinx
+        languages = ['en'] + list(languages)
         os.environ['ALL_USER_MANUAL_LANGUAGES'] = ' '.join(languages)
         for language in languages:
             jobs.append(create_job([
@@ -284,8 +287,9 @@ def build_man_pages(self, dest, compress=False):
             shutil.rmtree(dest)
         os.makedirs(dest)
         base = self.j(self.d(self.SRC), 'manual')
-        languages = list(available_translations())
-        languages = ['en'] + list(set(languages) - {'en', 'en_GB'})
+        languages = set(available_translations())
+        languages.discard('ta')  # Tamil translations are completely broken and break Sphinx
+        languages = ['en'] + list(languages - {'en', 'en_GB'})
         os.environ['ALL_USER_MANUAL_LANGUAGES'] = ' '.join(languages)
         try:
             os.makedirs(dest)
diff --git a/setup/translations.py b/setup/translations.py
index 79a30beca66c..81fd3da758f1 100644
--- a/setup/translations.py
+++ b/setup/translations.py
@@ -642,9 +642,11 @@ def check_all(self):
         self.upload_to_vcs('Fixed translations')

     def check_for_user_manual_errors(self):
+        sys.path.insert(0, self.j(self.d(self.SRC), 'setup'))
+        import polib
+        del sys.path[0]
         self.info('Checking user manual translations...')
         srcbase = self.j(self.d(self.SRC), 'translations', 'manual')
-        import polib
         changes = defaultdict(set)
         for lang in os.listdir(srcbase):
             if lang.startswith('en_') or lang == 'en':
diff --git a/src/calibre/constants.py b/src/calibre/constants.py
index 3b44abf9f148..25f8a8df9b63 100644
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@@ -11,7 +11,7 @@ from polyglot.builtins import environ_item, hasenv

 __appname__ = 'calibre'
-numeric_version = (7, 9, 0)
+numeric_version = (7, 10, 0)
 __version__ = '.'.join(map(str, numeric_version))
 git_version = None
 __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
diff --git a/src/calibre/db/cache.py b/src/calibre/db/cache.py
index b7881d0ce06f..aae4e85de0fb 100644
--- a/src/calibre/db/cache.py
+++ b/src/calibre/db/cache.py
@@ -15,7 +15,6 @@ import weakref
 from collections import defaultdict
 from collections.abc import MutableSet, Set
-from contextlib import closing
 from functools import partial, wraps
 from io import DEFAULT_BUFFER_SIZE, BytesIO
 from queue import Queue
@@ -46,7 +45,7 @@
 from calibre.ebooks.metadata.opf2 import metadata_to_opf
 from calibre.ptempfile import PersistentTemporaryFile, SpooledTemporaryFile, base_dir
 from calibre.utils.config import prefs, tweaks
-from calibre.utils.date import UNDEFINED_DATE, utcnow
+from calibre.utils.date import UNDEFINED_DATE, timestampfromdt, utcnow
from calibre.utils.date import now as nowf from calibre.utils.filenames import make_long_path_useable from calibre.utils.icu import lower as icu_lower @@ -3158,11 +3157,14 @@ def report_progress(fname): mdata = self.format_metadata(book_id, fmt) key = f'{key_prefix}:{book_id}:{fmt}' fm[fmt] = key - with exporter.start_file(key, mtime=mdata.get('mtime')) as dest: - self._copy_format_to(book_id, fmt, dest, report_file_size=dest.ensure_space) + mtime = mdata.get('mtime') + if mtime is not None: + mtime = timestampfromdt(mtime) + with exporter.start_file(key, mtime=mtime) as dest: + self._copy_format_to(book_id, fmt, dest) cover_key = '{}:{}:{}'.format(key_prefix, book_id, '.cover') with exporter.start_file(cover_key) as dest: - if not self.copy_cover_to(book_id, dest, report_file_size=dest.ensure_space): + if not self.copy_cover_to(book_id, dest): dest.discard() else: fm['.cover'] = cover_key @@ -3439,6 +3441,7 @@ def is_null_date(x): dest_value.extend(src_value) self._set_field(field, {dest_id: dest_value}) + def import_library(library_key, importer, library_path, progress=None, abort=None): from calibre.db.backend import DB metadata = importer.metadata[library_key] @@ -3452,27 +3455,22 @@ def report_progress(fname): report_progress('metadata.db') if abort is not None and abort.is_set(): return - with open(os.path.join(library_path, 'metadata.db'), 'wb') as f: - src = importer.start_file(metadata['metadata.db'], 'metadata.db for ' + library_path) - shutil.copyfileobj(src, f) - src.close() + importer.save_file(metadata['metadata.db'], 'metadata.db for ' + library_path, os.path.join(library_path, 'metadata.db')) if 'full-text-search.db' in metadata: if progress is not None: progress('full-text-search.db', 1, total) if abort is not None and abort.is_set(): return poff += 1 - with open(os.path.join(library_path, 'full-text-search.db'), 'wb') as f: - src = importer.start_file(metadata['full-text-search.db'], 'full-text-search.db for ' + library_path) - shutil.copyfileobj(src, f) - src.close() + importer.save_file(metadata['full-text-search.db'], 'full-text-search.db for ' + library_path, + os.path.join(library_path, 'full-text-search.db')) if abort is not None and abort.is_set(): return if 'notes.db' in metadata: import zipfile notes_dir = os.path.join(library_path, NOTES_DIR_NAME) os.makedirs(notes_dir, exist_ok=True) - with closing(importer.start_file(metadata['notes.db'], 'notes.db for ' + library_path)) as stream: + with importer.start_file(metadata['notes.db'], 'notes.db for ' + library_path) as stream: stream.check_hash = False with zipfile.ZipFile(stream) as zf: for zi in zf.infolist(): @@ -3481,6 +3479,8 @@ def report_progress(fname): os.utime(tpath, (date_time, date_time)) if abort is not None and abort.is_set(): return + if importer.corrupted_files: + raise ValueError('Corrupted files:\n' + '\n'.join(importer.corrupted_files)) cache = Cache(DB(library_path, load_user_formatter_functions=False)) cache.init() @@ -3493,20 +3493,22 @@ def report_progress(fname): if progress is not None: progress(title, i + poff, total) cache._update_path((book_id,), mark_as_dirtied=False) - for fmt, fmtkey in iteritems(fmt_key_map): + for fmt, fmtkey in fmt_key_map.items(): if fmt == '.cover': - with closing(importer.start_file(fmtkey, _('Cover for %s') % title)) as stream: + with importer.start_file(fmtkey, _('Cover for %s') % title) as stream: path = cache._field_for('path', book_id).replace('/', os.sep) cache.backend.set_cover(book_id, path, stream, no_processing=True) else: - with 
closing(importer.start_file(fmtkey, _('{0} format for {1}').format(fmt.upper(), title))) as stream:
+                with importer.start_file(fmtkey, _('{0} format for {1}').format(fmt.upper(), title)) as stream:
                     size, fname = cache._do_add_format(book_id, fmt, stream, mtime=stream.mtime)
                     cache.fields['formats'].table.update_fmt(book_id, fmt, fname, size, cache.backend)
             for relpath, efkey in extra_files.get(book_id, {}).items():
-                with closing(importer.start_file(efkey, _('Extra file {0} for book {1}').format(relpath, title))) as stream:
+                with importer.start_file(efkey, _('Extra file {0} for book {1}').format(relpath, title)) as stream:
                     path = cache._field_for('path', book_id).replace('/', os.sep)
                     cache.backend.add_extra_file(relpath, stream, path)
             cache.dump_metadata({book_id})
+    if importer.corrupted_files:
+        raise ValueError('Corrupted files:\n' + '\n'.join(importer.corrupted_files))
     if progress is not None:
         progress(_('Completed'), total, total)
     return cache
diff --git a/src/calibre/db/tests/filesystem.py b/src/calibre/db/tests/filesystem.py
index 2cae7f0c3530..3dc5bd5607ab 100644
--- a/src/calibre/db/tests/filesystem.py
+++ b/src/calibre/db/tests/filesystem.py
@@ -246,6 +246,21 @@ def test_fname_change(self):
     def test_export_import(self):
         from calibre.db.cache import import_library
         from calibre.utils.exim import Exporter, Importer
+        with TemporaryDirectory('export_lib') as tdir:
+            for part_size in (8, 1, 1024):
+                exporter = Exporter(tdir, part_size=part_size + Exporter.tail_size())
+                files = {
+                    'a': b'a' * 7, 'b': b'b' * 7, 'c': b'c' * 2, 'd': b'd' * 9, 'e': b'e' * 3,
+                }
+                for key, data in files.items():
+                    exporter.add_file(BytesIO(data), key)
+                exporter.commit()
+                importer = Importer(tdir)
+                for key, expected in files.items():
+                    with importer.start_file(key, key) as f:
+                        actual = f.read()
+                    self.assertEqual(expected, actual, key)
+                self.assertFalse(importer.corrupted_files)
         cache = self.init_cache()
         bookdir = os.path.dirname(cache.format_abspath(1, '__COVER_INTERNAL__'))
         with open(os.path.join(bookdir, 'exf'), 'w') as f:
@@ -255,13 +270,14 @@ def test_export_import(self):
             f.write('recurse')
         self.assertEqual({ef.relpath for ef in cache.list_extra_files(1, pattern='sub/**/*')}, {'sub/recurse'})
         self.assertEqual({ef.relpath for ef in cache.list_extra_files(1)}, {'exf', 'sub/recurse'})
-        for part_size in (1 << 30, 100, 1):
+        for part_size in (512, 1027, None):
             with TemporaryDirectory('export_lib') as tdir, TemporaryDirectory('import_lib') as idir:
-                exporter = Exporter(tdir, part_size=part_size)
+                exporter = Exporter(tdir, part_size=part_size if part_size is None else (part_size + Exporter.tail_size()))
                 cache.export_library('l', exporter)
                 exporter.commit()
                 importer = Importer(tdir)
                 ic = import_library('l', importer, idir)
+                self.assertFalse(importer.corrupted_files)
                 self.assertEqual(cache.all_book_ids(), ic.all_book_ids())
                 for book_id in cache.all_book_ids():
                     self.assertEqual(cache.cover(book_id), ic.cover(book_id), 'Covers not identical for book: %d' % book_id)
@@ -290,6 +306,7 @@ def test_export_import(self):
             exporter.commit()
             importer = Importer(tdir)
             ic = import_library('l', importer, idir)
+            self.assertFalse(importer.corrupted_files)
             self.assertEqual(ic.fts_search('exim')[0]['id'], 1)
             self.assertEqual(cache.notes_for('authors', 2), ic.notes_for('authors', 2))
             a, b = cache.get_notes_resource(r1), ic.get_notes_resource(r1)
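Note: the new round-trip test above passes part_size + Exporter.tail_size()
because, in the reworked format, part_size is the total size of a part file
including the fixed tail record, so each part's payload capacity is part_size
minus the tail. A small illustrative check of that arithmetic, not part of
the patch:

    import struct

    TAIL_FMT = b'!II?'  # part number, version, is-last flag, from the patch
    tail_size = struct.calcsize(TAIL_FMT)  # 4 + 4 + 1 == 9 bytes

    # For the test's part_size=8 case to fit exactly 8 payload bytes per part,
    # the Exporter is constructed with 8 + tail_size:
    total = 8 + tail_size
    assert total - tail_size == 8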
diff --git a/src/calibre/devices/eb600/driver.py b/src/calibre/devices/eb600/driver.py
index 3db68bbfe740..2c0d65772ad1 100644
--- a/src/calibre/devices/eb600/driver.py
+++ b/src/calibre/devices/eb600/driver.py
@@ -427,7 +427,7 @@ def windows_sort_drives(self, drives):

 class POCKETBOOK740(USBMS):

-    name = 'PocketBook 701 Device Interface'
+    name = 'PocketBook 740 Device Interface'
     gui_name = 'PocketBook'
     description = _('Communicate with the PocketBook 740')
     supported_platforms = ['windows', 'osx', 'linux']
diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py
index ef5de175490f..875471cf4a22 100644
--- a/src/calibre/devices/kobo/driver.py
+++ b/src/calibre/devices/kobo/driver.py
@@ -1412,7 +1412,7 @@ class KOBOTOUCH(KOBO):
     # Starting with firmware version 3.19.x, the last number appears to be a
     # build number. A number will be recorded here but it can be safely ignored
     # when testing the firmware version.
-    max_supported_fwversion = (4, 38, 21908)
+    max_supported_fwversion = (4, 39, 22861)
     # The following document firmware versions where new functionality or devices were added.
     # Not all are used, but this feels like a good place to record it.
     min_fwversion_shelves = (2, 0, 0)
diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py
index ee5fb6d3d68c..a41daae32e4e 100644
--- a/src/calibre/ebooks/docx/to_html.py
+++ b/src/calibre/ebooks/docx/to_html.py
@@ -671,8 +671,11 @@ def convert_run(self, run):
             ctext = child.text
             if space != 'preserve':
                 # Remove leading and trailing whitespace. Word ignores
-                # leading and trailing whitespace without preserve
-                ctext = ctext.strip(' \n\r\t')
+                # leading and trailing whitespace without preserve unless
+                # the element is only whitespace.
+                stripped = ctext.strip(' \n\r\t')
+                if stripped:
+                    ctext = stripped
                 # Only use a <span> with white-space:pre-wrap if this element
                 # actually needs it, i.e. if it has more than one
                 # consecutive space or it has newlines or tabs.
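Note: the DOCX change above fixes whitespace-only text runs being dropped
entirely. Word ignores leading and trailing whitespace in runs without
xml:space="preserve", but a run consisting only of whitespace must survive or
the space between adjacent runs disappears. A minimal illustration of the
rule, not part of the patch:

    def normalize_run_text(ctext, preserve=False):
        # Mirrors the patched logic: strip unless stripping would empty the run.
        if preserve:
            return ctext
        stripped = ctext.strip(' \n\r\t')
        return stripped if stripped else ctext

    assert normalize_run_text('  word  ') == 'word'
    assert normalize_run_text(' ') == ' '          # whitespace-only run is kept
    assert normalize_run_text(' x ', preserve=True) == ' x '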
diff --git a/src/calibre/ebooks/readability/cleaners.py b/src/calibre/ebooks/readability/cleaners.py index fee5aec96fd5..cd52fe2580b0 100644 --- a/src/calibre/ebooks/readability/cleaners.py +++ b/src/calibre/ebooks/readability/cleaners.py @@ -1,7 +1,10 @@ # strip out a set of nuisance html attributes that can mess up rendering in RSS feeds import re -from lxml.html.clean import Cleaner +try: + from lxml_html_clean import Cleaner +except ImportError: + from lxml.html.clean import Cleaner bad_attrs = ['width', 'height', 'style', '[-a-z]*color', 'background[-a-z]*', 'on*'] single_quoted = "'[^']+'" diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index fd501990dca4..04716a3cb77d 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -1189,10 +1189,9 @@ def __init__(self, args=(), force_calibre_style=False, override_program_name=Non args = [override_program_name] + args[1:] self.palette_manager = PaletteManager(force_calibre_style, headless) if headless: - args.extend(('-platformpluginpath', plugins_loc, '-platform', 'headless')) + args.extend(('-platformpluginpath', plugins_loc, '-platform', os.environ.get('CALIBRE_HEADLESS_PLATFORM', 'headless'))) else: args.extend(self.palette_manager.args_to_qt) - self.headless = headless from calibre_extensions import progress_indicator self.pi = progress_indicator @@ -1583,7 +1582,7 @@ def ensure_app(headless=True): args = sys.argv[:1] has_headless = ismacos or islinux or isbsd if headless and has_headless: - args += ['-platformpluginpath', plugins_loc, '-platform', 'headless'] + args += ['-platformpluginpath', plugins_loc, '-platform', os.environ.get('CALIBRE_HEADLESS_PLATFORM', 'headless')] if ismacos: os.environ['QT_MAC_DISABLE_FOREGROUND_APPLICATION_TRANSFORM'] = '1' if headless and iswindows: diff --git a/src/calibre/gui2/dialogs/metadata_bulk.py b/src/calibre/gui2/dialogs/metadata_bulk.py index 85d475829404..12e7021e0534 100644 --- a/src/calibre/gui2/dialogs/metadata_bulk.py +++ b/src/calibre/gui2/dialogs/metadata_bulk.py @@ -224,10 +224,13 @@ def read_file_metadata(self, args): else: db.set_metadata(book_id, mi, allow_case_change=True) if cdata is not None: - db.set_cover({book_id: cdata}) + try: + db.set_cover({book_id: cdata}) + except Exception: + import traceback + traceback.print_exc() self.progress_update.emit(1) self.progress_finished_cur_step.emit() - finally: worker.shutdown() diff --git a/src/calibre/gui2/library/views.py b/src/calibre/gui2/library/views.py index 2d60f4cdfba2..971a5746d2cd 100644 --- a/src/calibre/gui2/library/views.py +++ b/src/calibre/gui2/library/views.py @@ -42,7 +42,7 @@ from calibre import force_unicode from calibre.constants import filesystem_encoding, islinux -from calibre.gui2 import BOOK_DETAILS_DISPLAY_DEBOUNCE_DELAY, FunctionDispatcher, error_dialog, gprefs +from calibre.gui2 import BOOK_DETAILS_DISPLAY_DEBOUNCE_DELAY, FunctionDispatcher, error_dialog, gprefs, show_restart_warning from calibre.gui2.dialogs.enum_values_edit import EnumValuesEdit from calibre.gui2.gestures import GestureManager from calibre.gui2.library import DEFAULT_SORT @@ -68,6 +68,7 @@ ) from calibre.gui2.library.models import BooksModel, DeviceBooksModel from calibre.gui2.pin_columns import PinTableView +from calibre.gui2.preferences.create_custom_column import CreateNewCustomColumn from calibre.utils.config import prefs, tweaks from calibre.utils.icu import primary_sort_key from polyglot.builtins import iteritems @@ -567,6 +568,18 @@ def column_header_context_handler(self, action=None, 
column=None, view=None): view.apply_state(view.get_default_state()) elif action == 'addcustcol': self.add_column_signal.emit() + elif action == 'editcustcol': + def show_restart_dialog(): + from calibre.gui2.preferences.main import must_restart_message + if show_restart_warning(must_restart_message): + self.gui.quit(restart=True) + col_manager = CreateNewCustomColumn(self.gui) + if col_manager.must_restart(): + show_restart_dialog() + else: + res = col_manager.edit_existing_column(column) + if res[0] == CreateNewCustomColumn.Result.COLUMN_EDITED: + show_restart_dialog() elif action.startswith('align_'): alignment = action.partition('_')[-1] self._model.change_alignment(column, alignment) @@ -631,6 +644,13 @@ def create_context_menu(self, col, name, view): ans.addAction(QIcon.ic('width.png'), _('Adjust width of column {0}').format(name), partial(self.manually_adjust_column_size, view, col, name)) + if not isinstance(view, DeviceBooksView): + col_manager = CreateNewCustomColumn(self.gui) + if self.can_add_columns and self.model().is_custom_column(col): + act = ans.addAction(QIcon.ic('edit_input.png'), _('Edit column definition for %s') % name, + partial(handler, action='editcustcol')) + if col_manager.must_restart(): + act.setEnabled(False) if self.is_library_view: if self._model.db.field_metadata[col]['is_category']: act = ans.addAction(QIcon.ic('quickview.png'), _('Quickview column %s') % name, @@ -664,8 +684,10 @@ def create_context_menu(self, col, name, view): partial(handler, action='reset_ondevice_width')) ans.addAction(_('Restore default layout'), partial(handler, action='defaults')) if self.can_add_columns: - ans.addAction( - QIcon.ic('column.png'), _('Add your own columns'), partial(handler, action='addcustcol')) + act = ans.addAction(QIcon.ic('column.png'), _('Add your own columns'), + partial(handler, action='addcustcol')) + col_manager = CreateNewCustomColumn(self.gui) + act.setEnabled(not col_manager.must_restart()) return ans def show_row_header_context_menu(self, pos): diff --git a/src/calibre/gui2/linux_file_dialogs.py b/src/calibre/gui2/linux_file_dialogs.py index f67191344143..126b550db267 100644 --- a/src/calibre/gui2/linux_file_dialogs.py +++ b/src/calibre/gui2/linux_file_dialogs.py @@ -25,17 +25,30 @@ def get_winid(widget=None): return widget.effectiveWinId() +def to_known_dialog_provider_name(q: str) -> str: + uq = q.upper() + if uq in ('KDE', 'LXQT', 'LXDE'): + return 'KDE' + if uq in ('GNOME', 'GNOME-FLASHBACK', 'GNOME-FLASHBACK:GNOME', 'MATE', 'XFCE'): + return 'GNOME' + return '' + + def detect_desktop_environment(): de = os.getenv('XDG_CURRENT_DESKTOP') if de: - return de.upper().split(':', 1)[0] + for x in de.split(':'): + q = to_known_dialog_provider_name(x) + if q: + return q if os.getenv('KDE_FULL_SESSION') == 'true': return 'KDE' if os.getenv('GNOME_DESKTOP_SESSION_ID'): return 'GNOME' ds = os.getenv('DESKTOP_SESSION') if ds and ds.upper() in {'GNOME', 'XFCE'}: - return ds.upper() + return 'GNOME' + return '' def is_executable_present(name): @@ -343,9 +356,9 @@ def check_for_linux_native_dialogs(): if ans is None: de = detect_desktop_environment() order = ('zenity', 'kdialog') - if de in {'GNOME', 'UNITY', 'MATE', 'XFCE'}: + if de == 'GNOME': order = ('zenity',) - elif de in {'KDE', 'LXDE'}: + elif de == 'KDE': order = ('kdialog',) for exe in order: if is_executable_present(exe): diff --git a/src/calibre/gui2/preferences/create_custom_column.py b/src/calibre/gui2/preferences/create_custom_column.py index 3480936c2ca3..981f2e01c853 100644 --- 
a/src/calibre/gui2/preferences/create_custom_column.py +++ b/src/calibre/gui2/preferences/create_custom_column.py @@ -292,9 +292,15 @@ def setup_ui(self): # {{{ self.g = g = QGridLayout() l.addLayout(g) l.addStretch(10) + bbl = QHBoxLayout() + txt = QLabel(_('Pressing OK will require restarting calibre even if nothing was changed')) + txt.setWordWrap(True) + bbl.addWidget(txt) + bbl.addStretch(1) self.button_box = bb = QDialogButtonBox(QDialogButtonBox.StandardButton.Ok | QDialogButtonBox.StandardButton.Cancel, self) bb.accepted.connect(self.accept), bb.rejected.connect(self.reject) - l.addWidget(bb) + bbl.addWidget(bb) + l.addLayout(bbl) def add_row(text, widget): if text is None: @@ -932,6 +938,7 @@ class Result(Enum): INVALID_DISPLAY = 7 EXCEPTION_RAISED = 8 MUST_RESTART = 9 + COLUMN_EDITED = 11 def __init__(self, gui): self.gui = gui @@ -991,20 +998,40 @@ def create_column(self, lookup_name, column_heading, datatype, is_multiple, 'colnum': self.created_count, 'is_multiple': is_multiple, } + + return self._create_or_edit_column(lookup_name, freeze_lookup_name=freeze_lookup_name, + operation='create') + + def edit_existing_column(self, lookup_name): + if lookup_name not in self.custcols: + return self.Result.INVALID_KEY + return self._create_or_edit_column(lookup_name, freeze_lookup_name=False, operation='edit') + + def _create_or_edit_column(self, lookup_name, freeze_lookup_name, operation=None): try: dialog = CreateCustomColumn(self.gui, self, lookup_name, self.gui.library_view.model().orig_headers, freeze_lookup_name=freeze_lookup_name) if dialog.result() == QDialog.DialogCode.Accepted and self.cc_column_key is not None: cc = self.custcols[lookup_name] - self.db.create_custom_column( - label=cc['label'], - name=cc['name'], - datatype=cc['datatype'], - is_multiple=cc['is_multiple'], - display=cc['display']) - self.gui.must_restart_before_config = True - return (self.Result.COLUMN_ADDED, self.cc_column_key) + if operation == 'create': + self.db.create_custom_column( + label=cc['label'], + name=cc['name'], + datatype=cc['datatype'], + is_multiple=bool(cc['is_multiple']), + display=cc['display']) + self.gui.must_restart_before_config = True + return (self.Result.COLUMN_ADDED, self.cc_column_key) + # editing/viewing + if operation == 'edit': + self.db.set_custom_column_metadata(cc['colnum'], name=cc['name'], + label=cc['label'], display=cc['display'], + notify=False) + if '*must_restart' in cc: + self.gui.must_restart_before_config = True + return (self.Result.COLUMN_EDITED, self.cc_column_key) + return (self.Result.CANCELED, self.cc_column_key) except Exception as e: import traceback traceback.print_exc() diff --git a/src/calibre/gui2/preferences/main.py b/src/calibre/gui2/preferences/main.py index 8b78916b040f..51a5913c46e1 100644 --- a/src/calibre/gui2/preferences/main.py +++ b/src/calibre/gui2/preferences/main.py @@ -218,6 +218,12 @@ def __init__(self, parent=None): # }}} + +must_restart_message = _('The changes you have made require calibre be ' + 'restarted immediately. You will not be allowed to ' + 'set any more preferences, until you restart.') + + class Preferences(QDialog): run_wizard_requested = pyqtSignal() @@ -394,13 +400,11 @@ def commit(self, *args): do_restart = False if must_restart: self.must_restart = True - msg = _('Some of the changes you made require a restart.' - ' Please restart calibre as soon as possible.') if rc: - msg = _('The changes you have made require calibre be ' - 'restarted immediately. 
You will not be allowed to ' - 'set any more preferences, until you restart.') - + msg = must_restart_message + else: + msg = _('Some of the changes you made require a restart.' + ' Please restart calibre as soon as possible.') do_restart = show_restart_warning(msg, parent=self) self.showing_widget.refresh_gui(self.gui) diff --git a/src/calibre/gui2/search_box.py b/src/calibre/gui2/search_box.py index 92953f306769..7e03a0a88da0 100644 --- a/src/calibre/gui2/search_box.py +++ b/src/calibre/gui2/search_box.py @@ -57,6 +57,12 @@ def contextMenuEvent(self, ev): else: menu.addAction(ac) menu.addSeparator() + ac = menu.addAction(_('Invert current search')) + ac.setEnabled(bool(self.text().strip())) + ac.setIcon(QIcon.ic('search.png')) + ac.triggered.connect(self.invert_search) + menu.addAction(ac) + menu.addSeparator() if self.as_url is not None: url = self.as_url(self.text()) if url: @@ -64,6 +70,17 @@ def contextMenuEvent(self, ev): menu.addAction(_('&Clear search history')).triggered.connect(self.clear_history) menu.exec(ev.globalPos()) + def invert_search(self): + q = self.text().strip() + if q: + if q.startswith('NOT ( ') and q.endswith(' )'): + q = q[6:-2] + else: + q = f'NOT ( {q} )' + self.setText(q) + ev = QKeyEvent(QEvent.Type.KeyPress, Qt.Key.Key_Enter, Qt.KeyboardModifier.NoModifier) + self.keyPressEvent(ev) + def paste_and_search(self): self.paste() ev = QKeyEvent(QEvent.Type.KeyPress, Qt.Key.Key_Enter, Qt.KeyboardModifier.NoModifier) @@ -452,6 +469,7 @@ def add_action(current_menu, whole_name, last_component, func=None): use_hierarchy = 'search' in db.new_api.pref('categories_using_hierarchy', []) submenus = {} for name in sorted(db.saved_search_names(), key=lambda x: primary_sort_key(x.strip())): + display_name = name.replace('&', '&&') current_menu = menu if use_hierarchy: components = tuple(n.strip() for n in name.split('.')) @@ -461,14 +479,16 @@ def add_action(current_menu, whole_name, last_component, func=None): for i,c in enumerate(hierarchy, start=1): hierarchical_prefix = '.'.join(hierarchy[:i]) if hierarchical_prefix not in submenus: - current_menu = current_menu.addMenu(c) + current_menu = current_menu.addMenu(c.replace('&', '&&')) current_menu.setIcon(folder_icon) submenus[hierarchical_prefix] = current_menu else: current_menu = submenus[hierarchical_prefix] - ac = add_action(current_menu, name, last, partial(self.search.set_search_string, 'search:"='+name+'"')) + ac = add_action(current_menu, display_name, last.replace('&', '&&'), + partial(self.search.set_search_string, 'search:"='+name+'"')) else: - ac = add_action(current_menu, name, name, partial(self.search.set_search_string, 'search:"='+name+'"')) + ac = add_action(current_menu, display_name, display_name, + partial(self.search.set_search_string, 'search:"='+name+'"')) if ac.icon().isNull(): ac.setIcon(search_icon) diff --git a/src/calibre/gui2/search_restriction_mixin.py b/src/calibre/gui2/search_restriction_mixin.py index 3fdcef881239..b91aed7e6422 100644 --- a/src/calibre/gui2/search_restriction_mixin.py +++ b/src/calibre/gui2/search_restriction_mixin.py @@ -590,14 +590,16 @@ def build_search_restriction_list(self): dex = 0 def add_action(current_menu, name, last): nonlocal dex + def compare_fix_amps(name1, name2): + return (self._trim_restriction_name(name1).replace('&&', '&') == + self._trim_restriction_name(name2).replace('&&', '&')) self.search_restriction.addItem(name) txt = self._trim_restriction_name(last) - if self._trim_restriction_name(name) == self._trim_restriction_name(current_restriction): + if 
compare_fix_amps(name, current_restriction): a = current_menu.addAction(self.checked, txt if txt else self.no_restriction) else: a = current_menu.addAction(txt if txt else self.no_restriction) - a.triggered.connect(partial(self.search_restriction_triggered, - action=a, index=dex)) + a.triggered.connect(partial(self.search_restriction_triggered, action=a, index=dex)) dex += 1 return a @@ -649,10 +651,9 @@ def apply_search_restriction(self, i): if i == 1: self.apply_text_search_restriction(str(self.search.currentText())) elif i == 2 and str(self.search_restriction.currentText()).startswith('*'): - self.apply_text_search_restriction( - str(self.search_restriction.currentText())[1:]) + self.apply_text_search_restriction(str(self.search_restriction.currentText())[1:]) else: - r = str(self.search_restriction.currentText()) + r = str(self.search_restriction.currentText()).replace('&&', '&') if r is not None and r != '': restriction = 'search:"%s"'%(r) else: diff --git a/src/calibre/gui2/tweak_book/spell.py b/src/calibre/gui2/tweak_book/spell.py index ffcc777bd2e0..67a0f0f22f65 100644 --- a/src/calibre/gui2/tweak_book/spell.py +++ b/src/calibre/gui2/tweak_book/spell.py @@ -11,6 +11,7 @@ from itertools import chain from threading import Thread +import regex from qt.core import ( QT_VERSION_STR, QAbstractItemView, @@ -75,7 +76,7 @@ ) from calibre.spell.import_from import import_from_online, import_from_oxt from calibre.startup import connect_lambda -from calibre.utils.icu import contains, primary_contains, primary_sort_key, sort_key +from calibre.utils.icu import contains, primary_contains, primary_sort_key, sort_key, upper from calibre.utils.localization import calibre_langcode_to_name, canonicalize_lang, get_lang, get_language from calibre.utils.resources import get_path as P from calibre_extensions.progress_indicator import set_no_activate_on_click @@ -726,6 +727,7 @@ class WordsModel(QAbstractTableModel): def __init__(self, parent=None): QAbstractTableModel.__init__(self, parent) self.counts = (0, 0) + self.all_caps = self.with_numbers = self.camel_case = self.snake_case = False self.words = {} # Map of (word, locale) to location data for the word self.spell_map = {} # Map of (word, locale) to dictionaries.recognized(word, locale) self.sort_on = (0, False) @@ -734,6 +736,9 @@ def __init__(self, parent=None): self.show_only_misspelt = True self.headers = (_('Word'), _('Count'), _('Language'), _('Misspelled?')) self.alignments = Qt.AlignmentFlag.AlignLeft, Qt.AlignmentFlag.AlignRight, Qt.AlignmentFlag.AlignLeft, Qt.AlignmentFlag.AlignHCenter + self.num_pat = regex.compile(r'\d', flags=regex.UNICODE) + self.camel_case_pat = regex.compile(r'[a-z][A-Z]', flags=regex.UNICODE) + self.snake_case_pat = regex.compile(r'\w_\w', flags=regex.UNICODE) def rowCount(self, parent=QModelIndex()): return len(self.items) @@ -794,8 +799,10 @@ def sort(self, column, order=Qt.SortOrder.AscendingOrder): self.do_sort() self.endResetModel() - def filter(self, filter_text): + def filter(self, filter_text, *, all_caps=False, with_numbers=False, camel_case=False, snake_case=False): self.filter_expression = filter_text or None + self.all_caps, self.with_numbers = all_caps, with_numbers + self.camel_case, self.snake_case = camel_case, snake_case self.beginResetModel() self.do_filter() self.do_sort() @@ -839,7 +846,16 @@ def filter_item(self, x): if self.show_only_misspelt and self.spell_map[x]: return False func = contains if tprefs['spell_check_case_sensitive_search'] else primary_contains - if self.filter_expression is not 
None and not func(self.filter_expression, x[0]):
+        word = x[0]
+        if self.filter_expression is not None and not func(self.filter_expression, word):
+            return False
+        if self.all_caps and upper(word) == word:
+            return False
+        if self.with_numbers and self.num_pat.search(word) is not None:
+            return False
+        if self.camel_case and self.camel_case_pat.search(word) is not None:
+            return False
+        if self.snake_case and self.snake_case_pat.search(word) is not None:
             return False
         return True

@@ -1149,6 +1165,27 @@ def setup_ui(self):
         t.textChanged.connect(self.do_filter)
         t.setClearButtonEnabled(True)
         l.addWidget(t)
+        h = QHBoxLayout()
+        l.addLayout(h)
+        h.addWidget(QLabel(_('Also hide words:')))
+        any_hide_checked = False
+        def hw(name, title, tooltip):
+            nonlocal any_hide_checked
+            ac = QCheckBox(title)
+            pref_name = f'spell-check-hide-words-{name}'
+            ac.setObjectName(pref_name)
+            ac.setChecked(tprefs.get(pref_name, False))
+            if ac.isChecked():
+                any_hide_checked = True
+            ac.toggled.connect(self.hide_words_toggled)
+            ac.setToolTip(tooltip)
+            h.addWidget(ac)
+            return ac
+        self.all_caps = hw('all-caps', _('ALL CAPS'), _('Hide words with all capital letters'))
+        self.with_numbers = hw('with-numbers', _('with numbers'), _('Hide words that contain numbers'))
+        self.camel_case = hw('camel-case', _('camelCase'), _('Hide words in camelCase'))
+        self.snake_case = hw('snake-case', _('snake_case'), _('Hide words in snake_case'))
+        h.addStretch(10)

         m.h2 = h = QHBoxLayout()
         l.addLayout(h)
@@ -1252,6 +1289,14 @@ def button_action(sc, tt, button):
         self.action_change_word = button_action('ctrl+right', _('Change all occurrences of this word'), self.change_button)
         self.action_show_next_occurrence = button_action('alt+right', _('Show next occurrence of this word in the book'), self.next_occurrence)
+        if any_hide_checked:
+            QTimer.singleShot(0, self.do_filter)
+
+    def hide_words_toggled(self, checked):
+        cb = self.sender()
+        pref_name = cb.objectName()
+        tprefs.set(pref_name, checked)
+        self.do_filter()

     def next_word(self):
         v = self.suggested_list if self.focusWidget() is self.suggested_list else self.words_view
@@ -1465,7 +1510,9 @@ def __exit__(self, *args):

     def do_filter(self):
         text = str(self.filter_text.text()).strip()
         with self:
-            self.words_model.filter(text)
+            self.words_model.filter(
+                text, all_caps=self.all_caps.isChecked(), with_numbers=self.with_numbers.isChecked(),
+                camel_case=self.camel_case.isChecked(), snake_case=self.snake_case.isChecked())

     def refresh(self, change_request=None):
         if not self.isVisible():
diff --git a/src/calibre/headless/headless_integration.cpp b/src/calibre/headless/headless_integration.cpp
index e87bf2c4ca5a..77b6ab0503ef 100644
--- a/src/calibre/headless/headless_integration.cpp
+++ b/src/calibre/headless/headless_integration.cpp
@@ -127,12 +127,10 @@ HeadlessIntegration *HeadlessIntegration::instance()
     return static_cast<HeadlessIntegration *>(QGuiApplicationPrivate::platformIntegration());
 }

-static QString themeName() { return QStringLiteral("headless"); }
-QStringList HeadlessIntegration::themeNames() const
-{
-    return QStringList(themeName());
-}
+#define THEME_NAME "headless"
+
+QStringList HeadlessIntegration::themeNames() const { return QStringList(THEME_NAME); }

 // Restrict the styles to "fusion" to prevent native styles requiring native
 // window handles (eg Windows Vista style) from being used.
@@ -155,7 +153,7 @@ class HeadlessTheme : public QPlatformTheme

 QPlatformTheme *HeadlessIntegration::createPlatformTheme(const QString &name) const
 {
-    return name == themeName() ? new HeadlessTheme() : nullptr;
+    return name == THEME_NAME ? new HeadlessTheme() : nullptr;
 }

 QT_END_NAMESPACE
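Note: the exim.py rework that follows replaces the old grow-on-demand layout
with fixed-capacity part files, each terminated by a tail record packed as
TAIL_FMT. As an illustration only, not part of the patch and assuming a
hypothetical part file path, this is how such a tail can be decoded; the
Importer in the patch does effectively the same thing:

    import os
    import struct

    TAIL_FMT = b'!II?'  # part number, format version, is-last flag, from the patch
    TAIL_SIZE = struct.calcsize(TAIL_FMT)

    def read_part_tail(path):
        # Read the fixed-size record the Exporter appends to every part file.
        with open(path, 'rb') as f:
            f.seek(-TAIL_SIZE, os.SEEK_END)
            return struct.unpack(TAIL_FMT, f.read(TAIL_SIZE))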
diff --git a/src/calibre/utils/exim.py b/src/calibre/utils/exim.py
index 21413045cfda..4e12dde715ea 100644
--- a/src/calibre/utils/exim.py
+++ b/src/calibre/utils/exim.py
@@ -4,6 +4,7 @@

 import errno
 import hashlib
+import io
 import json
 import os
 import shutil
@@ -13,6 +14,7 @@
 import time
 import uuid
 from collections import Counter
+from typing import NamedTuple

 from calibre import prints
 from calibre.constants import config_dir, filesystem_encoding, iswindows
@@ -25,47 +27,33 @@

 # Export {{{

-
-def send_file(from_obj, to_obj, chunksize=1<<20):
-    m = hashlib.sha1()
-    while True:
-        raw = from_obj.read(chunksize)
-        if not raw:
-            break
-        m.update(raw)
-        to_obj.write(raw)
-    return str(m.hexdigest())
-
-
 class FileDest:

     def __init__(self, key, exporter, mtime=None):
         self.exporter, self.key = exporter, key
         self.hasher = hashlib.sha1()
-        self.start_pos = exporter.f.tell()
+        self.start_part_number, self.start_pos = exporter.current_pos()
         self._discard = False
-        self.mtime = None
+        self.mtime = mtime
+        self.size = 0

     def discard(self):
         self._discard = True

-    def ensure_space(self, size):
-        if size > 0:
-            self.exporter.ensure_space(size)
-            self.start_pos = self.exporter.f.tell()
-
     def write(self, data):
+        self.size += len(data)
+        written = self.exporter.write(data)
+        if len(data) != written:
+            raise RuntimeError(f'Exporter failed to write all data: {len(data)} != {written}')
         self.hasher.update(data)
-        self.exporter.f.write(data)

     def flush(self):
         pass

     def close(self):
         if not self._discard:
-            size = self.exporter.f.tell() - self.start_pos
             digest = str(self.hasher.hexdigest())
-            self.exporter.file_metadata[self.key] = (len(self.exporter.parts), self.start_pos, size, digest, self.mtime)
+            self.exporter.file_metadata[self.key] = (self.start_part_number, self.start_pos, self.size, digest, self.mtime)
         del self.exporter, self.hasher

     def __enter__(self):
@@ -77,17 +65,23 @@ def __exit__(self, *args):

 class Exporter:

-    VERSION = 0
+    VERSION = 1
     TAIL_FMT = b'!II?'
# part_num, version, is_last MDATA_SZ_FMT = b'!Q' EXT = '.calibre-data' - def __init__(self, path_to_export_dir, part_size=(1 << 30)): - self.part_size = part_size + @classmethod + def tail_size(cls): + return struct.calcsize(cls.TAIL_FMT) + + def __init__(self, path_to_export_dir, part_size=None): + # default part_size is 1 GB + self.part_size = (1 << 30) if part_size is None else part_size self.base = os.path.abspath(path_to_export_dir) - self.parts = [] - self.new_part() + self.commited_parts = [] + self.current_part = None self.file_metadata = {} + self.tail_sz = self.tail_size() self.metadata = {'file_metadata': self.file_metadata} def set_metadata(self, key, val): @@ -95,47 +89,61 @@ def set_metadata(self, key, val): raise KeyError('The metadata already contains the key: %s' % key) self.metadata[key] = val - @property - def f(self): - return self.parts[-1] + def current_pos(self): + pos = 0 + if self.current_part is not None: + pos = self.current_part.tell() + if pos >= self.part_size - self.tail_sz: + self.new_part() + pos = 0 + return len(self.commited_parts) + 1, pos + + def write(self, data: bytes) -> int: + written = 0 + data = memoryview(data) + while len(data) > 0: + if self.current_part is None: + self.new_part() + max_size = self.part_size - self.tail_sz - self.current_part.tell() + if max_size <= 0: + self.new_part() + max_size = self.part_size - self.tail_sz + chunk = data[:max_size] + w = self.current_part.write(chunk) + data = data[w:] + written += w + return written def new_part(self): - self.parts.append(open(os.path.join( - self.base, f'part-{len(self.parts) + 1:04d}{self.EXT}'), 'wb')) + self.commit_part() + self.current_part = open(os.path.join( + self.base, f'part-{len(self.commited_parts) + 1:04d}{self.EXT}'), 'wb') def commit_part(self, is_last=False): - self.f.write(struct.pack(self.TAIL_FMT, len(self.parts), self.VERSION, is_last)) - self.f.close() - self.parts[-1] = self.f.name - - def ensure_space(self, size): - try: - if size + self.f.tell() < self.part_size: - return - except AttributeError: - raise RuntimeError('This exporter has already been committed, cannot add to it') - self.commit_part() - self.new_part() + if self.current_part is not None: + self.current_part.write(struct.pack(self.TAIL_FMT, len(self.commited_parts) + 1, self.VERSION, is_last)) + self.current_part.close() + self.commited_parts.append(self.current_part.name) + self.current_part = None def commit(self): raw = json.dumps(self.metadata, ensure_ascii=False) if not isinstance(raw, bytes): raw = raw.encode('utf-8') - self.ensure_space(len(raw)) - self.f.write(raw) - self.f.write(struct.pack(self.MDATA_SZ_FMT, len(raw))) + self.new_part() + orig, self.part_size = self.part_size, sys.maxsize + self.write(raw) + self.write(struct.pack(self.MDATA_SZ_FMT, len(raw))) + self.part_size = orig self.commit_part(is_last=True) def add_file(self, fileobj, key): - fileobj.seek(0, os.SEEK_END) - size = fileobj.tell() - fileobj.seek(0) - self.ensure_space(size) - pos = self.f.tell() - digest = send_file(fileobj, self.f) - size = self.f.tell() - pos - mtime = os.fstat(fileobj.fileno()).st_mtime - self.file_metadata[key] = (len(self.parts), pos, size, digest, mtime) + try: + mtime = os.fstat(fileobj.fileno()).st_mtime + except (io.UnsupportedOperation, OSError): + mtime = None + with self.start_file(key, mtime=mtime) as dest: + shutil.copyfileobj(fileobj, dest) def start_file(self, key, mtime=None): return FileDest(key, self, mtime=mtime) @@ -217,47 +225,135 @@ def export(destdir, library_paths=None, 
dbmap=None, progress1=None, progress2=No # Import {{{ +class Chunk(NamedTuple): + part_num: int + pos_in_part: int + size: int + pos_in_file: int + + +class Pos: + + def __init__(self, part, pos_in_part, size, importer): + self.size = size + self.pos_in_file = 0 + self.chunks = chunks = [] + self.open_part = importer.open_part + self.currently_open_part = None + self.currently_open_chunk_index = -1 + + pos = 0 + while size > 0: + part_size = importer.size_of_part(part) + chunk_size = min(size, part_size - pos_in_part) + if chunk_size > 0: + chunks.append(Chunk(part, pos_in_part, chunk_size, pos)) + size -= chunk_size + pos += chunk_size + part += 1 + pos_in_part = 0 + + def close(self): + if self.currently_open_part is not None: + self.currently_open_part.close() + self.currently_open_part = None + self.currently_open_chunk_index = -1 + + def tell(self) -> int: + return self.pos_in_file + + def seek(self, amt, whence=os.SEEK_SET) -> int: + if whence == os.SEEK_SET: + new_pos_in_file = amt + if whence == os.SEEK_END: + new_pos_in_file = self.size + amt + if whence == os.SEEK_CUR: + new_pos_in_file = self.pos_in_file + amt + self.pos_in_file = max(0, min(new_pos_in_file, self.size)) + return self.pos_in_file + + def read(self, size=None): + if size is None or size < 0: + size = self.size + size = min(size, self.size) + amt_left = max(0, self.size - self.pos_in_file) + amt_to_read = min(amt_left, size) + if amt_to_read <= 0: + return b'' + start_chunk = max(0, self.currently_open_chunk_index) + num = len(self.chunks) + ans = [] + chunk_idx = -1 + for i in range(num): + chunk_idx = (start_chunk + i) % num + chunk = self.chunks[chunk_idx] + if chunk.pos_in_file <= self.pos_in_file < chunk.pos_in_file + chunk.size: + break + else: + raise ValueError(f'No chunk found containing {self.pos_in_file=}') + + while amt_to_read > 0: + try: + chunk = self.chunks[chunk_idx] + except IndexError: + break + ans.append(self._read_chunk(chunk, amt_to_read, chunk_idx)) + amt_to_read -= len(ans[-1]) + chunk_idx += 1 + return b''.join(ans) + + def _read_chunk(self, chunk, size, chunk_idx): + if self.currently_open_chunk_index != chunk_idx or self.currently_open_part is None: + self.close() + self.currently_open_part = self.open_part(chunk.part_num) + self.currently_open_chunk_index = chunk_idx + offset_from_start_of_chunk = self.pos_in_file - chunk.pos_in_file + self.currently_open_part.seek(chunk.pos_in_part + offset_from_start_of_chunk, os.SEEK_SET) + size = min(size, chunk.size - offset_from_start_of_chunk) + ans = self.currently_open_part.read(size) + self.pos_in_file += len(ans) + return ans + class FileSource: - def __init__(self, f, size, digest, description, mtime, importer): - self.f, self.size, self.digest, self.description = f, size, digest, description - self.seekable = self.f.seekable + def __init__(self, start_partnum, start_pos, size, digest, description, mtime, importer): + self.size, self.digest, self.description = size, digest, description self.mtime = mtime - self.start = f.tell() - self.end = self.start + size + self.start = start_pos + self.start_partnum = start_partnum + self.pos = Pos(start_partnum, start_pos, size, importer) self.hasher = hashlib.sha1() self.importer = importer self.check_hash = True + def seekable(self): + return False + def seek(self, amt, whence=os.SEEK_SET): - if whence == os.SEEK_SET: - return self.f.seek(self.start + amt, os.SEEK_SET) - if whence == os.SEEK_END: - return self.f.seek(self.end + amt, os.SEEK_SET) - if whence == os.SEEK_CUR: - return self.f.seek(amt, 
whence) + return self.pos.seek(amt, whence) def tell(self): - return self.f.tell() - self.start + return self.pos.tell() def read(self, size=None): - if size is not None and size < 1: - return b'' - left = self.end - self.f.tell() - amt = min(left, size or left) - if amt < 1: - return b'' - ans = self.f.read(amt) - if self.check_hash: + ans = self.pos.read(size) + if self.check_hash and ans: self.hasher.update(ans) return ans def close(self): if self.check_hash and self.hasher.hexdigest() != self.digest: self.importer.corrupted_files.append(self.description) - self.f.close() - self.hasher = self.f = None + self.hasher = None + self.pos.close() + self.pos = None + + def __enter__(self): + return self + + def __exit__(self, *a): + self.close() class Importer: @@ -265,11 +361,14 @@ class Importer: def __init__(self, path_to_export_dir): self.corrupted_files = [] part_map = {} - tail_size = struct.calcsize(Exporter.TAIL_FMT) + self.tail_size = tail_size = struct.calcsize(Exporter.TAIL_FMT) + self.version = -1 for name in os.listdir(path_to_export_dir): if name.lower().endswith(Exporter.EXT): path = os.path.join(path_to_export_dir, name) with open(path, 'rb') as f: + f.seek(0, os.SEEK_END) + size_of_part = f.tell() f.seek(-tail_size, os.SEEK_END) raw = f.read() if len(raw) != tail_size: @@ -279,7 +378,11 @@ def __init__(self, path_to_export_dir): raise ValueError('The exported data in %s is not valid,' ' version (%d) is higher than maximum supported version.' ' You might need to upgrade calibre first.' % (name, version)) - part_map[part_num] = path, is_last + part_map[part_num] = path, is_last, size_of_part + if self.version == -1: + self.version = version + if version != self.version: + raise ValueError(f'The exported data in {name} is not valid as it contains a mix of parts with versions: {self.version} and {version}') nums = sorted(part_map) if not nums: raise ValueError('No exported data found in: %s' % path_to_export_dir) @@ -289,37 +392,44 @@ def __init__(self, path_to_export_dir): raise ValueError('The last part of this exported data set is missing') if len(nums) != nums[-1]: raise ValueError('There are some parts of the exported data set missing') - self.part_map = {num:path for num, (path, is_last) in iteritems(part_map)} + self.part_map, self.part_size_map = {}, {} + for part_num, (path, is_last, size_of_part) in part_map.items(): + self.part_map[part_num] = path + self.part_size_map[part_num] = size_of_part msf = struct.calcsize(Exporter.MDATA_SZ_FMT) offset = tail_size + msf - with self.part(nums[-1]) as f: + with self.open_part(nums[-1]) as f: f.seek(-offset, os.SEEK_END) sz, = struct.unpack(Exporter.MDATA_SZ_FMT, f.read(msf)) f.seek(- sz - offset, os.SEEK_END) self.metadata = json.loads(f.read(sz)) self.file_metadata = self.metadata['file_metadata'] - def part(self, num): + def size_of_part(self, num): + return self.part_size_map[num] - self.tail_size + + def open_part(self, num): return open(self.part_map[num], 'rb') def start_file(self, key, description): partnum, pos, size, digest, mtime = self.file_metadata[key] - f = self.part(partnum) - f.seek(pos) - return FileSource(f, size, digest, description, mtime, self) + return FileSource(partnum, pos, size, digest, description, mtime, self) + + def save_file(self, key, description, output_path): + with open(output_path, 'wb') as dest, self.start_file(key, description) as src: + shutil.copyfileobj(src, dest) def export_config(self, base_dir, library_usage_stats): for key, relpath in self.metadata['config_dir']: - f = 
self.start_file(key, relpath)
-            path = os.path.join(base_dir, relpath.replace('/', os.sep))
-            try:
-                with open(path, 'wb') as dest:
-                    shutil.copyfileobj(f, dest)
-            except OSError:
-                os.makedirs(os.path.dirname(path))
-                with open(path, 'wb') as dest:
-                    shutil.copyfileobj(f, dest)
-            f.close()
+            with self.start_file(key, relpath) as f:
+                path = os.path.join(base_dir, relpath.replace('/', os.sep))
+                try:
+                    with open(path, 'wb') as dest:
+                        shutil.copyfileobj(f, dest)
+                except OSError:
+                    os.makedirs(os.path.dirname(path))
+                    with open(path, 'wb') as dest:
+                        shutil.copyfileobj(f, dest)
         gpath = os.path.join(base_dir, 'global.py')
         try:
             with open(gpath, 'rb') as f:
diff --git a/src/calibre/utils/hyphenation/dictionaries.py b/src/calibre/utils/hyphenation/dictionaries.py
index 000f0b78258b..92f011ec57bd 100644
--- a/src/calibre/utils/hyphenation/dictionaries.py
+++ b/src/calibre/utils/hyphenation/dictionaries.py
@@ -72,7 +72,10 @@ def extract_dicts(cache_path):
         buf.seek(0)
         tf = tarfile.TarFile(fileobj=buf)
         with tf:
-            tf.extractall(tdir)
+            try:
+                tf.extractall(tdir, filter='data')
+            except TypeError:
+                tf.extractall(tdir)
         with open(os.path.join(tdir, 'sha1sum'), 'wb') as f:
             f.write(expected_hash())
         dest = os.path.join(cache_path, 'f')
diff --git a/src/calibre/utils/icu.c b/src/calibre/utils/icu.c
index c67449018e3a..58053bb520b4 100644
--- a/src/calibre/utils/icu.c
+++ b/src/calibre/utils/icu.c
@@ -1231,9 +1231,16 @@ icu_set_filesystem_encoding(PyObject *self, PyObject *args) {
     char *encoding;
     if (!PyArg_ParseTuple(args, "s:setfilesystemencoding", &encoding)) return NULL;
+#if PY_VERSION_HEX < 0x030c0000
+    // The nitwits at Python deprecated this in 3.12 claiming we should use
+    // PyConfig.filesystem_encoding instead. But that can only be used if we
+    // control the interpreter, which we do not in Linux distro builds. Sigh.
+    // Well, if this causes issues we just continue to tell people not to use
+    // Linux distro builds. On frozen aka non-distro builds we set
+    // PyPreConfig.utf8_mode = 1 which supposedly sets this to utf-8 anyway.
     Py_FileSystemDefaultEncoding = strdup(encoding);
+#endif
     Py_RETURN_NONE;
-
 }
 // }}}
diff --git a/src/calibre/web/fetch/simple.py b/src/calibre/web/fetch/simple.py
index 044a0402e59a..7835cdfc73f8 100644
--- a/src/calibre/web/fetch/simple.py
+++ b/src/calibre/web/fetch/simple.py
@@ -17,6 +17,7 @@
 import threading
 import time
 import traceback
+from base64 import standard_b64decode
 from urllib.request import urlopen

 from calibre import browser, relpath, unicode_path
@@ -248,13 +249,18 @@ def fetch_url(self, url):
             ans = response(q)
             ans.newurl = url
             return ans
-        self.log.debug('Fetching', url)
         st = time.monotonic()
+        is_data_url = url.startswith('data:')
+        if not is_data_url:
+            self.log.debug('Fetching', url)
         # Check for a URL pointing to the local filesystem and special case it
         # for efficiency and robustness. Bypasses delay checking as it does not
         # apply to local fetches. Ensures that unicode paths that are not
         # representable in the filesystem_encoding work.
+ if is_data_url: + payload = url.partition(',')[2] + return standard_b64decode(payload) is_local = 0 if url.startswith('file://'): is_local = 7 diff --git a/src/calibre/web/site_parsers/nytimes.py b/src/calibre/web/site_parsers/nytimes.py index c78e3edc08cf..919cabe61a86 100644 --- a/src/calibre/web/site_parsers/nytimes.py +++ b/src/calibre/web/site_parsers/nytimes.py @@ -9,7 +9,7 @@ from calibre.utils.iso8601 import parse_iso8601 -module_version = 4 # needed for live updates +module_version = 5 # needed for live updates pprint @@ -185,15 +185,12 @@ def extract_html(soup): return json_to_html(raw) -def download_url(url=None, br=None): - # Get the URL from the Wayback machine +def download_url_from_wayback(category, url, br=None): from mechanize import Request host = 'http://localhost:8090' host = 'https://wayback1.calibre-ebook.com' - if url is None: - url = sys.argv[-1] rq = Request( - host + '/nytimes', + host + '/' + category, data=json.dumps({"url": url}), headers={'User-Agent': 'calibre', 'Content-Type': 'application/json'} ) @@ -204,6 +201,13 @@ def download_url(url=None, br=None): return br.open_novisit(rq, timeout=3 * 60).read() +def download_url(url=None, br=None): + # Get the URL from the Wayback machine + if url is None: + url = sys.argv[-1] + return download_url_from_wayback('nytimes', url, br) + + if __name__ == '__main__': f = sys.argv[-1] raw = open(f).read()
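Note: the fetch_url() change above short-circuits data: URLs instead of trying
to fetch them over the network. A minimal illustration of the decoding branch,
not part of the patch; it assumes the base64 form of RFC 2397 data URLs, since
a non-base64 (percent-encoded) data URL would need urllib.parse.unquote instead:

    from base64 import standard_b64decode

    def decode_data_url(url):
        # 'data:<mediatype>;base64,<payload>' -> the raw payload bytes
        payload = url.partition(',')[2]
        return standard_b64decode(payload)

    assert decode_data_url('data:text/plain;base64,aGVsbG8=') == b'hello'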