Skip to content

Commit

Permalink
Merge pull request #3653 from rebeccacremona/remove-perma-capture-engine-checks
Browse files Browse the repository at this point in the history

Remove references to the Perma capture engine.
  • Loading branch information
rebeccacremona authored Nov 13, 2024
2 parents 64d5b83 + e73b98f commit 7230e0e
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 34 deletions.
27 changes: 5 additions & 22 deletions perma_web/api/tests/test_link_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def setUp(self):
def assertRecordsInWarc(self, link, upload=False, expected_records=None, check_screenshot=False, check_provenance_summary=False):

def find_recording_in_warc(index, capture_url, content_type):
warc_content_type = f"application/http;{ '' if settings.CAPTURE_ENGINE == 'perma' else ' '}msgtype=response"
warc_content_type = "application/http; msgtype=response"
return next(
(entry for entry in index if
entry['content-type'] == warc_content_type and
Expand Down Expand Up @@ -141,10 +141,7 @@ def find_attachment_in_warc(index, capture_url):
if check_screenshot:
self.assertEqual(link.screenshot_capture.status, 'success')
self.assertTrue(link.screenshot_capture.content_type, "Capture is missing a content type.")
if settings.CAPTURE_ENGINE == 'perma':
self.assertTrue(find_file_in_warc(index, link.screenshot_capture.url, link.screenshot_capture.content_type))
else:
self.assertTrue(find_attachment_in_warc(index, link.screenshot_capture.url))
self.assertTrue(find_attachment_in_warc(index, link.screenshot_capture.url))

# repeat for the provenance summary
if check_provenance_summary:
Expand Down Expand Up @@ -404,21 +401,12 @@ def test_should_create_archive_from_html_url(self):
user=self.org_user)

link = Link.objects.get(guid=obj['guid'])
self.assertRecordsInWarc(link, check_screenshot=True, check_provenance_summary=(settings.CAPTURE_ENGINE == 'scoop-api'))
self.assertRecordsInWarc(link, check_screenshot=True, check_provenance_summary=True)
self.assertTrue(link.primary_capture.content_type.startswith('text/html'))

if settings.CAPTURE_ENGINE == 'perma':
# test favicon captured via meta tag
self.assertIn("favicon_meta.ico", link.favicon_capture.url)

self.assertFalse(link.is_private)
self.assertEqual(link.submitted_title, "Test title.")
self.assertEqual(link.submitted_description, "Test description.")
if settings.CAPTURE_ENGINE == 'perma':
software_pattern = '^perma$'
else:
software_pattern = r'scoop @ harvard library innovation lab: \d+\.\d+.\d+'
self.assertRegex(link.captured_by_software, software_pattern)
self.assertRegex(link.captured_by_software, r'scoop @ harvard library innovation lab: \d+\.\d+.\d+')
expected_size = 15340
self.assertLessEqual(abs(link.warc_size-expected_size), 100)

Expand All @@ -437,7 +425,7 @@ def test_should_create_archive_from_pdf_url(self, allowed):
user=self.org_user)

link = Link.objects.get(guid=obj['guid'])
self.assertRecordsInWarc(link, check_provenance_summary=(settings.CAPTURE_ENGINE == 'scoop-api'))
self.assertRecordsInWarc(link, check_provenance_summary=True)
self.assertEqual(link.primary_capture.content_type, 'application/pdf')

# check folder
Expand Down Expand Up @@ -608,11 +596,6 @@ def test_media_capture_in_iframes(self):
("test1.jpg", "image/jpeg"), ("test2.png", "image/png"),
("wide1.png", "image/png"), ("wide2.png", "image/png"), ("narrow.png", "image/png")
]
if settings.CAPTURE_ENGINE == 'perma':
expected_records = expected_records + [
("test.swf", "application/vnd.adobe.flash.movie"), ("test2.swf", "application/vnd.adobe.flash.movie"), ("test3.swf", "application/vnd.adobe.flash.movie"),
("test_fallback.jpg", "image/jpeg"),
]
link = Link.objects.get(guid=obj['guid'])
self.assertRecordsInWarc(link, expected_records=expected_records)

Expand Down
12 changes: 0 additions & 12 deletions perma_web/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,18 +527,6 @@ def post(self, request, format=None):
url=link.ascii_safe_url,
).save()

# create screenshot placeholder
if settings.CAPTURE_ENGINE == 'perma':
Capture(
link=link,
role='screenshot',
status='pending',
record_type='resource',
url=f"file:///{link.guid}/cap.png",
content_type='image/png',
).save()


# kick off capture tasks -- no need for guid since it'll work through the queue
capture_job.status = 'pending'
capture_job.link = link
Expand Down

0 comments on commit 7230e0e

Please sign in to comment.