Create snapshots on web.archive.org for bookmarks (#150)

* Implement initial background tasks concept

* fix property reference

* update requirements.txt

* simplify bookmark null check

* improve web archive url display

* add background tasks test

* add basic supervisor setup

* schedule missing snapshot creation on login

* remove task locks and clear task history before starting background task processor

* batch create snapshots after import

* fix script reference in supervisord.conf

* add option to disable background tasks

* restructure feature overview
This commit is contained in:
Sascha Ißbrücker
2021-09-04 22:31:04 +02:00
committed by GitHub
parent 8d214649b7
commit d87dde6bae
27 changed files with 470 additions and 19 deletions

View File

@@ -28,6 +28,7 @@ class BookmarkFactoryMixin:
description: str = '',
website_title: str = '',
website_description: str = '',
web_archive_snapshot_url: str = '',
):
if tags is None:
tags = []
@@ -45,7 +46,8 @@ class BookmarkFactoryMixin:
date_added=timezone.now(),
date_modified=timezone.now(),
owner=user,
is_archived=is_archived
is_archived=is_archived,
web_archive_snapshot_url=web_archive_snapshot_url,
)
bookmark.save()
for tag in tags:

View File

@@ -24,9 +24,10 @@ class BookmarkListTagTest(TestCase, BookmarkFactoryMixin):
)
return template_to_render.render(context)
def setup_date_format_test(self, date_display_setting):
def setup_date_format_test(self, date_display_setting: str, web_archive_url: str = ''):
bookmark = self.setup_bookmark()
bookmark.date_added = timezone.now() - relativedelta(days=8)
bookmark.web_archive_snapshot_url = web_archive_url
bookmark.save()
user = self.get_or_create_test_user()
user.profile.bookmark_date_display = date_display_setting
@@ -39,7 +40,27 @@ class BookmarkListTagTest(TestCase, BookmarkFactoryMixin):
formatted_date = formats.date_format(bookmark.date_added, 'SHORT_DATE_FORMAT')
self.assertInHTML(f'''
<span class="text-gray text-sm">{formatted_date}</span>
<span class="date-label text-gray text-sm">
<span>{formatted_date}</span>
</span>
<span class="text-gray text-sm">|</span>
''', html)
# With the absolute date display setting and a snapshot URL on the bookmark,
# the rendered list is expected to wrap the formatted date in a link to the snapshot.
def test_should_render_web_archive_link_with_absolute_date_setting(self):
# The second argument is the web archive snapshot URL handed to the setup helper.
bookmark = self.setup_date_format_test(UserProfile.BOOKMARK_DATE_DISPLAY_ABSOLUTE,
'https://web.archive.org/web/20210811214511/https://wanikani.com/')
html = self.render_template([bookmark])
# Expected label text: the bookmark's date_added rendered with SHORT_DATE_FORMAT.
formatted_date = formats.date_format(bookmark.date_added, 'SHORT_DATE_FORMAT')
# The expected fragment links to the bookmark's snapshot URL and contains the
# date followed by an infinity marker.
# NOTE(review): whitespace inside the literal is not visible in this view;
# presumably assertInHTML ignores such differences - confirm.
self.assertInHTML(f'''
<span class="date-label text-gray text-sm">
<a href="{bookmark.web_archive_snapshot_url}"
title="Show snapshot on web archive" target="_blank" rel="noopener">
<span>{formatted_date}</span>
<span>∞</span>
</a>
</span>
<span class="text-gray text-sm">|</span>
''', html)
def test_should_respect_relative_date_setting(self):
@@ -47,5 +68,23 @@ class BookmarkListTagTest(TestCase, BookmarkFactoryMixin):
html = self.render_template([bookmark])
self.assertInHTML('''
<span class="text-gray text-sm">1 week ago</span>
<span class="date-label text-gray text-sm">
<span>1 week ago</span>
</span>
<span class="text-gray text-sm">|</span>
''', html)
# With the relative date display setting and a snapshot URL on the bookmark,
# the rendered list is expected to wrap the relative date in a link to the snapshot.
def test_should_render_web_archive_link_with_relative_date_setting(self):
# The second argument is the web archive snapshot URL handed to the setup helper.
bookmark = self.setup_date_format_test(UserProfile.BOOKMARK_DATE_DISPLAY_RELATIVE,
'https://web.archive.org/web/20210811214511/https://wanikani.com/')
html = self.render_template([bookmark])
# The expected fragment links to the bookmark's snapshot URL and contains the
# text "1 week ago" followed by an infinity marker.
# NOTE(review): whitespace inside the literal is not visible in this view;
# presumably assertInHTML ignores such differences - confirm.
self.assertInHTML(f'''
<span class="date-label text-gray text-sm">
<a href="{bookmark.web_archive_snapshot_url}"
title="Show snapshot on web archive" target="_blank" rel="noopener">
<span>1 week ago</span>
<span>∞</span>
</a>
</span>
<span class="text-gray text-sm">|</span>
''', html)

View File

@@ -1,11 +1,14 @@
from unittest.mock import patch
from django.contrib.auth import get_user_model
from django.test import TestCase
from django.utils import timezone
from bookmarks.models import Bookmark, Tag
from bookmarks.services.bookmarks import archive_bookmark, archive_bookmarks, unarchive_bookmark, unarchive_bookmarks, \
delete_bookmarks, tag_bookmarks, untag_bookmarks
from bookmarks.services.bookmarks import create_bookmark, update_bookmark, archive_bookmark, archive_bookmarks, \
unarchive_bookmark, unarchive_bookmarks, delete_bookmarks, tag_bookmarks, untag_bookmarks
from bookmarks.tests.helpers import BookmarkFactoryMixin
from bookmarks.services import tasks
User = get_user_model()
@@ -13,7 +16,30 @@ User = get_user_model()
class BookmarkServiceTestCase(TestCase, BookmarkFactoryMixin):
def setUp(self) -> None:
self.user = User.objects.create_user('testuser', 'test@example.com', 'password123')
self.get_or_create_test_user()
def test_create_should_create_web_archive_snapshot(self):
    """Creating a bookmark should request a non-forced snapshot for it."""
    # Replace the task on the tasks module so no real task is scheduled.
    with patch.object(tasks, 'create_web_archive_snapshot') as mock_create_web_archive_snapshot:
        bookmark_data = Bookmark(url='https://example.com')
        bookmark = create_bookmark(bookmark_data, 'tag1 tag2', self.user)

        # Second argument False: the snapshot is not forced for a new bookmark.
        mock_create_web_archive_snapshot.assert_called_once_with(bookmark.id, False)
def test_update_should_create_web_archive_snapshot_if_url_did_change(self):
    """Updating a bookmark with a changed URL should request a forced snapshot."""
    # Replace the task on the tasks module so no real task is scheduled.
    with patch.object(tasks, 'create_web_archive_snapshot') as mock_create_web_archive_snapshot:
        bookmark = self.setup_bookmark()
        bookmark.url = 'https://example.com/updated'
        update_bookmark(bookmark, 'tag1 tag2', self.user)

        # Second argument True: the snapshot is forced when the URL changed.
        mock_create_web_archive_snapshot.assert_called_once_with(bookmark.id, True)
def test_update_should_not_create_web_archive_snapshot_if_url_did_not_change(self):
    """Updating a bookmark without touching its URL should not request a snapshot."""
    # Replace the task on the tasks module so no real task is scheduled.
    with patch.object(tasks, 'create_web_archive_snapshot') as mock_create_web_archive_snapshot:
        bookmark = self.setup_bookmark()
        # Only the title changes; the URL stays as created by the factory.
        bookmark.title = 'updated title'
        update_bookmark(bookmark, 'tag1 tag2', self.user)

        mock_create_web_archive_snapshot.assert_not_called()
def test_archive_bookmark(self):
bookmark = Bookmark(

View File

@@ -0,0 +1,154 @@
from unittest.mock import patch
import waybackpy
from background_task.models import Task
from django.contrib.auth.models import User
from django.test import TestCase, override_settings
from bookmarks.models import Bookmark
from bookmarks.services.tasks import create_web_archive_snapshot, schedule_bookmarks_without_snapshots
from bookmarks.tests.helpers import BookmarkFactoryMixin, disable_logging
class MockWaybackUrl:
    """Test double used as the return value of a patched ``waybackpy.Url``.

    ``save()`` succeeds without doing any work and returns the instance, so
    callers can read ``archive_url`` from the result.
    """

    def __init__(self, archive_url: str):
        # URL reported as the created snapshot.
        self.archive_url = archive_url

    def save(self):
        """Pretend to create a snapshot and return this object."""
        return self
class MockWaybackUrlWithSaveError:
    """Test double for a patched ``waybackpy.Url`` whose ``save()`` always fails."""

    def save(self):
        """Simulate a failing snapshot request by raising ``NotImplementedError``."""
        raise NotImplementedError
class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
    """Tests for ``create_web_archive_snapshot`` and ``schedule_bookmarks_without_snapshots``.

    Each test first calls the task function and then executes the stored
    ``Task`` rows synchronously through the helpers below, with
    ``waybackpy.Url`` patched to return a mock.
    """

    @disable_logging
    def run_pending_task(self, task_function):
        """Run the first stored ``Task`` using ``task_function`` and delete it.

        The plain callable is read from the ``task_function`` attribute of the
        passed object and invoked with the parameters stored on the task. The
        task row is only deleted when the call returns without raising.
        """
        func = getattr(task_function, 'task_function', None)
        task = Task.objects.all()[0]
        args, kwargs = task.params()
        func(*args, **kwargs)
        task.delete()

    @disable_logging
    def run_all_pending_tasks(self, task_function):
        """Run every stored ``Task`` using ``task_function``, deleting each afterwards.

        All stored tasks are executed with the same callable, so callers must
        make sure only tasks of that kind are pending.
        """
        func = getattr(task_function, 'task_function', None)
        tasks = Task.objects.all()

        for task in tasks:
            args, kwargs = task.params()
            func(*args, **kwargs)
            task.delete()

    def test_create_web_archive_snapshot_should_update_snapshot_url(self):
        """The mock's archive URL should be stored on the bookmark."""
        bookmark = self.setup_bookmark()

        with patch.object(waybackpy, 'Url', return_value=MockWaybackUrl('https://example.com')):
            create_web_archive_snapshot(bookmark.id, False)
            self.run_pending_task(create_web_archive_snapshot)
            bookmark.refresh_from_db()

            self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')

    def test_create_web_archive_snapshot_should_handle_missing_bookmark_id(self):
        """An unknown bookmark id should not lead to a ``waybackpy.Url`` call."""
        with patch.object(waybackpy, 'Url', return_value=MockWaybackUrl('https://example.com')) as mock_wayback_url:
            # No bookmark is created in this test, so id 123 does not exist.
            create_web_archive_snapshot(123, False)
            self.run_pending_task(create_web_archive_snapshot)

            mock_wayback_url.assert_not_called()

    def test_create_web_archive_snapshot_should_handle_wayback_save_error(self):
        """A failing ``save()`` should surface as ``NotImplementedError``.

        NOTE(review): despite "handle" in the name, this asserts that the
        error propagates out of the task run rather than being swallowed.
        """
        bookmark = self.setup_bookmark()

        with patch.object(waybackpy, 'Url',
                          return_value=MockWaybackUrlWithSaveError()):
            with self.assertRaises(NotImplementedError):
                create_web_archive_snapshot(bookmark.id, False)
                self.run_pending_task(create_web_archive_snapshot)

    def test_create_web_archive_snapshot_should_skip_if_snapshot_exists(self):
        """Without the force flag an existing snapshot URL should be kept."""
        bookmark = self.setup_bookmark(web_archive_snapshot_url='https://example.com')

        with patch.object(waybackpy, 'Url', return_value=MockWaybackUrl('https://other.com')):
            create_web_archive_snapshot(bookmark.id, False)
            self.run_pending_task(create_web_archive_snapshot)
            bookmark.refresh_from_db()

            self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')

    def test_create_web_archive_snapshot_should_force_update_snapshot(self):
        """With the force flag an existing snapshot URL should be replaced."""
        bookmark = self.setup_bookmark(web_archive_snapshot_url='https://example.com')

        with patch.object(waybackpy, 'Url', return_value=MockWaybackUrl('https://other.com')):
            create_web_archive_snapshot(bookmark.id, True)
            self.run_pending_task(create_web_archive_snapshot)
            bookmark.refresh_from_db()

            self.assertEqual(bookmark.web_archive_snapshot_url, 'https://other.com')

    @override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
    def test_create_web_archive_snapshot_should_not_run_when_background_tasks_are_disabled(self):
        """With background tasks disabled no ``Task`` row should be stored."""
        bookmark = self.setup_bookmark()
        create_web_archive_snapshot(bookmark.id, False)

        self.assertEqual(Task.objects.count(), 0)

    def test_schedule_bookmarks_without_snapshots_should_create_snapshot_task_for_all_bookmarks_without_snapshot(self):
        """Every bookmark lacking a snapshot should end up with the mock's URL."""
        user = self.get_or_create_test_user()
        self.setup_bookmark()
        self.setup_bookmark()
        self.setup_bookmark()

        with patch.object(waybackpy, 'Url', return_value=MockWaybackUrl('https://example.com')):
            schedule_bookmarks_without_snapshots(user.id)
            # Run the scheduling task first, then the snapshot tasks pending afterwards.
            self.run_pending_task(schedule_bookmarks_without_snapshots)
            self.run_all_pending_tasks(create_web_archive_snapshot)

            for bookmark in Bookmark.objects.all():
                self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')

    def test_schedule_bookmarks_without_snapshots_should_not_update_bookmarks_with_existing_snapshot(self):
        """Bookmarks that already have a snapshot URL should keep it."""
        user = self.get_or_create_test_user()
        self.setup_bookmark(web_archive_snapshot_url='https://example.com')
        self.setup_bookmark(web_archive_snapshot_url='https://example.com')
        self.setup_bookmark(web_archive_snapshot_url='https://example.com')

        with patch.object(waybackpy, 'Url', return_value=MockWaybackUrl('https://other.com')):
            schedule_bookmarks_without_snapshots(user.id)
            self.run_pending_task(schedule_bookmarks_without_snapshots)
            self.run_all_pending_tasks(create_web_archive_snapshot)

            for bookmark in Bookmark.objects.all():
                self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')

    def test_schedule_bookmarks_without_snapshots_should_only_update_user_owned_bookmarks(self):
        """Only bookmarks owned by the given user should receive a snapshot URL."""
        user = self.get_or_create_test_user()
        other_user = User.objects.create_user('otheruser', 'otheruser@example.com', 'password123')
        self.setup_bookmark()
        self.setup_bookmark()
        self.setup_bookmark()
        self.setup_bookmark(user=other_user)
        self.setup_bookmark(user=other_user)
        self.setup_bookmark(user=other_user)

        with patch.object(waybackpy, 'Url', return_value=MockWaybackUrl('https://example.com')):
            schedule_bookmarks_without_snapshots(user.id)
            self.run_pending_task(schedule_bookmarks_without_snapshots)
            self.run_all_pending_tasks(create_web_archive_snapshot)

            for bookmark in Bookmark.objects.all().filter(owner=user):
                self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')

            for bookmark in Bookmark.objects.all().filter(owner=other_user):
                self.assertEqual(bookmark.web_archive_snapshot_url, '')

    @override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
    def test_schedule_bookmarks_without_snapshots_should_not_run_when_background_tasks_are_disabled(self):
        """With background tasks disabled no ``Task`` row should be stored."""
        user = self.get_or_create_test_user()
        schedule_bookmarks_without_snapshots(user.id)

        self.assertEqual(Task.objects.count(), 0)

View File

@@ -1,5 +1,8 @@
from unittest.mock import patch
from django.test import TestCase
from bookmarks.services import tasks
from bookmarks.services.importer import import_netscape_html
from bookmarks.tests.helpers import BookmarkFactoryMixin, disable_logging
@@ -31,3 +34,12 @@ class ImporterTestCase(TestCase, BookmarkFactoryMixin):
import_result = import_netscape_html(test_html, self.get_or_create_test_user())
self.assertEqual(import_result.success, 0)
def test_schedule_snapshot_creation(self):
    """Importing should schedule snapshot creation once for the importing user."""
    user = self.get_or_create_test_user()
    test_html = self.create_import_html('')

    # Replace the scheduling task on the tasks module so nothing is actually queued.
    with patch.object(tasks, 'schedule_bookmarks_without_snapshots') as mock_schedule_bookmarks_without_snapshots:
        import_netscape_html(test_html, user)

        mock_schedule_bookmarks_without_snapshots.assert_called_once_with(user.id)

View File

@@ -0,0 +1,15 @@
from unittest.mock import patch
from django.test import TestCase
from bookmarks.services import tasks
from bookmarks.tests.helpers import BookmarkFactoryMixin
class SignalsTestCase(TestCase, BookmarkFactoryMixin):
    """Tests for behaviour triggered when a user logs in."""

    def test_login_should_schedule_snapshot_creation(self):
        """Logging in should schedule snapshot creation once for that user."""
        user = self.get_or_create_test_user()

        # Replace the scheduling task on the tasks module so nothing is actually queued.
        with patch.object(tasks, 'schedule_bookmarks_without_snapshots') as mock_schedule_bookmarks_without_snapshots:
            self.client.force_login(user)

            mock_schedule_bookmarks_without_snapshots.assert_called_once_with(user.id)