Prevent rate limit errors in Wayback Machine API (#339)

The Wayback Machine Save API only allows a limited number of requests within a given timespan. This commit introduces several changes to avoid rate limit errors:
- At most one attempt is made to create a new snapshot
- If a new snapshot cannot be created, the latest existing snapshot is used instead
- Bulk snapshot updates (bookmark import, loading missing snapshots after login) only attempt to load the latest snapshot instead of creating new ones
This commit is contained in:
Sascha Ißbrücker
2022-09-10 20:43:15 +02:00
committed by GitHub
parent 6420ec173a
commit 1b35d5b5ef
4 changed files with 364 additions and 188 deletions

View File

@@ -5,8 +5,9 @@ from background_task import background
from django.conf import settings from django.conf import settings
from django.contrib.auth import get_user_model from django.contrib.auth import get_user_model
from django.contrib.auth.models import User from django.contrib.auth.models import User
from waybackpy.exceptions import WaybackError from waybackpy.exceptions import WaybackError, TooManyRequestsError, NoCDXRecordFound
import bookmarks.services.wayback
from bookmarks.models import Bookmark, UserProfile from bookmarks.models import Bookmark, UserProfile
from bookmarks.services.website_loader import DEFAULT_USER_AGENT from bookmarks.services.website_loader import DEFAULT_USER_AGENT
@@ -26,6 +27,32 @@ def create_web_archive_snapshot(user: User, bookmark: Bookmark, force_update: bo
_create_web_archive_snapshot_task(bookmark.id, force_update) _create_web_archive_snapshot_task(bookmark.id, force_update)
def _load_newest_snapshot(bookmark: Bookmark):
    """
    Look up the newest existing snapshot for a bookmark and store its URL on the bookmark.

    The bookmark is only modified and saved when the CDX API returns a snapshot.
    ``NoCDXRecordFound`` and any other ``WaybackError`` raised during the lookup are
    logged and not re-raised.
    """
    try:
        logger.debug(f'Load existing snapshot for bookmark. url={bookmark.url}')
        existing_snapshot = bookmarks.services.wayback.CustomWaybackMachineCDXServerAPI(bookmark.url).newest()

        if not existing_snapshot:
            # Nothing to store, leave the bookmark untouched
            return

        bookmark.web_archive_snapshot_url = existing_snapshot.archive_url
        bookmark.save()
        logger.debug(f'Using newest snapshot. url={bookmark.url} from={existing_snapshot.datetime_timestamp}')
    except NoCDXRecordFound:
        # Listed before WaybackError so that a missing snapshot gets its own message
        logger.error(f'Could not find any snapshots for bookmark. url={bookmark.url}')
    except WaybackError as lookup_error:
        logger.error(f'Failed to load existing snapshot. url={bookmark.url}', exc_info=lookup_error)
def _create_snapshot(bookmark: Bookmark):
    """
    Request a new snapshot of the bookmark's URL and store the resulting archive URL.

    The save API is constructed with ``max_tries=1``. Errors raised while saving are not
    handled here and propagate to the caller.
    """
    logger.debug(f'Create new snapshot for bookmark. url={bookmark.url}...')

    save_api = waybackpy.WaybackMachineSaveAPI(bookmark.url, DEFAULT_USER_AGENT, max_tries=1)
    save_api.save()

    # Persist the URL of the freshly created snapshot
    bookmark.web_archive_snapshot_url = save_api.archive_url
    bookmark.save()

    logger.debug(f'Successfully created new snapshot for bookmark:. url={bookmark.url}')
@background() @background()
def _create_web_archive_snapshot_task(bookmark_id: int, force_update: bool): def _create_web_archive_snapshot_task(bookmark_id: int, force_update: bool):
try: try:
@@ -37,19 +64,31 @@ def _create_web_archive_snapshot_task(bookmark_id: int, force_update: bool):
if bookmark.web_archive_snapshot_url and not force_update: if bookmark.web_archive_snapshot_url and not force_update:
return return
logger.debug(f'Create web archive link for bookmark: {bookmark}...') # Create new snapshot
archive = waybackpy.WaybackMachineSaveAPI(bookmark.url, DEFAULT_USER_AGENT)
try: try:
archive.save() _create_snapshot(bookmark)
except WaybackError as error: return
logger.exception(f'Error creating web archive link for bookmark: {bookmark}...', exc_info=error) except TooManyRequestsError:
raise logger.error(
f'Failed to create snapshot due to rate limiting, trying to load newest snapshot as fallback. url={bookmark.url}')
except WaybackError:
logger.error(f'Failed to create snapshot, trying to load newest snapshot as fallback. url={bookmark.url}')
bookmark.web_archive_snapshot_url = archive.archive_url # Load the newest snapshot as fallback
bookmark.save() _load_newest_snapshot(bookmark)
logger.debug(f'Successfully created web archive link for bookmark: {bookmark}...')
@background()
def _load_web_archive_snapshot_task(bookmark_id: int):
    """
    Load the newest existing snapshot for the bookmark with the given ID.

    Does nothing if the bookmark does not exist or already has a snapshot URL.
    """
    try:
        bookmark = Bookmark.objects.get(id=bookmark_id)
    except Bookmark.DoesNotExist:
        return

    # Only bookmarks without a snapshot URL need a lookup
    if not bookmark.web_archive_snapshot_url:
        _load_newest_snapshot(bookmark)
def schedule_bookmarks_without_snapshots(user: User): def schedule_bookmarks_without_snapshots(user: User):
@@ -63,4 +102,6 @@ def _schedule_bookmarks_without_snapshots_task(user_id: int):
bookmarks_without_snapshots = Bookmark.objects.filter(web_archive_snapshot_url__exact='', owner=user) bookmarks_without_snapshots = Bookmark.objects.filter(web_archive_snapshot_url__exact='', owner=user)
for bookmark in bookmarks_without_snapshots: for bookmark in bookmarks_without_snapshots:
_create_web_archive_snapshot_task(bookmark.id, False) # To prevent rate limit errors from the Wayback API only try to load the latest snapshots instead of creating
# new ones when processing bookmarks in bulk
_load_web_archive_snapshot_task(bookmark.id)

View File

@@ -0,0 +1,40 @@
import time
from typing import Dict
import waybackpy
import waybackpy.utils
from waybackpy.exceptions import NoCDXRecordFound
class CustomWaybackMachineCDXServerAPI(waybackpy.WaybackMachineCDXServerAPI):
    """
    CDX server API client tailored to looking up the newest snapshot of a URL.

    Works around some issues with retrieving the newest snapshot in the stock client.
    See https://github.com/akamhy/waybackpy/issues/176
    """

    def newest(self):
        """
        Return the first snapshot of a query sorted by closeness to the current time.

        Raises NoCDXRecordFound if the query does not yield any snapshot.
        """
        # Configure the query to look for snapshots closest to "now"
        self.sort = 'closest'
        # NOTE(review): the meaning of the negative limit is defined by the CDX server API - confirm there
        self.limit = -5
        self.closest = waybackpy.utils.unix_timestamp_to_wayback_timestamp(int(time.time()))

        # Only the first record of the result is of interest
        first_snapshot = next(iter(self.snapshots()), None)
        if first_snapshot:
            return first_snapshot

        raise NoCDXRecordFound(
            "Wayback Machine's CDX server did not return any records "
            "for the query. The URL may not have any archives "
            " on the Wayback Machine or the URL may have been recently "
            "archived and is still not available on the CDX server."
        )

    def add_payload(self, payload: Dict[str, str]) -> None:
        """Fill the payload via the base class, then add the ``fastLatest`` query param."""
        super().add_payload(payload)
        # This API is only used to get the latest snapshot, and fastLatest makes searching for
        # latest snapshots faster
        payload.update({'fastLatest': 'true'})

View File

@@ -2,136 +2,139 @@
{% load widget_tweaks %} {% load widget_tweaks %}
{% block content %} {% block content %}
<div class="settings-page"> <div class="settings-page">
{% include 'settings/nav.html' %} {% include 'settings/nav.html' %}
{# Profile section #} {# Profile section #}
<section class="content-area"> <section class="content-area">
<h2>Profile</h2> <h2>Profile</h2>
<p> <p>
<a href="{% url 'change_password' %}">Change password</a> <a href="{% url 'change_password' %}">Change password</a>
</p> </p>
<form action="{% url 'bookmarks:settings.general' %}" method="post" novalidate> <form action="{% url 'bookmarks:settings.general' %}" method="post" novalidate>
{% csrf_token %} {% csrf_token %}
<div class="form-group"> <div class="form-group">
<label for="{{ form.theme.id_for_label }}" class="form-label">Theme</label> <label for="{{ form.theme.id_for_label }}" class="form-label">Theme</label>
{{ form.theme|add_class:"form-select col-2 col-sm-12" }} {{ form.theme|add_class:"form-select col-2 col-sm-12" }}
<div class="form-input-hint"> <div class="form-input-hint">
Whether to use a light or dark theme, or automatically adjust the theme based on your system's settings. Whether to use a light or dark theme, or automatically adjust the theme based on your system's settings.
</div> </div>
</div> </div>
<div class="form-group"> <div class="form-group">
<label for="{{ form.bookmark_date_display.id_for_label }}" class="form-label">Bookmark date format</label> <label for="{{ form.bookmark_date_display.id_for_label }}" class="form-label">Bookmark date format</label>
{{ form.bookmark_date_display|add_class:"form-select col-2 col-sm-12" }} {{ form.bookmark_date_display|add_class:"form-select col-2 col-sm-12" }}
<div class="form-input-hint"> <div class="form-input-hint">
Whether to show bookmark dates as relative (how long ago), or as absolute dates. Alternatively the date can be hidden. Whether to show bookmark dates as relative (how long ago), or as absolute dates. Alternatively the date can
</div> be hidden.
</div> </div>
<div class="form-group"> </div>
<label for="{{ form.bookmark_link_target.id_for_label }}" class="form-label">Open bookmarks in</label> <div class="form-group">
{{ form.bookmark_link_target|add_class:"form-select col-2 col-sm-12" }} <label for="{{ form.bookmark_link_target.id_for_label }}" class="form-label">Open bookmarks in</label>
<div class="form-input-hint"> {{ form.bookmark_link_target|add_class:"form-select col-2 col-sm-12" }}
Whether to open bookmarks a new page or in the same page. <div class="form-input-hint">
</div> Whether to open bookmarks a new page or in the same page.
</div> </div>
<div class="form-group"> </div>
<label for="{{ form.web_archive_integration.id_for_label }}" class="form-label">Internet Archive <div class="form-group">
integration</label> <label for="{{ form.web_archive_integration.id_for_label }}" class="form-label">Internet Archive
{{ form.web_archive_integration|add_class:"form-select col-2 col-sm-12" }} integration</label>
<div class="form-input-hint"> {{ form.web_archive_integration|add_class:"form-select col-2 col-sm-12" }}
Enabling this feature will automatically create snapshots of bookmarked websites on the <a <div class="form-input-hint">
href="https://web.archive.org/" target="_blank" rel="noopener">Internet Archive Wayback Enabling this feature will automatically create snapshots of bookmarked websites on the <a
Machine</a>. This allows href="https://web.archive.org/" target="_blank" rel="noopener">Internet Archive Wayback
to preserve, and later access, the website as it was at the point in time it was bookmarked, in Machine</a>.
case it goes offline or its content is modified. This allows to preserve, and later access the website as it was at the point in time it was bookmarked, in
</div> case it goes offline or its content is modified.
</div> Please consider donating to the <a href="https://archive.org/donate/index.php" target="_blank"
<div class="form-group"> rel="noopener">Internet Archive</a> if you make use of this feature.
<label for="{{ form.enable_sharing.id_for_label }}" class="form-checkbox"> </div>
{{ form.enable_sharing }} </div>
<i class="form-icon"></i> Enable bookmark sharing <div class="form-group">
</label> <label for="{{ form.enable_sharing.id_for_label }}" class="form-checkbox">
<div class="form-input-hint"> {{ form.enable_sharing }}
Allows to share bookmarks with other users, and to view shared bookmarks. <i class="form-icon"></i> Enable bookmark sharing
Disabling this feature will hide all previously shared bookmarks from other users. </label>
</div> <div class="form-input-hint">
</div> Allows to share bookmarks with other users, and to view shared bookmarks.
<div class="form-group"> Disabling this feature will hide all previously shared bookmarks from other users.
<input type="submit" value="Save" class="btn btn-primary mt-2"> </div>
</div> </div>
</form> <div class="form-group">
</section> <input type="submit" value="Save" class="btn btn-primary mt-2">
</div>
</form>
</section>
{# Import section #} {# Import section #}
<section class="content-area"> <section class="content-area">
<h2>Import</h2> <h2>Import</h2>
<p>Import bookmarks and tags in the Netscape HTML format. This will execute a sync where new bookmarks are <p>Import bookmarks and tags in the Netscape HTML format. This will execute a sync where new bookmarks are
added and existing ones are updated.</p> added and existing ones are updated.</p>
<form method="post" enctype="multipart/form-data" action="{% url 'bookmarks:settings.import' %}"> <form method="post" enctype="multipart/form-data" action="{% url 'bookmarks:settings.import' %}">
{% csrf_token %} {% csrf_token %}
<div class="form-group"> <div class="form-group">
<div class="input-group col-8 col-md-12"> <div class="input-group col-8 col-md-12">
<input class="form-input" type="file" name="import_file"> <input class="form-input" type="file" name="import_file">
<input type="submit" class="input-group-btn col-2 btn btn-primary" value="Upload"> <input type="submit" class="input-group-btn col-2 btn btn-primary" value="Upload">
</div> </div>
{% if import_success_message %} {% if import_success_message %}
<div class="has-success"> <div class="has-success">
<p class="form-input-hint"> <p class="form-input-hint">
{{ import_success_message }} {{ import_success_message }}
</p> </p>
</div> </div>
{% endif %} {% endif %}
{% if import_errors_message %} {% if import_errors_message %}
<div class="has-error"> <div class="has-error">
<p class="form-input-hint"> <p class="form-input-hint">
{{ import_errors_message }} {{ import_errors_message }}
</p> </p>
</div> </div>
{% endif %} {% endif %}
</div> </div>
</form> </form>
</section> </section>
{# Export section #} {# Export section #}
<section class="content-area"> <section class="content-area">
<h2>Export</h2> <h2>Export</h2>
<p>Export all bookmarks in Netscape HTML format.</p> <p>Export all bookmarks in Netscape HTML format.</p>
<a class="btn btn-primary" href="{% url 'bookmarks:settings.export' %}">Download (.html)</a> <a class="btn btn-primary" href="{% url 'bookmarks:settings.export' %}">Download (.html)</a>
{% if export_error %} {% if export_error %}
<div class="has-error"> <div class="has-error">
<p class="form-input-hint"> <p class="form-input-hint">
{{ export_error }} {{ export_error }}
</p> </p>
</div> </div>
{% endif %} {% endif %}
</section> </section>
{# About section #} {# About section #}
<section class="content-area about"> <section class="content-area about">
<h2>About</h2> <h2>About</h2>
<table class="table"> <table class="table">
<tbody> <tbody>
<tr> <tr>
<td>Version</td> <td>Version</td>
<td>{{ version_info }}</td> <td>{{ version_info }}</td>
</tr> </tr>
<tr> <tr>
<td rowspan="3" style="vertical-align: top">Links</td> <td rowspan="3" style="vertical-align: top">Links</td>
<td><a href="https://github.com/sissbruecker/linkding/" <td><a href="https://github.com/sissbruecker/linkding/"
target="_blank">GitHub</a></td> target="_blank">GitHub</a></td>
</tr> </tr>
<tr> <tr>
<td><a href="https://github.com/sissbruecker/linkding#documentation" <td><a href="https://github.com/sissbruecker/linkding#documentation"
target="_blank">Documentation</a></td> target="_blank">Documentation</a></td>
</tr> </tr>
<tr> <tr>
<td><a href="https://github.com/sissbruecker/linkding/blob/master/CHANGELOG.md" <td><a href="https://github.com/sissbruecker/linkding/blob/master/CHANGELOG.md"
target="_blank">Changelog</a></td> target="_blank">Changelog</a></td>
</tr> </tr>
</tbody> </tbody>
</table> </table>
</section> </section>
</div> </div>
{% endblock %} {% endblock %}

View File

@@ -1,25 +1,51 @@
import datetime
from dataclasses import dataclass
from unittest.mock import patch from unittest.mock import patch
import waybackpy import waybackpy
from background_task.models import Task from background_task.models import Task
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.test import TestCase, override_settings from django.test import TestCase, override_settings
from waybackpy.exceptions import WaybackError
from bookmarks.models import Bookmark, UserProfile import bookmarks.services.wayback
from bookmarks.models import UserProfile
from bookmarks.services import tasks from bookmarks.services import tasks
from bookmarks.tests.helpers import BookmarkFactoryMixin, disable_logging from bookmarks.tests.helpers import BookmarkFactoryMixin, disable_logging
class MockWaybackMachineSaveAPI: class MockWaybackMachineSaveAPI:
def __init__(self, archive_url: str): def __init__(self, archive_url: str = 'https://example.com/created_snapshot', fail_on_save: bool = False):
self.archive_url = archive_url self.archive_url = archive_url
self.fail_on_save = fail_on_save
def save(self): def save(self):
if self.fail_on_save:
raise WaybackError
return self return self
class MockWaybackUrlWithSaveError:
def save(self): @dataclass
raise NotImplementedError class MockCdxSnapshot:
archive_url: str
datetime_timestamp: datetime.datetime
class MockWaybackMachineCDXServerAPI:
    """
    Test double for ``bookmarks.services.wayback.CustomWaybackMachineCDXServerAPI``.

    :param archive_url: archive URL reported by the snapshot that ``newest`` returns
    :param has_no_snapshot: simulate a URL without any snapshot
    :param fail_loading_snapshot: simulate a generic Wayback error during lookup
    """

    def __init__(self,
                 archive_url: str = 'https://example.com/newest_snapshot',
                 has_no_snapshot=False,
                 fail_loading_snapshot=False):
        self.archive_url = archive_url
        self.has_no_snapshot = has_no_snapshot
        self.fail_loading_snapshot = fail_loading_snapshot

    def newest(self):
        """
        Return a mock snapshot, or raise the error selected in the constructor.

        :raises NoCDXRecordFound: if ``has_no_snapshot`` is set
        :raises WaybackError: if ``fail_loading_snapshot`` is set
        """
        if self.has_no_snapshot:
            # The real API signals a missing snapshot by raising NoCDXRecordFound instead of
            # returning None, so mirror that here to exercise the same code path in the tasks.
            # Imported locally to keep this class independent of the module's import block.
            from waybackpy.exceptions import NoCDXRecordFound
            raise NoCDXRecordFound

        if self.fail_loading_snapshot:
            raise WaybackError

        return MockCdxSnapshot(self.archive_url, datetime.datetime.now())
class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
@@ -50,49 +76,130 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
def test_create_web_archive_snapshot_should_update_snapshot_url(self): def test_create_web_archive_snapshot_should_update_snapshot_url(self):
bookmark = self.setup_bookmark() bookmark = self.setup_bookmark()
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://example.com')): with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI()):
tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False) tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False)
self.run_pending_task(tasks._create_web_archive_snapshot_task) self.run_pending_task(tasks._create_web_archive_snapshot_task)
bookmark.refresh_from_db() bookmark.refresh_from_db()
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com') self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com/created_snapshot')
def test_create_web_archive_snapshot_should_handle_missing_bookmark_id(self): def test_create_web_archive_snapshot_should_handle_missing_bookmark_id(self):
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://example.com')) as mock_wayback_url: with patch.object(waybackpy, 'WaybackMachineSaveAPI',
return_value=MockWaybackMachineSaveAPI()) as mock_save_api:
tasks._create_web_archive_snapshot_task(123, False) tasks._create_web_archive_snapshot_task(123, False)
self.run_pending_task(tasks._create_web_archive_snapshot_task) self.run_pending_task(tasks._create_web_archive_snapshot_task)
mock_wayback_url.assert_not_called() mock_save_api.assert_not_called()
def test_create_web_archive_snapshot_should_handle_wayback_save_error(self):
bookmark = self.setup_bookmark()
with patch.object(waybackpy, 'WaybackMachineSaveAPI',
return_value=MockWaybackUrlWithSaveError()):
with self.assertRaises(NotImplementedError):
tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False)
self.run_pending_task(tasks._create_web_archive_snapshot_task)
def test_create_web_archive_snapshot_should_skip_if_snapshot_exists(self): def test_create_web_archive_snapshot_should_skip_if_snapshot_exists(self):
bookmark = self.setup_bookmark(web_archive_snapshot_url='https://example.com') bookmark = self.setup_bookmark(web_archive_snapshot_url='https://example.com')
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://other.com')): with patch.object(waybackpy, 'WaybackMachineSaveAPI',
return_value=MockWaybackMachineSaveAPI()) as mock_save_api:
tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False) tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False)
self.run_pending_task(tasks._create_web_archive_snapshot_task) self.run_pending_task(tasks._create_web_archive_snapshot_task)
bookmark.refresh_from_db()
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com') mock_save_api.assert_not_called()
def test_create_web_archive_snapshot_should_force_update_snapshot(self): def test_create_web_archive_snapshot_should_force_update_snapshot(self):
bookmark = self.setup_bookmark(web_archive_snapshot_url='https://example.com') bookmark = self.setup_bookmark(web_archive_snapshot_url='https://example.com')
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://other.com')): with patch.object(waybackpy, 'WaybackMachineSaveAPI',
return_value=MockWaybackMachineSaveAPI('https://other.com')):
tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, True) tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, True)
self.run_pending_task(tasks._create_web_archive_snapshot_task) self.run_pending_task(tasks._create_web_archive_snapshot_task)
bookmark.refresh_from_db() bookmark.refresh_from_db()
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://other.com') self.assertEqual(bookmark.web_archive_snapshot_url, 'https://other.com')
def test_create_web_archive_snapshot_should_use_newest_snapshot_as_fallback(self):
    # Saving a new snapshot fails, so the newest existing snapshot should be stored instead
    bookmark = self.setup_bookmark()
    failing_save_api = MockWaybackMachineSaveAPI(fail_on_save=True)
    cdx_api = MockWaybackMachineCDXServerAPI()

    with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=failing_save_api), \
            patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI', return_value=cdx_api):
        user = self.get_or_create_test_user()
        tasks.create_web_archive_snapshot(user, bookmark, False)
        self.run_pending_task(tasks._create_web_archive_snapshot_task)

        bookmark.refresh_from_db()
        self.assertEqual('https://example.com/newest_snapshot', bookmark.web_archive_snapshot_url)
def test_create_web_archive_snapshot_should_ignore_missing_newest_snapshot(self):
    # Saving fails and there is no existing snapshot either, so the URL should stay empty
    bookmark = self.setup_bookmark()
    failing_save_api = MockWaybackMachineSaveAPI(fail_on_save=True)
    empty_cdx_api = MockWaybackMachineCDXServerAPI(has_no_snapshot=True)

    with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=failing_save_api), \
            patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
                         return_value=empty_cdx_api):
        user = self.get_or_create_test_user()
        tasks.create_web_archive_snapshot(user, bookmark, False)
        self.run_pending_task(tasks._create_web_archive_snapshot_task)

        bookmark.refresh_from_db()
        self.assertEqual('', bookmark.web_archive_snapshot_url)
def test_create_web_archive_snapshot_should_ignore_newest_snapshot_errors(self):
    # Saving fails and the fallback lookup fails as well, so the URL should stay empty
    bookmark = self.setup_bookmark()
    failing_save_api = MockWaybackMachineSaveAPI(fail_on_save=True)
    failing_cdx_api = MockWaybackMachineCDXServerAPI(fail_loading_snapshot=True)

    with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=failing_save_api), \
            patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
                         return_value=failing_cdx_api):
        user = self.get_or_create_test_user()
        tasks.create_web_archive_snapshot(user, bookmark, False)
        self.run_pending_task(tasks._create_web_archive_snapshot_task)

        bookmark.refresh_from_db()
        self.assertEqual('', bookmark.web_archive_snapshot_url)
def test_load_web_archive_snapshot_should_update_snapshot_url(self):
    # The URL of the newest snapshot should end up on the bookmark
    bookmark = self.setup_bookmark()
    cdx_api = MockWaybackMachineCDXServerAPI()

    with patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI', return_value=cdx_api):
        tasks._load_web_archive_snapshot_task(bookmark.id)
        self.run_pending_task(tasks._load_web_archive_snapshot_task)

        bookmark.refresh_from_db()
        self.assertEqual('https://example.com/newest_snapshot', bookmark.web_archive_snapshot_url)
def test_load_web_archive_snapshot_should_handle_missing_bookmark_id(self):
    # An unknown bookmark ID should not trigger any CDX API lookup
    cdx_api = MockWaybackMachineCDXServerAPI()

    with patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
                      return_value=cdx_api) as patched_cdx_api:
        tasks._load_web_archive_snapshot_task(123)
        self.run_pending_task(tasks._load_web_archive_snapshot_task)

        patched_cdx_api.assert_not_called()
def test_load_web_archive_snapshot_should_skip_if_snapshot_exists(self):
    # A bookmark that already has a snapshot URL should not trigger any CDX API lookup
    bookmark = self.setup_bookmark(web_archive_snapshot_url='https://example.com')
    cdx_api = MockWaybackMachineCDXServerAPI()

    with patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
                      return_value=cdx_api) as patched_cdx_api:
        tasks._load_web_archive_snapshot_task(bookmark.id)
        self.run_pending_task(tasks._load_web_archive_snapshot_task)

        patched_cdx_api.assert_not_called()
def test_load_web_archive_snapshot_should_handle_missing_snapshot(self):
    # When no snapshot exists, the task should leave the stored snapshot URL empty
    bookmark = self.setup_bookmark()

    with patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
                      return_value=MockWaybackMachineCDXServerAPI(has_no_snapshot=True)):
        tasks._load_web_archive_snapshot_task(bookmark.id)
        self.run_pending_task(tasks._load_web_archive_snapshot_task)

        # The task works on its own instance loaded from the database, so reload the bookmark;
        # without this the assertion would only check the untouched in-memory object
        bookmark.refresh_from_db()
        self.assertEqual('', bookmark.web_archive_snapshot_url)
def test_load_web_archive_snapshot_should_handle_wayback_errors(self):
    # When the lookup fails, the task should leave the stored snapshot URL empty
    bookmark = self.setup_bookmark()

    with patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
                      return_value=MockWaybackMachineCDXServerAPI(fail_loading_snapshot=True)):
        tasks._load_web_archive_snapshot_task(bookmark.id)
        self.run_pending_task(tasks._load_web_archive_snapshot_task)

        # The task works on its own instance loaded from the database, so reload the bookmark;
        # without this the assertion would only check the untouched in-memory object
        bookmark.refresh_from_db()
        self.assertEqual('', bookmark.web_archive_snapshot_url)
@override_settings(LD_DISABLE_BACKGROUND_TASKS=True) @override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
def test_create_web_archive_snapshot_should_not_run_when_background_tasks_are_disabled(self): def test_create_web_archive_snapshot_should_not_run_when_background_tasks_are_disabled(self):
bookmark = self.setup_bookmark() bookmark = self.setup_bookmark()
@@ -109,33 +216,23 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.assertEqual(Task.objects.count(), 0) self.assertEqual(Task.objects.count(), 0)
def test_schedule_bookmarks_without_snapshots_should_create_snapshot_task_for_all_bookmarks_without_snapshot(self): def test_schedule_bookmarks_without_snapshots_should_load_snapshot_for_all_bookmarks_without_snapshot(self):
user = self.get_or_create_test_user() user = self.get_or_create_test_user()
self.setup_bookmark() self.setup_bookmark()
self.setup_bookmark() self.setup_bookmark()
self.setup_bookmark() self.setup_bookmark()
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://example.com')):
tasks.schedule_bookmarks_without_snapshots(user)
self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task)
self.run_all_pending_tasks(tasks._create_web_archive_snapshot_task)
for bookmark in Bookmark.objects.all():
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')
def test_schedule_bookmarks_without_snapshots_should_not_update_bookmarks_with_existing_snapshot(self):
user = self.get_or_create_test_user()
self.setup_bookmark(web_archive_snapshot_url='https://example.com') self.setup_bookmark(web_archive_snapshot_url='https://example.com')
self.setup_bookmark(web_archive_snapshot_url='https://example.com') self.setup_bookmark(web_archive_snapshot_url='https://example.com')
self.setup_bookmark(web_archive_snapshot_url='https://example.com') self.setup_bookmark(web_archive_snapshot_url='https://example.com')
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://other.com')): tasks.schedule_bookmarks_without_snapshots(user)
tasks.schedule_bookmarks_without_snapshots(user) self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task)
self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task)
self.run_all_pending_tasks(tasks._create_web_archive_snapshot_task)
for bookmark in Bookmark.objects.all(): task_list = Task.objects.all()
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com') self.assertEqual(task_list.count(), 3)
for task in task_list:
self.assertEqual(task.task_name, 'bookmarks.services.tasks._load_web_archive_snapshot_task')
def test_schedule_bookmarks_without_snapshots_should_only_update_user_owned_bookmarks(self): def test_schedule_bookmarks_without_snapshots_should_only_update_user_owned_bookmarks(self):
user = self.get_or_create_test_user() user = self.get_or_create_test_user()
@@ -147,16 +244,11 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.setup_bookmark(user=other_user) self.setup_bookmark(user=other_user)
self.setup_bookmark(user=other_user) self.setup_bookmark(user=other_user)
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://example.com')): tasks.schedule_bookmarks_without_snapshots(user)
tasks.schedule_bookmarks_without_snapshots(user) self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task)
self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task)
self.run_all_pending_tasks(tasks._create_web_archive_snapshot_task)
for bookmark in Bookmark.objects.all().filter(owner=user): task_list = Task.objects.all()
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com') self.assertEqual(task_list.count(), 3)
for bookmark in Bookmark.objects.all().filter(owner=other_user):
self.assertEqual(bookmark.web_archive_snapshot_url, '')
@override_settings(LD_DISABLE_BACKGROUND_TASKS=True) @override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
def test_schedule_bookmarks_without_snapshots_should_not_run_when_background_tasks_are_disabled(self): def test_schedule_bookmarks_without_snapshots_should_not_run_when_background_tasks_are_disabled(self):