mirror of
https://github.com/sissbruecker/linkding.git
synced 2025-08-07 10:58:25 +02:00
Prevent rate limit errors in wayback machine API (#339)
The Wayback Machine Save API only allows a limited number of requests within a given timespan. This commit introduces several changes to avoid rate limit errors:
- At most one attempt is made to create a new snapshot.
- If a new snapshot could not be created, the latest existing snapshot is used as a fallback.
- Bulk snapshot updates (bookmark import, loading missing snapshots after login) only attempt to load the latest existing snapshot instead of creating new ones.
This commit is contained in:
@@ -5,8 +5,9 @@ from background_task import background
|
|||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.contrib.auth import get_user_model
|
from django.contrib.auth import get_user_model
|
||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
from waybackpy.exceptions import WaybackError
|
from waybackpy.exceptions import WaybackError, TooManyRequestsError, NoCDXRecordFound
|
||||||
|
|
||||||
|
import bookmarks.services.wayback
|
||||||
from bookmarks.models import Bookmark, UserProfile
|
from bookmarks.models import Bookmark, UserProfile
|
||||||
from bookmarks.services.website_loader import DEFAULT_USER_AGENT
|
from bookmarks.services.website_loader import DEFAULT_USER_AGENT
|
||||||
|
|
||||||
@@ -26,6 +27,32 @@ def create_web_archive_snapshot(user: User, bookmark: Bookmark, force_update: bo
|
|||||||
_create_web_archive_snapshot_task(bookmark.id, force_update)
|
_create_web_archive_snapshot_task(bookmark.id, force_update)
|
||||||
|
|
||||||
|
|
||||||
|
def _load_newest_snapshot(bookmark: Bookmark):
|
||||||
|
try:
|
||||||
|
logger.debug(f'Load existing snapshot for bookmark. url={bookmark.url}')
|
||||||
|
cdx_api = bookmarks.services.wayback.CustomWaybackMachineCDXServerAPI(bookmark.url)
|
||||||
|
existing_snapshot = cdx_api.newest()
|
||||||
|
|
||||||
|
if existing_snapshot:
|
||||||
|
bookmark.web_archive_snapshot_url = existing_snapshot.archive_url
|
||||||
|
bookmark.save()
|
||||||
|
logger.debug(f'Using newest snapshot. url={bookmark.url} from={existing_snapshot.datetime_timestamp}')
|
||||||
|
|
||||||
|
except NoCDXRecordFound:
|
||||||
|
logger.error(f'Could not find any snapshots for bookmark. url={bookmark.url}')
|
||||||
|
except WaybackError as error:
|
||||||
|
logger.error(f'Failed to load existing snapshot. url={bookmark.url}', exc_info=error)
|
||||||
|
|
||||||
|
|
||||||
|
def _create_snapshot(bookmark: Bookmark):
|
||||||
|
logger.debug(f'Create new snapshot for bookmark. url={bookmark.url}...')
|
||||||
|
archive = waybackpy.WaybackMachineSaveAPI(bookmark.url, DEFAULT_USER_AGENT, max_tries=1)
|
||||||
|
archive.save()
|
||||||
|
bookmark.web_archive_snapshot_url = archive.archive_url
|
||||||
|
bookmark.save()
|
||||||
|
logger.debug(f'Successfully created new snapshot for bookmark:. url={bookmark.url}')
|
||||||
|
|
||||||
|
|
||||||
@background()
|
@background()
|
||||||
def _create_web_archive_snapshot_task(bookmark_id: int, force_update: bool):
|
def _create_web_archive_snapshot_task(bookmark_id: int, force_update: bool):
|
||||||
try:
|
try:
|
||||||
@@ -37,19 +64,31 @@ def _create_web_archive_snapshot_task(bookmark_id: int, force_update: bool):
|
|||||||
if bookmark.web_archive_snapshot_url and not force_update:
|
if bookmark.web_archive_snapshot_url and not force_update:
|
||||||
return
|
return
|
||||||
|
|
||||||
logger.debug(f'Create web archive link for bookmark: {bookmark}...')
|
# Create new snapshot
|
||||||
|
|
||||||
archive = waybackpy.WaybackMachineSaveAPI(bookmark.url, DEFAULT_USER_AGENT)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
archive.save()
|
_create_snapshot(bookmark)
|
||||||
except WaybackError as error:
|
return
|
||||||
logger.exception(f'Error creating web archive link for bookmark: {bookmark}...', exc_info=error)
|
except TooManyRequestsError:
|
||||||
raise
|
logger.error(
|
||||||
|
f'Failed to create snapshot due to rate limiting, trying to load newest snapshot as fallback. url={bookmark.url}')
|
||||||
|
except WaybackError:
|
||||||
|
logger.error(f'Failed to create snapshot, trying to load newest snapshot as fallback. url={bookmark.url}')
|
||||||
|
|
||||||
bookmark.web_archive_snapshot_url = archive.archive_url
|
# Load the newest snapshot as fallback
|
||||||
bookmark.save()
|
_load_newest_snapshot(bookmark)
|
||||||
logger.debug(f'Successfully created web archive link for bookmark: {bookmark}...')
|
|
||||||
|
|
||||||
|
@background()
|
||||||
|
def _load_web_archive_snapshot_task(bookmark_id: int):
|
||||||
|
try:
|
||||||
|
bookmark = Bookmark.objects.get(id=bookmark_id)
|
||||||
|
except Bookmark.DoesNotExist:
|
||||||
|
return
|
||||||
|
# Skip if snapshot exists
|
||||||
|
if bookmark.web_archive_snapshot_url:
|
||||||
|
return
|
||||||
|
# Load the newest snapshot
|
||||||
|
_load_newest_snapshot(bookmark)
|
||||||
|
|
||||||
|
|
||||||
def schedule_bookmarks_without_snapshots(user: User):
|
def schedule_bookmarks_without_snapshots(user: User):
|
||||||
@@ -63,4 +102,6 @@ def _schedule_bookmarks_without_snapshots_task(user_id: int):
|
|||||||
bookmarks_without_snapshots = Bookmark.objects.filter(web_archive_snapshot_url__exact='', owner=user)
|
bookmarks_without_snapshots = Bookmark.objects.filter(web_archive_snapshot_url__exact='', owner=user)
|
||||||
|
|
||||||
for bookmark in bookmarks_without_snapshots:
|
for bookmark in bookmarks_without_snapshots:
|
||||||
_create_web_archive_snapshot_task(bookmark.id, False)
|
# To prevent rate limit errors from the Wayback API only try to load the latest snapshots instead of creating
|
||||||
|
# new ones when processing bookmarks in bulk
|
||||||
|
_load_web_archive_snapshot_task(bookmark.id)
|
||||||
|
40
bookmarks/services/wayback.py
Normal file
40
bookmarks/services/wayback.py
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
import time
|
||||||
|
from typing import Dict
|
||||||
|
|
||||||
|
import waybackpy
|
||||||
|
import waybackpy.utils
|
||||||
|
from waybackpy.exceptions import NoCDXRecordFound
|
||||||
|
|
||||||
|
|
||||||
|
class CustomWaybackMachineCDXServerAPI(waybackpy.WaybackMachineCDXServerAPI):
|
||||||
|
"""
|
||||||
|
Customized WaybackMachineCDXServerAPI to work around some issues with retrieving the newest snapshot.
|
||||||
|
See https://github.com/akamhy/waybackpy/issues/176
|
||||||
|
"""
|
||||||
|
|
||||||
|
def newest(self):
|
||||||
|
unix_timestamp = int(time.time())
|
||||||
|
self.closest = waybackpy.utils.unix_timestamp_to_wayback_timestamp(unix_timestamp)
|
||||||
|
self.sort = 'closest'
|
||||||
|
self.limit = -5
|
||||||
|
|
||||||
|
newest_snapshot = None
|
||||||
|
for snapshot in self.snapshots():
|
||||||
|
newest_snapshot = snapshot
|
||||||
|
break
|
||||||
|
|
||||||
|
if not newest_snapshot:
|
||||||
|
raise NoCDXRecordFound(
|
||||||
|
"Wayback Machine's CDX server did not return any records "
|
||||||
|
+ "for the query. The URL may not have any archives "
|
||||||
|
+ " on the Wayback Machine or the URL may have been recently "
|
||||||
|
+ "archived and is still not available on the CDX server."
|
||||||
|
)
|
||||||
|
|
||||||
|
return newest_snapshot
|
||||||
|
|
||||||
|
def add_payload(self, payload: Dict[str, str]) -> None:
|
||||||
|
super().add_payload(payload)
|
||||||
|
# Set fastLatest query param, as we are only using this API to get the latest snapshot and using fastLatest
|
||||||
|
# makes searching for latest snapshots faster
|
||||||
|
payload['fastLatest'] = 'true'
|
@@ -2,136 +2,139 @@
|
|||||||
{% load widget_tweaks %}
|
{% load widget_tweaks %}
|
||||||
|
|
||||||
{% block content %}
|
{% block content %}
|
||||||
<div class="settings-page">
|
<div class="settings-page">
|
||||||
|
|
||||||
{% include 'settings/nav.html' %}
|
{% include 'settings/nav.html' %}
|
||||||
|
|
||||||
{# Profile section #}
|
{# Profile section #}
|
||||||
<section class="content-area">
|
<section class="content-area">
|
||||||
<h2>Profile</h2>
|
<h2>Profile</h2>
|
||||||
<p>
|
<p>
|
||||||
<a href="{% url 'change_password' %}">Change password</a>
|
<a href="{% url 'change_password' %}">Change password</a>
|
||||||
</p>
|
</p>
|
||||||
<form action="{% url 'bookmarks:settings.general' %}" method="post" novalidate>
|
<form action="{% url 'bookmarks:settings.general' %}" method="post" novalidate>
|
||||||
{% csrf_token %}
|
{% csrf_token %}
|
||||||
<div class="form-group">
|
<div class="form-group">
|
||||||
<label for="{{ form.theme.id_for_label }}" class="form-label">Theme</label>
|
<label for="{{ form.theme.id_for_label }}" class="form-label">Theme</label>
|
||||||
{{ form.theme|add_class:"form-select col-2 col-sm-12" }}
|
{{ form.theme|add_class:"form-select col-2 col-sm-12" }}
|
||||||
<div class="form-input-hint">
|
<div class="form-input-hint">
|
||||||
Whether to use a light or dark theme, or automatically adjust the theme based on your system's settings.
|
Whether to use a light or dark theme, or automatically adjust the theme based on your system's settings.
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="form-group">
|
<div class="form-group">
|
||||||
<label for="{{ form.bookmark_date_display.id_for_label }}" class="form-label">Bookmark date format</label>
|
<label for="{{ form.bookmark_date_display.id_for_label }}" class="form-label">Bookmark date format</label>
|
||||||
{{ form.bookmark_date_display|add_class:"form-select col-2 col-sm-12" }}
|
{{ form.bookmark_date_display|add_class:"form-select col-2 col-sm-12" }}
|
||||||
<div class="form-input-hint">
|
<div class="form-input-hint">
|
||||||
Whether to show bookmark dates as relative (how long ago), or as absolute dates. Alternatively the date can be hidden.
|
Whether to show bookmark dates as relative (how long ago), or as absolute dates. Alternatively the date can
|
||||||
</div>
|
be hidden.
|
||||||
</div>
|
</div>
|
||||||
<div class="form-group">
|
</div>
|
||||||
<label for="{{ form.bookmark_link_target.id_for_label }}" class="form-label">Open bookmarks in</label>
|
<div class="form-group">
|
||||||
{{ form.bookmark_link_target|add_class:"form-select col-2 col-sm-12" }}
|
<label for="{{ form.bookmark_link_target.id_for_label }}" class="form-label">Open bookmarks in</label>
|
||||||
<div class="form-input-hint">
|
{{ form.bookmark_link_target|add_class:"form-select col-2 col-sm-12" }}
|
||||||
Whether to open bookmarks a new page or in the same page.
|
<div class="form-input-hint">
|
||||||
</div>
|
Whether to open bookmarks a new page or in the same page.
|
||||||
</div>
|
</div>
|
||||||
<div class="form-group">
|
</div>
|
||||||
<label for="{{ form.web_archive_integration.id_for_label }}" class="form-label">Internet Archive
|
<div class="form-group">
|
||||||
integration</label>
|
<label for="{{ form.web_archive_integration.id_for_label }}" class="form-label">Internet Archive
|
||||||
{{ form.web_archive_integration|add_class:"form-select col-2 col-sm-12" }}
|
integration</label>
|
||||||
<div class="form-input-hint">
|
{{ form.web_archive_integration|add_class:"form-select col-2 col-sm-12" }}
|
||||||
Enabling this feature will automatically create snapshots of bookmarked websites on the <a
|
<div class="form-input-hint">
|
||||||
href="https://web.archive.org/" target="_blank" rel="noopener">Internet Archive Wayback
|
Enabling this feature will automatically create snapshots of bookmarked websites on the <a
|
||||||
Machine</a>. This allows
|
href="https://web.archive.org/" target="_blank" rel="noopener">Internet Archive Wayback
|
||||||
to preserve, and later access, the website as it was at the point in time it was bookmarked, in
|
Machine</a>.
|
||||||
case it goes offline or its content is modified.
|
This allows to preserve, and later access the website as it was at the point in time it was bookmarked, in
|
||||||
</div>
|
case it goes offline or its content is modified.
|
||||||
</div>
|
Please consider donating to the <a href="https://archive.org/donate/index.php" target="_blank"
|
||||||
<div class="form-group">
|
rel="noopener">Internet Archive</a> if you make use of this feature.
|
||||||
<label for="{{ form.enable_sharing.id_for_label }}" class="form-checkbox">
|
</div>
|
||||||
{{ form.enable_sharing }}
|
</div>
|
||||||
<i class="form-icon"></i> Enable bookmark sharing
|
<div class="form-group">
|
||||||
</label>
|
<label for="{{ form.enable_sharing.id_for_label }}" class="form-checkbox">
|
||||||
<div class="form-input-hint">
|
{{ form.enable_sharing }}
|
||||||
Allows to share bookmarks with other users, and to view shared bookmarks.
|
<i class="form-icon"></i> Enable bookmark sharing
|
||||||
Disabling this feature will hide all previously shared bookmarks from other users.
|
</label>
|
||||||
</div>
|
<div class="form-input-hint">
|
||||||
</div>
|
Allows to share bookmarks with other users, and to view shared bookmarks.
|
||||||
<div class="form-group">
|
Disabling this feature will hide all previously shared bookmarks from other users.
|
||||||
<input type="submit" value="Save" class="btn btn-primary mt-2">
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</form>
|
<div class="form-group">
|
||||||
</section>
|
<input type="submit" value="Save" class="btn btn-primary mt-2">
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
</section>
|
||||||
|
|
||||||
{# Import section #}
|
{# Import section #}
|
||||||
<section class="content-area">
|
<section class="content-area">
|
||||||
<h2>Import</h2>
|
<h2>Import</h2>
|
||||||
<p>Import bookmarks and tags in the Netscape HTML format. This will execute a sync where new bookmarks are
|
<p>Import bookmarks and tags in the Netscape HTML format. This will execute a sync where new bookmarks are
|
||||||
added and existing ones are updated.</p>
|
added and existing ones are updated.</p>
|
||||||
<form method="post" enctype="multipart/form-data" action="{% url 'bookmarks:settings.import' %}">
|
<form method="post" enctype="multipart/form-data" action="{% url 'bookmarks:settings.import' %}">
|
||||||
{% csrf_token %}
|
{% csrf_token %}
|
||||||
<div class="form-group">
|
<div class="form-group">
|
||||||
<div class="input-group col-8 col-md-12">
|
<div class="input-group col-8 col-md-12">
|
||||||
<input class="form-input" type="file" name="import_file">
|
<input class="form-input" type="file" name="import_file">
|
||||||
<input type="submit" class="input-group-btn col-2 btn btn-primary" value="Upload">
|
<input type="submit" class="input-group-btn col-2 btn btn-primary" value="Upload">
|
||||||
</div>
|
</div>
|
||||||
{% if import_success_message %}
|
{% if import_success_message %}
|
||||||
<div class="has-success">
|
<div class="has-success">
|
||||||
<p class="form-input-hint">
|
<p class="form-input-hint">
|
||||||
{{ import_success_message }}
|
{{ import_success_message }}
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% if import_errors_message %}
|
{% if import_errors_message %}
|
||||||
<div class="has-error">
|
<div class="has-error">
|
||||||
<p class="form-input-hint">
|
<p class="form-input-hint">
|
||||||
{{ import_errors_message }}
|
{{ import_errors_message }}
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
</form>
|
</form>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
{# Export section #}
|
{# Export section #}
|
||||||
<section class="content-area">
|
<section class="content-area">
|
||||||
<h2>Export</h2>
|
<h2>Export</h2>
|
||||||
<p>Export all bookmarks in Netscape HTML format.</p>
|
<p>Export all bookmarks in Netscape HTML format.</p>
|
||||||
<a class="btn btn-primary" href="{% url 'bookmarks:settings.export' %}">Download (.html)</a>
|
<a class="btn btn-primary" href="{% url 'bookmarks:settings.export' %}">Download (.html)</a>
|
||||||
{% if export_error %}
|
{% if export_error %}
|
||||||
<div class="has-error">
|
<div class="has-error">
|
||||||
<p class="form-input-hint">
|
<p class="form-input-hint">
|
||||||
{{ export_error }}
|
{{ export_error }}
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
{# About section #}
|
{# About section #}
|
||||||
<section class="content-area about">
|
<section class="content-area about">
|
||||||
<h2>About</h2>
|
<h2>About</h2>
|
||||||
<table class="table">
|
<table class="table">
|
||||||
<tbody>
|
<tbody>
|
||||||
<tr>
|
<tr>
|
||||||
<td>Version</td>
|
<td>Version</td>
|
||||||
<td>{{ version_info }}</td>
|
<td>{{ version_info }}</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td rowspan="3" style="vertical-align: top">Links</td>
|
<td rowspan="3" style="vertical-align: top">Links</td>
|
||||||
<td><a href="https://github.com/sissbruecker/linkding/"
|
<td><a href="https://github.com/sissbruecker/linkding/"
|
||||||
target="_blank">GitHub</a></td>
|
target="_blank">GitHub</a></td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><a href="https://github.com/sissbruecker/linkding#documentation"
|
<td><a href="https://github.com/sissbruecker/linkding#documentation"
|
||||||
target="_blank">Documentation</a></td>
|
target="_blank">Documentation</a></td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><a href="https://github.com/sissbruecker/linkding/blob/master/CHANGELOG.md"
|
<td><a href="https://github.com/sissbruecker/linkding/blob/master/CHANGELOG.md"
|
||||||
target="_blank">Changelog</a></td>
|
target="_blank">Changelog</a></td>
|
||||||
</tr>
|
</tr>
|
||||||
</tbody>
|
</tbody>
|
||||||
</table>
|
</table>
|
||||||
</section>
|
</section>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
@@ -1,25 +1,51 @@
|
|||||||
|
import datetime
|
||||||
|
from dataclasses import dataclass
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
import waybackpy
|
import waybackpy
|
||||||
from background_task.models import Task
|
from background_task.models import Task
|
||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
from django.test import TestCase, override_settings
|
from django.test import TestCase, override_settings
|
||||||
|
from waybackpy.exceptions import WaybackError
|
||||||
|
|
||||||
from bookmarks.models import Bookmark, UserProfile
|
import bookmarks.services.wayback
|
||||||
|
from bookmarks.models import UserProfile
|
||||||
from bookmarks.services import tasks
|
from bookmarks.services import tasks
|
||||||
from bookmarks.tests.helpers import BookmarkFactoryMixin, disable_logging
|
from bookmarks.tests.helpers import BookmarkFactoryMixin, disable_logging
|
||||||
|
|
||||||
|
|
||||||
class MockWaybackMachineSaveAPI:
|
class MockWaybackMachineSaveAPI:
|
||||||
def __init__(self, archive_url: str):
|
def __init__(self, archive_url: str = 'https://example.com/created_snapshot', fail_on_save: bool = False):
|
||||||
self.archive_url = archive_url
|
self.archive_url = archive_url
|
||||||
|
self.fail_on_save = fail_on_save
|
||||||
|
|
||||||
def save(self):
|
def save(self):
|
||||||
|
if self.fail_on_save:
|
||||||
|
raise WaybackError
|
||||||
return self
|
return self
|
||||||
|
|
||||||
class MockWaybackUrlWithSaveError:
|
|
||||||
def save(self):
|
@dataclass
|
||||||
raise NotImplementedError
|
class MockCdxSnapshot:
|
||||||
|
archive_url: str
|
||||||
|
datetime_timestamp: datetime.datetime
|
||||||
|
|
||||||
|
|
||||||
|
class MockWaybackMachineCDXServerAPI:
|
||||||
|
def __init__(self,
|
||||||
|
archive_url: str = 'https://example.com/newest_snapshot',
|
||||||
|
has_no_snapshot=False,
|
||||||
|
fail_loading_snapshot=False):
|
||||||
|
self.archive_url = archive_url
|
||||||
|
self.has_no_snapshot = has_no_snapshot
|
||||||
|
self.fail_loading_snapshot = fail_loading_snapshot
|
||||||
|
|
||||||
|
def newest(self):
|
||||||
|
if self.has_no_snapshot:
|
||||||
|
return None
|
||||||
|
if self.fail_loading_snapshot:
|
||||||
|
raise WaybackError
|
||||||
|
return MockCdxSnapshot(self.archive_url, datetime.datetime.now())
|
||||||
|
|
||||||
|
|
||||||
class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
|
class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
|
||||||
@@ -50,49 +76,130 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
|
|||||||
def test_create_web_archive_snapshot_should_update_snapshot_url(self):
|
def test_create_web_archive_snapshot_should_update_snapshot_url(self):
|
||||||
bookmark = self.setup_bookmark()
|
bookmark = self.setup_bookmark()
|
||||||
|
|
||||||
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://example.com')):
|
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI()):
|
||||||
tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False)
|
tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False)
|
||||||
self.run_pending_task(tasks._create_web_archive_snapshot_task)
|
self.run_pending_task(tasks._create_web_archive_snapshot_task)
|
||||||
bookmark.refresh_from_db()
|
bookmark.refresh_from_db()
|
||||||
|
|
||||||
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')
|
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com/created_snapshot')
|
||||||
|
|
||||||
def test_create_web_archive_snapshot_should_handle_missing_bookmark_id(self):
|
def test_create_web_archive_snapshot_should_handle_missing_bookmark_id(self):
|
||||||
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://example.com')) as mock_wayback_url:
|
with patch.object(waybackpy, 'WaybackMachineSaveAPI',
|
||||||
|
return_value=MockWaybackMachineSaveAPI()) as mock_save_api:
|
||||||
tasks._create_web_archive_snapshot_task(123, False)
|
tasks._create_web_archive_snapshot_task(123, False)
|
||||||
self.run_pending_task(tasks._create_web_archive_snapshot_task)
|
self.run_pending_task(tasks._create_web_archive_snapshot_task)
|
||||||
|
|
||||||
mock_wayback_url.assert_not_called()
|
mock_save_api.assert_not_called()
|
||||||
|
|
||||||
def test_create_web_archive_snapshot_should_handle_wayback_save_error(self):
|
|
||||||
bookmark = self.setup_bookmark()
|
|
||||||
|
|
||||||
with patch.object(waybackpy, 'WaybackMachineSaveAPI',
|
|
||||||
return_value=MockWaybackUrlWithSaveError()):
|
|
||||||
with self.assertRaises(NotImplementedError):
|
|
||||||
tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False)
|
|
||||||
self.run_pending_task(tasks._create_web_archive_snapshot_task)
|
|
||||||
|
|
||||||
def test_create_web_archive_snapshot_should_skip_if_snapshot_exists(self):
|
def test_create_web_archive_snapshot_should_skip_if_snapshot_exists(self):
|
||||||
bookmark = self.setup_bookmark(web_archive_snapshot_url='https://example.com')
|
bookmark = self.setup_bookmark(web_archive_snapshot_url='https://example.com')
|
||||||
|
|
||||||
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://other.com')):
|
with patch.object(waybackpy, 'WaybackMachineSaveAPI',
|
||||||
|
return_value=MockWaybackMachineSaveAPI()) as mock_save_api:
|
||||||
tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False)
|
tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False)
|
||||||
self.run_pending_task(tasks._create_web_archive_snapshot_task)
|
self.run_pending_task(tasks._create_web_archive_snapshot_task)
|
||||||
bookmark.refresh_from_db()
|
|
||||||
|
|
||||||
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')
|
mock_save_api.assert_not_called()
|
||||||
|
|
||||||
def test_create_web_archive_snapshot_should_force_update_snapshot(self):
|
def test_create_web_archive_snapshot_should_force_update_snapshot(self):
|
||||||
bookmark = self.setup_bookmark(web_archive_snapshot_url='https://example.com')
|
bookmark = self.setup_bookmark(web_archive_snapshot_url='https://example.com')
|
||||||
|
|
||||||
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://other.com')):
|
with patch.object(waybackpy, 'WaybackMachineSaveAPI',
|
||||||
|
return_value=MockWaybackMachineSaveAPI('https://other.com')):
|
||||||
tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, True)
|
tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, True)
|
||||||
self.run_pending_task(tasks._create_web_archive_snapshot_task)
|
self.run_pending_task(tasks._create_web_archive_snapshot_task)
|
||||||
bookmark.refresh_from_db()
|
bookmark.refresh_from_db()
|
||||||
|
|
||||||
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://other.com')
|
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://other.com')
|
||||||
|
|
||||||
|
def test_create_web_archive_snapshot_should_use_newest_snapshot_as_fallback(self):
|
||||||
|
bookmark = self.setup_bookmark()
|
||||||
|
|
||||||
|
with patch.object(waybackpy, 'WaybackMachineSaveAPI',
|
||||||
|
return_value=MockWaybackMachineSaveAPI(fail_on_save=True)):
|
||||||
|
with patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
|
||||||
|
return_value=MockWaybackMachineCDXServerAPI()):
|
||||||
|
tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False)
|
||||||
|
self.run_pending_task(tasks._create_web_archive_snapshot_task)
|
||||||
|
|
||||||
|
bookmark.refresh_from_db()
|
||||||
|
self.assertEqual('https://example.com/newest_snapshot', bookmark.web_archive_snapshot_url)
|
||||||
|
|
||||||
|
def test_create_web_archive_snapshot_should_ignore_missing_newest_snapshot(self):
|
||||||
|
bookmark = self.setup_bookmark()
|
||||||
|
|
||||||
|
with patch.object(waybackpy, 'WaybackMachineSaveAPI',
|
||||||
|
return_value=MockWaybackMachineSaveAPI(fail_on_save=True)):
|
||||||
|
with patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
|
||||||
|
return_value=MockWaybackMachineCDXServerAPI(has_no_snapshot=True)):
|
||||||
|
tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False)
|
||||||
|
self.run_pending_task(tasks._create_web_archive_snapshot_task)
|
||||||
|
|
||||||
|
bookmark.refresh_from_db()
|
||||||
|
self.assertEqual('', bookmark.web_archive_snapshot_url)
|
||||||
|
|
||||||
|
def test_create_web_archive_snapshot_should_ignore_newest_snapshot_errors(self):
|
||||||
|
bookmark = self.setup_bookmark()
|
||||||
|
|
||||||
|
with patch.object(waybackpy, 'WaybackMachineSaveAPI',
|
||||||
|
return_value=MockWaybackMachineSaveAPI(fail_on_save=True)):
|
||||||
|
with patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
|
||||||
|
return_value=MockWaybackMachineCDXServerAPI(fail_loading_snapshot=True)):
|
||||||
|
tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False)
|
||||||
|
self.run_pending_task(tasks._create_web_archive_snapshot_task)
|
||||||
|
|
||||||
|
bookmark.refresh_from_db()
|
||||||
|
self.assertEqual('', bookmark.web_archive_snapshot_url)
|
||||||
|
|
||||||
|
def test_load_web_archive_snapshot_should_update_snapshot_url(self):
|
||||||
|
bookmark = self.setup_bookmark()
|
||||||
|
|
||||||
|
with patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
|
||||||
|
return_value=MockWaybackMachineCDXServerAPI()):
|
||||||
|
tasks._load_web_archive_snapshot_task(bookmark.id)
|
||||||
|
self.run_pending_task(tasks._load_web_archive_snapshot_task)
|
||||||
|
|
||||||
|
bookmark.refresh_from_db()
|
||||||
|
self.assertEqual('https://example.com/newest_snapshot', bookmark.web_archive_snapshot_url)
|
||||||
|
|
||||||
|
def test_load_web_archive_snapshot_should_handle_missing_bookmark_id(self):
|
||||||
|
with patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
|
||||||
|
return_value=MockWaybackMachineCDXServerAPI()) as mock_cdx_api:
|
||||||
|
tasks._load_web_archive_snapshot_task(123)
|
||||||
|
self.run_pending_task(tasks._load_web_archive_snapshot_task)
|
||||||
|
|
||||||
|
mock_cdx_api.assert_not_called()
|
||||||
|
|
||||||
|
def test_load_web_archive_snapshot_should_skip_if_snapshot_exists(self):
|
||||||
|
bookmark = self.setup_bookmark(web_archive_snapshot_url='https://example.com')
|
||||||
|
|
||||||
|
with patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
|
||||||
|
return_value=MockWaybackMachineCDXServerAPI()) as mock_cdx_api:
|
||||||
|
tasks._load_web_archive_snapshot_task(bookmark.id)
|
||||||
|
self.run_pending_task(tasks._load_web_archive_snapshot_task)
|
||||||
|
|
||||||
|
mock_cdx_api.assert_not_called()
|
||||||
|
|
||||||
|
def test_load_web_archive_snapshot_should_handle_missing_snapshot(self):
|
||||||
|
bookmark = self.setup_bookmark()
|
||||||
|
|
||||||
|
with patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
|
||||||
|
return_value=MockWaybackMachineCDXServerAPI(has_no_snapshot=True)):
|
||||||
|
tasks._load_web_archive_snapshot_task(bookmark.id)
|
||||||
|
self.run_pending_task(tasks._load_web_archive_snapshot_task)
|
||||||
|
|
||||||
|
self.assertEqual('', bookmark.web_archive_snapshot_url)
|
||||||
|
|
||||||
|
def test_load_web_archive_snapshot_should_handle_wayback_errors(self):
|
||||||
|
bookmark = self.setup_bookmark()
|
||||||
|
|
||||||
|
with patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
|
||||||
|
return_value=MockWaybackMachineCDXServerAPI(fail_loading_snapshot=True)):
|
||||||
|
tasks._load_web_archive_snapshot_task(bookmark.id)
|
||||||
|
self.run_pending_task(tasks._load_web_archive_snapshot_task)
|
||||||
|
|
||||||
|
self.assertEqual('', bookmark.web_archive_snapshot_url)
|
||||||
|
|
||||||
@override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
|
@override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
|
||||||
def test_create_web_archive_snapshot_should_not_run_when_background_tasks_are_disabled(self):
|
def test_create_web_archive_snapshot_should_not_run_when_background_tasks_are_disabled(self):
|
||||||
bookmark = self.setup_bookmark()
|
bookmark = self.setup_bookmark()
|
||||||
@@ -109,33 +216,23 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
|
|||||||
|
|
||||||
self.assertEqual(Task.objects.count(), 0)
|
self.assertEqual(Task.objects.count(), 0)
|
||||||
|
|
||||||
def test_schedule_bookmarks_without_snapshots_should_create_snapshot_task_for_all_bookmarks_without_snapshot(self):
|
def test_schedule_bookmarks_without_snapshots_should_load_snapshot_for_all_bookmarks_without_snapshot(self):
|
||||||
user = self.get_or_create_test_user()
|
user = self.get_or_create_test_user()
|
||||||
self.setup_bookmark()
|
self.setup_bookmark()
|
||||||
self.setup_bookmark()
|
self.setup_bookmark()
|
||||||
self.setup_bookmark()
|
self.setup_bookmark()
|
||||||
|
|
||||||
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://example.com')):
|
|
||||||
tasks.schedule_bookmarks_without_snapshots(user)
|
|
||||||
self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task)
|
|
||||||
self.run_all_pending_tasks(tasks._create_web_archive_snapshot_task)
|
|
||||||
|
|
||||||
for bookmark in Bookmark.objects.all():
|
|
||||||
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')
|
|
||||||
|
|
||||||
def test_schedule_bookmarks_without_snapshots_should_not_update_bookmarks_with_existing_snapshot(self):
|
|
||||||
user = self.get_or_create_test_user()
|
|
||||||
self.setup_bookmark(web_archive_snapshot_url='https://example.com')
|
self.setup_bookmark(web_archive_snapshot_url='https://example.com')
|
||||||
self.setup_bookmark(web_archive_snapshot_url='https://example.com')
|
self.setup_bookmark(web_archive_snapshot_url='https://example.com')
|
||||||
self.setup_bookmark(web_archive_snapshot_url='https://example.com')
|
self.setup_bookmark(web_archive_snapshot_url='https://example.com')
|
||||||
|
|
||||||
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://other.com')):
|
tasks.schedule_bookmarks_without_snapshots(user)
|
||||||
tasks.schedule_bookmarks_without_snapshots(user)
|
self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task)
|
||||||
self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task)
|
|
||||||
self.run_all_pending_tasks(tasks._create_web_archive_snapshot_task)
|
|
||||||
|
|
||||||
for bookmark in Bookmark.objects.all():
|
task_list = Task.objects.all()
|
||||||
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')
|
self.assertEqual(task_list.count(), 3)
|
||||||
|
|
||||||
|
for task in task_list:
|
||||||
|
self.assertEqual(task.task_name, 'bookmarks.services.tasks._load_web_archive_snapshot_task')
|
||||||
|
|
||||||
def test_schedule_bookmarks_without_snapshots_should_only_update_user_owned_bookmarks(self):
|
def test_schedule_bookmarks_without_snapshots_should_only_update_user_owned_bookmarks(self):
|
||||||
user = self.get_or_create_test_user()
|
user = self.get_or_create_test_user()
|
||||||
@@ -147,16 +244,11 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
|
|||||||
self.setup_bookmark(user=other_user)
|
self.setup_bookmark(user=other_user)
|
||||||
self.setup_bookmark(user=other_user)
|
self.setup_bookmark(user=other_user)
|
||||||
|
|
||||||
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://example.com')):
|
tasks.schedule_bookmarks_without_snapshots(user)
|
||||||
tasks.schedule_bookmarks_without_snapshots(user)
|
self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task)
|
||||||
self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task)
|
|
||||||
self.run_all_pending_tasks(tasks._create_web_archive_snapshot_task)
|
|
||||||
|
|
||||||
for bookmark in Bookmark.objects.all().filter(owner=user):
|
task_list = Task.objects.all()
|
||||||
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')
|
self.assertEqual(task_list.count(), 3)
|
||||||
|
|
||||||
for bookmark in Bookmark.objects.all().filter(owner=other_user):
|
|
||||||
self.assertEqual(bookmark.web_archive_snapshot_url, '')
|
|
||||||
|
|
||||||
@override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
|
@override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
|
||||||
def test_schedule_bookmarks_without_snapshots_should_not_run_when_background_tasks_are_disabled(self):
|
def test_schedule_bookmarks_without_snapshots_should_not_run_when_background_tasks_are_disabled(self):
|
||||||
|
Reference in New Issue
Block a user