diff --git a/bookmarks/services/importer.py b/bookmarks/services/importer.py
index 5a35427..d4bbd5a 100644
--- a/bookmarks/services/importer.py
+++ b/bookmarks/services/importer.py
@@ -79,8 +79,6 @@ def import_netscape_html(
for batch in batches:
_import_batch(batch, user, options, tag_cache, result)
- # Create snapshots for newly imported bookmarks
- tasks.schedule_bookmarks_without_snapshots(user)
# Load favicons for newly imported bookmarks
tasks.schedule_bookmarks_without_favicons(user)
# Load previews for newly imported bookmarks
diff --git a/bookmarks/services/tasks.py b/bookmarks/services/tasks.py
index a6de13f..a0655fe 100644
--- a/bookmarks/services/tasks.py
+++ b/bookmarks/services/tasks.py
@@ -12,9 +12,8 @@ from django.utils import timezone, formats
from huey import crontab
from huey.contrib.djhuey import HUEY as huey
from huey.exceptions import TaskLockedException
-from waybackpy.exceptions import WaybackError, TooManyRequestsError, NoCDXRecordFound
+from waybackpy.exceptions import WaybackError, TooManyRequestsError
-import bookmarks.services.wayback
from bookmarks.models import Bookmark, BookmarkAsset, UserProfile
from bookmarks.services import favicon_loader, singlefile, preview_image_loader
from bookmarks.services.website_loader import DEFAULT_USER_AGENT
@@ -66,29 +65,6 @@ def create_web_archive_snapshot(user: User, bookmark: Bookmark, force_update: bo
_create_web_archive_snapshot_task(bookmark.id, force_update)
-def _load_newest_snapshot(bookmark: Bookmark):
- try:
- logger.info(f"Load existing snapshot for bookmark. url={bookmark.url}")
- cdx_api = bookmarks.services.wayback.CustomWaybackMachineCDXServerAPI(
- bookmark.url
- )
- existing_snapshot = cdx_api.newest()
-
- if existing_snapshot:
- bookmark.web_archive_snapshot_url = existing_snapshot.archive_url
- bookmark.save(update_fields=["web_archive_snapshot_url"])
- logger.info(
- f"Using newest snapshot. url={bookmark.url} from={existing_snapshot.datetime_timestamp}"
- )
-
- except NoCDXRecordFound:
- logger.info(f"Could not find any snapshots for bookmark. url={bookmark.url}")
- except WaybackError as error:
- logger.error(
- f"Failed to load existing snapshot. url={bookmark.url}", exc_info=error
- )
-
-
def _create_snapshot(bookmark: Bookmark):
logger.info(f"Create new snapshot for bookmark. url={bookmark.url}...")
archive = waybackpy.WaybackMachineSaveAPI(
@@ -117,48 +93,27 @@ def _create_web_archive_snapshot_task(bookmark_id: int, force_update: bool):
return
except TooManyRequestsError:
logger.error(
- f"Failed to create snapshot due to rate limiting, trying to load newest snapshot as fallback. url={bookmark.url}"
+ f"Failed to create snapshot due to rate limiting. url={bookmark.url}"
)
except WaybackError as error:
logger.error(
- f"Failed to create snapshot, trying to load newest snapshot as fallback. url={bookmark.url}",
+ f"Failed to create snapshot. url={bookmark.url}",
exc_info=error,
)
- # Load the newest snapshot as fallback
- _load_newest_snapshot(bookmark)
-
@task()
def _load_web_archive_snapshot_task(bookmark_id: int):
- try:
- bookmark = Bookmark.objects.get(id=bookmark_id)
- except Bookmark.DoesNotExist:
- return
- # Skip if snapshot exists
- if bookmark.web_archive_snapshot_url:
- return
- # Load the newest snapshot
- _load_newest_snapshot(bookmark)
-
-
-def schedule_bookmarks_without_snapshots(user: User):
- if is_web_archive_integration_active(user):
- _schedule_bookmarks_without_snapshots_task(user.id)
+ # Loading snapshots from CDX API has been removed, keeping the task function
+ # for now to prevent errors when huey tries to run the task
+ pass
@task()
def _schedule_bookmarks_without_snapshots_task(user_id: int):
- user = get_user_model().objects.get(id=user_id)
- bookmarks_without_snapshots = Bookmark.objects.filter(
- web_archive_snapshot_url__exact="", owner=user
- )
-
- # TODO: Implement bulk task creation
- for bookmark in bookmarks_without_snapshots:
- # To prevent rate limit errors from the Wayback API only try to load the latest snapshots instead of creating
- # new ones when processing bookmarks in bulk
- _load_web_archive_snapshot_task(bookmark.id)
+ # Loading snapshots from CDX API has been removed, keeping the task function
+ # for now to prevent errors when huey tries to run the task
+ pass
def is_favicon_feature_active(user: User) -> bool:
diff --git a/bookmarks/services/wayback.py b/bookmarks/services/wayback.py
index b527b07..aaeb99d 100644
--- a/bookmarks/services/wayback.py
+++ b/bookmarks/services/wayback.py
@@ -1,42 +1,20 @@
-import time
-from typing import Dict
+import datetime
-import waybackpy
-import waybackpy.utils
-from waybackpy.exceptions import NoCDXRecordFound
+from django.utils import timezone
-class CustomWaybackMachineCDXServerAPI(waybackpy.WaybackMachineCDXServerAPI):
+def generate_fallback_webarchive_url(
+ url: str, timestamp: datetime.datetime
+) -> str | None:
"""
- Customized WaybackMachineCDXServerAPI to work around some issues with retrieving the newest snapshot.
- See https://github.com/akamhy/waybackpy/issues/176
+ Generate a URL to the web archive for the given URL and timestamp.
+ A snapshot for the specific timestamp might not exist, in which case the
+ web archive will show the closest snapshot to the given timestamp.
+ If there is no snapshot at all the URL will be invalid.
"""
+ if not url:
+ return None
+ if not timestamp:
+ timestamp = timezone.now()
- def newest(self):
- unix_timestamp = int(time.time())
- self.closest = waybackpy.utils.unix_timestamp_to_wayback_timestamp(
- unix_timestamp
- )
- self.sort = "closest"
- self.limit = -5
-
- newest_snapshot = None
- for snapshot in self.snapshots():
- newest_snapshot = snapshot
- break
-
- if not newest_snapshot:
- raise NoCDXRecordFound(
- "Wayback Machine's CDX server did not return any records "
- + "for the query. The URL may not have any archives "
- + " on the Wayback Machine or the URL may have been recently "
- + "archived and is still not available on the CDX server."
- )
-
- return newest_snapshot
-
- def add_payload(self, payload: Dict[str, str]) -> None:
- super().add_payload(payload)
- # Set fastLatest query param, as we are only using this API to get the latest snapshot and using fastLatest
- # makes searching for latest snapshots faster
- payload["fastLatest"] = "true"
+ return f"https://web.archive.org/web/{timestamp.strftime('%Y%m%d%H%M%S')}/{url}"
diff --git a/bookmarks/signals.py b/bookmarks/signals.py
index 8507c84..e8ca82a 100644
--- a/bookmarks/signals.py
+++ b/bookmarks/signals.py
@@ -1,15 +1,7 @@
from django.conf import settings
-from django.contrib.auth import user_logged_in
from django.db.backends.signals import connection_created
from django.dispatch import receiver
-from bookmarks.services import tasks
-
-
-@receiver(user_logged_in)
-def user_logged_in(sender, request, user, **kwargs):
- tasks.schedule_bookmarks_without_snapshots(user)
-
@receiver(connection_created)
def extend_sqlite(connection=None, **kwargs):
diff --git a/bookmarks/tests/test_bookmark_index_view.py b/bookmarks/tests/test_bookmark_index_view.py
index f7be4a1..608f281 100644
--- a/bookmarks/tests/test_bookmark_index_view.py
+++ b/bookmarks/tests/test_bookmark_index_view.py
@@ -6,11 +6,7 @@ from django.test import TestCase
from django.urls import reverse
from bookmarks.models import Bookmark, BookmarkSearch, Tag, UserProfile
-from bookmarks.tests.helpers import (
- BookmarkFactoryMixin,
- HtmlTestMixin,
- collapse_whitespace,
-)
+from bookmarks.tests.helpers import BookmarkFactoryMixin, HtmlTestMixin
class BookmarkIndexViewTestCase(TestCase, BookmarkFactoryMixin, HtmlTestMixin):
diff --git a/bookmarks/tests/test_bookmarks_list_template.py b/bookmarks/tests/test_bookmarks_list_template.py
index dc7522e..a8658f4 100644
--- a/bookmarks/tests/test_bookmarks_list_template.py
+++ b/bookmarks/tests/test_bookmarks_list_template.py
@@ -1,3 +1,4 @@
+import datetime
from typing import Type
from dateutil.relativedelta import relativedelta
@@ -36,15 +37,6 @@ class BookmarkListTemplateTest(TestCase, BookmarkFactoryMixin, HtmlTestMixin):
html,
)
- def assertDateLabel(self, html: str, label_content: str):
- self.assertInHTML(
- f"""
- {label_content}
- |
- """,
- html,
- )
-
def assertWebArchiveLink(
self, html: str, label_content: str, url: str, link_target: str = "_blank"
):
@@ -465,15 +457,6 @@ class BookmarkListTemplateTest(TestCase, BookmarkFactoryMixin, HtmlTestMixin):
style = bookmark_list["style"]
self.assertIn("--ld-bookmark-description-max-lines:3;", style)
- def test_should_respect_absolute_date_setting(self):
- bookmark = self.setup_date_format_test(
- UserProfile.BOOKMARK_DATE_DISPLAY_ABSOLUTE
- )
- html = self.render_template()
- formatted_date = formats.date_format(bookmark.date_added, "SHORT_DATE_FORMAT")
-
- self.assertDateLabel(html, formatted_date)
-
def test_should_render_web_archive_link_with_absolute_date_setting(self):
bookmark = self.setup_date_format_test(
UserProfile.BOOKMARK_DATE_DISPLAY_ABSOLUTE,
@@ -486,12 +469,6 @@ class BookmarkListTemplateTest(TestCase, BookmarkFactoryMixin, HtmlTestMixin):
html, formatted_date, bookmark.web_archive_snapshot_url
)
- def test_should_respect_relative_date_setting(self):
- self.setup_date_format_test(UserProfile.BOOKMARK_DATE_DISPLAY_RELATIVE)
- html = self.render_template()
-
- self.assertDateLabel(html, "1 week ago")
-
def test_should_render_web_archive_link_with_relative_date_setting(self):
bookmark = self.setup_date_format_test(
UserProfile.BOOKMARK_DATE_DISPLAY_RELATIVE,
@@ -501,6 +478,27 @@ class BookmarkListTemplateTest(TestCase, BookmarkFactoryMixin, HtmlTestMixin):
self.assertWebArchiveLink(html, "1 week ago", bookmark.web_archive_snapshot_url)
+ def test_should_render_generated_web_archive_link_without_saved_snapshot_url(self):
+ user = self.get_or_create_test_user()
+ user.profile.bookmark_date_display = UserProfile.BOOKMARK_DATE_DISPLAY_ABSOLUTE
+ user.profile.save()
+
+ date_added = timezone.datetime(
+ 2023, 8, 11, 21, 45, 11, tzinfo=datetime.timezone.utc
+ )
+ bookmark = self.setup_bookmark(
+ url="https://example.com/article", added=date_added
+ )
+
+ html = self.render_template()
+ formatted_date = formats.date_format(bookmark.date_added, "SHORT_DATE_FORMAT")
+
+ self.assertWebArchiveLink(
+ html,
+ formatted_date,
+ "https://web.archive.org/web/20230811214511/https://example.com/article",
+ )
+
def test_bookmark_link_target_should_be_blank_by_default(self):
bookmark = self.setup_bookmark()
html = self.render_template()
diff --git a/bookmarks/tests/test_bookmarks_tasks.py b/bookmarks/tests/test_bookmarks_tasks.py
index ade2532..25a42c9 100644
--- a/bookmarks/tests/test_bookmarks_tasks.py
+++ b/bookmarks/tests/test_bookmarks_tasks.py
@@ -1,6 +1,4 @@
-import datetime
import os.path
-from dataclasses import dataclass
from unittest import mock
import waybackpy
@@ -10,8 +8,6 @@ from django.test import TestCase, override_settings
from huey.contrib.djhuey import HUEY as huey
from waybackpy.exceptions import WaybackError
-import bookmarks.services.favicon_loader
-import bookmarks.services.wayback
from bookmarks.models import BookmarkAsset, UserProfile
from bookmarks.services import tasks, singlefile
from bookmarks.tests.helpers import BookmarkFactoryMixin
@@ -29,12 +25,6 @@ def create_wayback_machine_save_api_mock(
return mock_api
-@dataclass
-class MockCdxSnapshot:
- archive_url: str
- datetime_timestamp: datetime.datetime
-
-
class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
def setUp(self):
@@ -50,17 +40,6 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
)
self.mock_save_api_patcher.start()
- self.mock_cdx_api = mock.Mock()
- self.mock_cdx_api.newest.return_value = MockCdxSnapshot(
- "https://example.com/newest_snapshot", datetime.datetime.now()
- )
- self.mock_cdx_api_patcher = mock.patch.object(
- bookmarks.services.wayback,
- "CustomWaybackMachineCDXServerAPI",
- return_value=self.mock_cdx_api,
- )
- self.mock_cdx_api_patcher.start()
-
self.mock_load_favicon_patcher = mock.patch(
"bookmarks.services.favicon_loader.load_favicon"
)
@@ -90,7 +69,6 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
def tearDown(self):
self.mock_save_api_patcher.stop()
- self.mock_cdx_api_patcher.stop()
self.mock_load_favicon_patcher.stop()
self.mock_singlefile_create_snapshot_patcher.stop()
self.mock_load_preview_image_patcher.stop()
@@ -142,45 +120,6 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.assertEqual(bookmark.web_archive_snapshot_url, "https://other.com")
- def test_create_web_archive_snapshot_should_use_newest_snapshot_as_fallback(self):
- bookmark = self.setup_bookmark()
- self.mock_save_api.save.side_effect = WaybackError
-
- tasks.create_web_archive_snapshot(
- self.get_or_create_test_user(), bookmark, False
- )
-
- bookmark.refresh_from_db()
- self.mock_cdx_api.newest.assert_called_once()
- self.assertEqual(
- "https://example.com/newest_snapshot",
- bookmark.web_archive_snapshot_url,
- )
-
- def test_create_web_archive_snapshot_should_ignore_missing_newest_snapshot(self):
- bookmark = self.setup_bookmark()
- self.mock_save_api.save.side_effect = WaybackError
- self.mock_cdx_api.newest.return_value = None
-
- tasks.create_web_archive_snapshot(
- self.get_or_create_test_user(), bookmark, False
- )
-
- bookmark.refresh_from_db()
- self.assertEqual("", bookmark.web_archive_snapshot_url)
-
- def test_create_web_archive_snapshot_should_ignore_newest_snapshot_errors(self):
- bookmark = self.setup_bookmark()
- self.mock_save_api.save.side_effect = WaybackError
- self.mock_cdx_api.newest.side_effect = WaybackError
-
- tasks.create_web_archive_snapshot(
- self.get_or_create_test_user(), bookmark, False
- )
-
- bookmark.refresh_from_db()
- self.assertEqual("", bookmark.web_archive_snapshot_url)
-
def test_create_web_archive_snapshot_should_not_save_stale_bookmark_data(self):
bookmark = self.setup_bookmark()
@@ -203,69 +142,6 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
bookmark.web_archive_snapshot_url,
)
- def test_load_web_archive_snapshot_should_update_snapshot_url(self):
- bookmark = self.setup_bookmark()
-
- tasks._load_web_archive_snapshot_task(bookmark.id)
-
- bookmark.refresh_from_db()
-
- self.assertEqual(self.executed_count(), 1)
- self.mock_cdx_api.newest.assert_called_once()
- self.assertEqual(
- "https://example.com/newest_snapshot", bookmark.web_archive_snapshot_url
- )
-
- def test_load_web_archive_snapshot_should_handle_missing_bookmark_id(self):
- tasks._load_web_archive_snapshot_task(123)
-
- self.assertEqual(self.executed_count(), 1)
- self.mock_cdx_api.newest.assert_not_called()
-
- def test_load_web_archive_snapshot_should_skip_if_snapshot_exists(self):
- bookmark = self.setup_bookmark(web_archive_snapshot_url="https://example.com")
-
- tasks._load_web_archive_snapshot_task(bookmark.id)
-
- self.assertEqual(self.executed_count(), 1)
- self.mock_cdx_api.newest.assert_not_called()
-
- def test_load_web_archive_snapshot_should_handle_missing_snapshot(self):
- bookmark = self.setup_bookmark()
- self.mock_cdx_api.newest.return_value = None
-
- tasks._load_web_archive_snapshot_task(bookmark.id)
-
- self.assertEqual("", bookmark.web_archive_snapshot_url)
-
- def test_load_web_archive_snapshot_should_handle_wayback_errors(self):
- bookmark = self.setup_bookmark()
- self.mock_cdx_api.newest.side_effect = WaybackError
-
- tasks._load_web_archive_snapshot_task(bookmark.id)
-
- self.assertEqual("", bookmark.web_archive_snapshot_url)
-
- def test_load_web_archive_snapshot_should_not_save_stale_bookmark_data(self):
- bookmark = self.setup_bookmark()
-
- # update bookmark during API call to check that saving
- # the snapshot does not overwrite updated bookmark data
- def mock_newest_impl():
- bookmark.title = "Updated title"
- bookmark.save()
- return mock.DEFAULT
-
- self.mock_cdx_api.newest.side_effect = mock_newest_impl
-
- tasks._load_web_archive_snapshot_task(bookmark.id)
- bookmark.refresh_from_db()
-
- self.assertEqual("Updated title", bookmark.title)
- self.assertEqual(
- "https://example.com/newest_snapshot", bookmark.web_archive_snapshot_url
- )
-
@override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
def test_create_web_archive_snapshot_should_not_run_when_background_tasks_are_disabled(
self,
@@ -292,59 +168,6 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.assertEqual(self.executed_count(), 0)
- def test_schedule_bookmarks_without_snapshots_should_load_snapshot_for_all_bookmarks_without_snapshot(
- self,
- ):
- user = self.get_or_create_test_user()
- self.setup_bookmark()
- self.setup_bookmark()
- self.setup_bookmark()
- self.setup_bookmark(web_archive_snapshot_url="https://example.com")
- self.setup_bookmark(web_archive_snapshot_url="https://example.com")
- self.setup_bookmark(web_archive_snapshot_url="https://example.com")
-
- tasks.schedule_bookmarks_without_snapshots(user)
-
- self.assertEqual(self.executed_count(), 4)
- self.assertEqual(self.mock_cdx_api.newest.call_count, 3)
-
- def test_schedule_bookmarks_without_snapshots_should_only_update_user_owned_bookmarks(
- self,
- ):
- user = self.get_or_create_test_user()
- other_user = User.objects.create_user(
- "otheruser", "otheruser@example.com", "password123"
- )
- self.setup_bookmark()
- self.setup_bookmark()
- self.setup_bookmark()
- self.setup_bookmark(user=other_user)
- self.setup_bookmark(user=other_user)
- self.setup_bookmark(user=other_user)
-
- tasks.schedule_bookmarks_without_snapshots(user)
-
- self.assertEqual(self.mock_cdx_api.newest.call_count, 3)
-
- @override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
- def test_schedule_bookmarks_without_snapshots_should_not_run_when_background_tasks_are_disabled(
- self,
- ):
- tasks.schedule_bookmarks_without_snapshots(self.user)
-
- self.assertEqual(self.executed_count(), 0)
-
- def test_schedule_bookmarks_without_snapshots_should_not_run_when_web_archive_integration_is_disabled(
- self,
- ):
- self.user.profile.web_archive_integration = (
- UserProfile.WEB_ARCHIVE_INTEGRATION_DISABLED
- )
- self.user.profile.save()
- tasks.schedule_bookmarks_without_snapshots(self.user)
-
- self.assertEqual(self.executed_count(), 0)
-
def test_load_favicon_should_create_favicon_file(self):
bookmark = self.setup_bookmark()
diff --git a/bookmarks/tests/test_importer.py b/bookmarks/tests/test_importer.py
index 163b9ad..56b97ce 100644
--- a/bookmarks/tests/test_importer.py
+++ b/bookmarks/tests/test_importer.py
@@ -444,17 +444,6 @@ class ImporterTestCase(TestCase, BookmarkFactoryMixin, ImportTestMixin):
self.assertEqual(Bookmark.objects.all()[0].description, "Example description")
self.assertEqual(Bookmark.objects.all()[0].notes, "Updated notes")
- def test_schedule_snapshot_creation(self):
- user = self.get_or_create_test_user()
- test_html = self.render_html(tags_html="")
-
- with patch.object(
- tasks, "schedule_bookmarks_without_snapshots"
- ) as mock_schedule_bookmarks_without_snapshots:
- import_netscape_html(test_html, user)
-
- mock_schedule_bookmarks_without_snapshots.assert_called_once_with(user)
-
def test_schedule_favicon_loading(self):
user = self.get_or_create_test_user()
test_html = self.render_html(tags_html="")
diff --git a/bookmarks/tests/test_signals.py b/bookmarks/tests/test_signals.py
deleted file mode 100644
index d86a717..0000000
--- a/bookmarks/tests/test_signals.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from unittest.mock import patch
-
-from django.test import TestCase
-
-from bookmarks.services import tasks
-from bookmarks.tests.helpers import BookmarkFactoryMixin
-
-
-class SignalsTestCase(TestCase, BookmarkFactoryMixin):
- def test_login_should_schedule_snapshot_creation(self):
- user = self.get_or_create_test_user()
-
- with patch.object(
- tasks, "schedule_bookmarks_without_snapshots"
- ) as mock_schedule_bookmarks_without_snapshots:
- self.client.force_login(user)
- mock_schedule_bookmarks_without_snapshots.assert_called_once_with(user)
diff --git a/bookmarks/views/partials/contexts.py b/bookmarks/views/partials/contexts.py
index f1412b9..871faab 100644
--- a/bookmarks/views/partials/contexts.py
+++ b/bookmarks/views/partials/contexts.py
@@ -1,12 +1,12 @@
+import re
import urllib.parse
from typing import Set, List
-import re
+from django.conf import settings
from django.core.handlers.wsgi import WSGIRequest
from django.core.paginator import Paginator
from django.db import models
from django.urls import reverse
-from django.conf import settings
from bookmarks import queries
from bookmarks import utils
@@ -18,6 +18,7 @@ from bookmarks.models import (
UserProfile,
Tag,
)
+from bookmarks.services.wayback import generate_fallback_webarchive_url
DEFAULT_PAGE_SIZE = 30
CJK_RE = re.compile(r"[\u4e00-\u9fff]+")
@@ -144,6 +145,10 @@ class BookmarkItem:
self.notes = bookmark.notes
self.tag_names = bookmark.tag_names
self.web_archive_snapshot_url = bookmark.web_archive_snapshot_url
+ if not self.web_archive_snapshot_url:
+ self.web_archive_snapshot_url = generate_fallback_webarchive_url(
+ bookmark.url, bookmark.date_added
+ )
self.favicon_file = bookmark.favicon_file
self.preview_image_file = bookmark.preview_image_file
self.is_archived = bookmark.is_archived