Prefer local snapshot over web archive link in bookmark list links (#1021)

* Prefer local snapshot over web archive link

* Update latest snapshot when it is deleted

* fix filter in migration

* improve migration performance
This commit is contained in:
Sascha Ißbrücker
2025-03-22 19:07:05 +01:00
committed by GitHub
parent 6d9d0e19f1
commit 6bba4f35c8
10 changed files with 300 additions and 10 deletions

View File

@@ -4,7 +4,7 @@ serve:
python manage.py runserver
tasks:
python manage.py process_tasks
python manage.py run_huey
test:
pytest -n auto

View File

@@ -237,6 +237,9 @@ class BookmarkAssetViewSet(
status=status.HTTP_500_INTERNAL_SERVER_ERROR,
)
def perform_destroy(self, instance):
assets.remove_asset(instance)
class TagViewSet(
viewsets.GenericViewSet,

View File

@@ -0,0 +1,46 @@
# Generated by Django 5.1.7 on 2025-03-22 12:28
import django.db.models.deletion
from django.db import migrations, models
from django.db.models import OuterRef, Subquery
def forwards(apps, schema_editor):
# Update the latest snapshot for each bookmark
Bookmark = apps.get_model("bookmarks", "bookmark")
BookmarkAsset = apps.get_model("bookmarks", "bookmarkasset")
latest_snapshots = (
BookmarkAsset.objects.filter(
bookmark=OuterRef("pk"), asset_type="snapshot", status="complete"
)
.order_by("-date_created")
.values("id")[:1]
)
Bookmark.objects.update(latest_snapshot_id=Subquery(latest_snapshots))
def reverse(apps, schema_editor):
pass
class Migration(migrations.Migration):
dependencies = [
("bookmarks", "0043_userprofile_collapse_side_panel"),
]
operations = [
migrations.AddField(
model_name="bookmark",
name="latest_snapshot",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="latest_snapshot",
to="bookmarks.bookmarkasset",
),
),
migrations.RunPython(forwards, reverse),
]

View File

@@ -71,6 +71,13 @@ class Bookmark(models.Model):
date_accessed = models.DateTimeField(blank=True, null=True)
owner = models.ForeignKey(User, on_delete=models.CASCADE)
tags = models.ManyToManyField(Tag)
latest_snapshot = models.ForeignKey(
"BookmarkAsset",
on_delete=models.SET_NULL,
null=True,
blank=True,
related_name="latest_snapshot",
)
@property
def resolved_title(self):

View File

@@ -51,6 +51,9 @@ def create_snapshot(asset: BookmarkAsset):
asset.file = filename
asset.gzip = True
asset.save()
asset.bookmark.latest_snapshot = asset
asset.bookmark.save()
except Exception as error:
asset.status = BookmarkAsset.STATUS_FAILURE
asset.save()
@@ -71,6 +74,9 @@ def upload_snapshot(bookmark: Bookmark, html: bytes):
asset.gzip = True
asset.save()
asset.bookmark.latest_snapshot = asset
asset.bookmark.save()
return asset
@@ -106,6 +112,27 @@ def upload_asset(bookmark: Bookmark, upload_file: UploadedFile):
raise e
def remove_asset(asset: BookmarkAsset):
# If this asset is the latest_snapshot for a bookmark, try to find the next most recent snapshot
bookmark = asset.bookmark
if bookmark and bookmark.latest_snapshot == asset:
latest = (
BookmarkAsset.objects.filter(
bookmark=bookmark,
asset_type=BookmarkAsset.TYPE_SNAPSHOT,
status=BookmarkAsset.STATUS_COMPLETE,
)
.exclude(pk=asset.pk)
.order_by("-date_created")
.first()
)
bookmark.latest_snapshot = latest
bookmark.save()
asset.delete()
def _generate_asset_filename(
asset: BookmarkAsset, filename: str, extension: str
) -> str:

View File

@@ -67,9 +67,9 @@
{% endif %}
<div class="actions">
{% if bookmark_item.display_date %}
{% if bookmark_item.web_archive_snapshot_url %}
<a href="{{ bookmark_item.web_archive_snapshot_url }}"
title="Show snapshot on the Internet Archive Wayback Machine"
{% if bookmark_item.snapshot_url %}
<a href="{{ bookmark_item.snapshot_url }}"
title="{{ bookmark_item.snapshot_title }}"
target="{{ bookmark_list.link_target }}"
rel="noopener">
{{ bookmark_item.display_date }}

View File

@@ -1,6 +1,7 @@
import datetime
import gzip
import os
from datetime import timedelta
from unittest import mock
from django.core.files.uploadedfile import SimpleUploadedFile
@@ -236,3 +237,175 @@ class AssetServiceTestCase(TestCase, BookmarkFactoryMixin):
# asset is not saved to the database
self.assertIsNone(BookmarkAsset.objects.first())
def test_create_snapshot_updates_bookmark_latest_snapshot(self):
bookmark = self.setup_bookmark(url="https://example.com")
first_asset = assets.create_snapshot_asset(bookmark)
first_asset.save()
assets.create_snapshot(first_asset)
bookmark.refresh_from_db()
self.assertEqual(bookmark.latest_snapshot, first_asset)
second_asset = assets.create_snapshot_asset(bookmark)
second_asset.save()
assets.create_snapshot(second_asset)
bookmark.refresh_from_db()
self.assertEqual(bookmark.latest_snapshot, second_asset)
def test_upload_snapshot_updates_bookmark_latest_snapshot(self):
bookmark = self.setup_bookmark(url="https://example.com")
first_asset = assets.upload_snapshot(bookmark, self.html_content.encode())
bookmark.refresh_from_db()
self.assertEqual(bookmark.latest_snapshot, first_asset)
second_asset = assets.upload_snapshot(bookmark, self.html_content.encode())
bookmark.refresh_from_db()
self.assertEqual(bookmark.latest_snapshot, second_asset)
self.assertNotEqual(bookmark.latest_snapshot, first_asset)
def test_create_snapshot_failure_does_not_update_latest_snapshot(self):
# Create a bookmark with an existing latest_snapshot
bookmark = self.setup_bookmark(url="https://example.com")
initial_snapshot = assets.upload_snapshot(bookmark, self.html_content.encode())
bookmark.refresh_from_db()
self.assertEqual(bookmark.latest_snapshot, initial_snapshot)
# Create a new snapshot asset that will fail
failing_asset = assets.create_snapshot_asset(bookmark)
failing_asset.save()
# Make the snapshot creation fail
self.mock_singlefile_create_snapshot.side_effect = Exception(
"Snapshot creation failed"
)
# Attempt to create a snapshot (which will fail)
with self.assertRaises(Exception):
assets.create_snapshot(failing_asset)
# Verify that the bookmark's latest_snapshot is still the initial snapshot
bookmark.refresh_from_db()
self.assertEqual(bookmark.latest_snapshot, initial_snapshot)
def test_upload_snapshot_failure_does_not_update_latest_snapshot(self):
# Create a bookmark with an existing latest_snapshot
bookmark = self.setup_bookmark(url="https://example.com")
initial_snapshot = assets.upload_snapshot(bookmark, self.html_content.encode())
bookmark.refresh_from_db()
self.assertEqual(bookmark.latest_snapshot, initial_snapshot)
# Make the gzip.open function fail
with mock.patch("gzip.open") as mock_gzip_open:
mock_gzip_open.side_effect = Exception("Upload failed")
# Attempt to upload a snapshot (which will fail)
with self.assertRaises(Exception):
assets.upload_snapshot(bookmark, b"New content")
# Verify that the bookmark's latest_snapshot is still the initial snapshot
bookmark.refresh_from_db()
self.assertEqual(bookmark.latest_snapshot, initial_snapshot)
def test_remove_latest_snapshot_updates_bookmark(self):
# Create a bookmark with multiple snapshots
bookmark = self.setup_bookmark()
# Create base time (1 hour ago)
base_time = timezone.now() - timedelta(hours=1)
# Create three snapshots with explicitly different dates
old_asset = self.setup_asset(
bookmark=bookmark,
asset_type=BookmarkAsset.TYPE_SNAPSHOT,
status=BookmarkAsset.STATUS_COMPLETE,
file="old_snapshot.html.gz",
date_created=base_time,
)
self.setup_asset_file(old_asset)
middle_asset = self.setup_asset(
bookmark=bookmark,
asset_type=BookmarkAsset.TYPE_SNAPSHOT,
status=BookmarkAsset.STATUS_COMPLETE,
file="middle_snapshot.html.gz",
date_created=base_time + timedelta(minutes=30),
)
self.setup_asset_file(middle_asset)
latest_asset = self.setup_asset(
bookmark=bookmark,
asset_type=BookmarkAsset.TYPE_SNAPSHOT,
status=BookmarkAsset.STATUS_COMPLETE,
file="latest_snapshot.html.gz",
date_created=base_time + timedelta(minutes=60),
)
self.setup_asset_file(latest_asset)
# Set the latest asset as the bookmark's latest_snapshot
bookmark.latest_snapshot = latest_asset
bookmark.save()
# Delete the latest snapshot
assets.remove_asset(latest_asset)
bookmark.refresh_from_db()
# Verify that middle_asset is now the latest_snapshot
self.assertEqual(bookmark.latest_snapshot, middle_asset)
# Delete the middle snapshot
assets.remove_asset(middle_asset)
bookmark.refresh_from_db()
# Verify that old_asset is now the latest_snapshot
self.assertEqual(bookmark.latest_snapshot, old_asset)
# Delete the last snapshot
assets.remove_asset(old_asset)
bookmark.refresh_from_db()
# Verify that latest_snapshot is now None
self.assertIsNone(bookmark.latest_snapshot)
def test_remove_non_latest_snapshot_does_not_affect_bookmark(self):
# Create a bookmark with multiple snapshots
bookmark = self.setup_bookmark()
# Create base time (1 hour ago)
base_time = timezone.now() - timedelta(hours=1)
# Create two snapshots with explicitly different dates
old_asset = self.setup_asset(
bookmark=bookmark,
asset_type=BookmarkAsset.TYPE_SNAPSHOT,
status=BookmarkAsset.STATUS_COMPLETE,
file="old_snapshot.html.gz",
date_created=base_time,
)
self.setup_asset_file(old_asset)
latest_asset = self.setup_asset(
bookmark=bookmark,
asset_type=BookmarkAsset.TYPE_SNAPSHOT,
status=BookmarkAsset.STATUS_COMPLETE,
file="latest_snapshot.html.gz",
date_created=base_time + timedelta(minutes=30),
)
self.setup_asset_file(latest_asset)
# Set the latest asset as the bookmark's latest_snapshot
bookmark.latest_snapshot = latest_asset
bookmark.save()
# Delete the old snapshot (not the latest)
assets.remove_asset(old_asset)
bookmark.refresh_from_db()
# Verify that latest_snapshot hasn't changed
self.assertEqual(bookmark.latest_snapshot, latest_asset)

View File

@@ -43,7 +43,7 @@ class BookmarkListTemplateTest(TestCase, BookmarkFactoryMixin, HtmlTestMixin):
self.assertInHTML(
f"""
<a href="{url}"
title="Show snapshot on the Internet Archive Wayback Machine" target="{link_target}" rel="noopener">
title="View snapshot on the Internet Archive Wayback Machine" target="{link_target}" rel="noopener">
{label_content}
</a>
<span>|</span>
@@ -559,6 +559,31 @@ class BookmarkListTemplateTest(TestCase, BookmarkFactoryMixin, HtmlTestMixin):
html, "1 week ago", bookmark.web_archive_snapshot_url, link_target="_self"
)
def test_should_render_latest_snapshot_link_if_one_exists(self):
bookmark = self.setup_date_format_test(
UserProfile.BOOKMARK_DATE_DISPLAY_ABSOLUTE
)
bookmark.latest_snapshot = self.setup_asset(bookmark)
bookmark.save()
html = self.render_template()
formatted_date = formats.date_format(bookmark.date_added, "SHORT_DATE_FORMAT")
snapshot_url = reverse(
"linkding:assets.view", args=[bookmark.latest_snapshot.id]
)
# Check that the snapshot link is rendered with the correct URL and title
self.assertInHTML(
f"""
<a href="{snapshot_url}"
title="View latest snapshot" target="_blank" rel="noopener">
{formatted_date}
</a>
<span>|</span>
""",
html,
)
def test_should_reflect_unread_state_as_css_class(self):
self.setup_bookmark(unread=True)
html = self.render_template()

View File

@@ -235,7 +235,7 @@ def upload_asset(request: HttpRequest, bookmark_id: int | str):
def remove_asset(request: HttpRequest, asset_id: int | str):
asset = access.asset_write(request, asset_id)
asset.delete()
asset_actions.remove_asset(asset)
def update_state(request: HttpRequest, bookmark_id: int | str):

View File

@@ -130,9 +130,18 @@ class BookmarkItem:
self.description = bookmark.resolved_description
self.notes = bookmark.notes
self.tag_names = bookmark.tag_names
self.web_archive_snapshot_url = bookmark.web_archive_snapshot_url
if not self.web_archive_snapshot_url:
self.web_archive_snapshot_url = generate_fallback_webarchive_url(
if bookmark.latest_snapshot_id:
self.snapshot_url = reverse(
"linkding:assets.view", args=[bookmark.latest_snapshot_id]
)
self.snapshot_title = "View latest snapshot"
else:
self.snapshot_url = bookmark.web_archive_snapshot_url
self.snapshot_title = (
"View snapshot on the Internet Archive Wayback Machine"
)
if not self.snapshot_url:
self.snapshot_url = generate_fallback_webarchive_url(
bookmark.url, bookmark.date_added
)
self.favicon_file = bookmark.favicon_file