Prefer local snapshot over web archive link in bookmark list links (#1021)

* Prefer local snapshot over web archive link

* Update latest snapshot when it is deleted

* fix filter in migration

* improve migration performance
This commit is contained in:
Sascha Ißbrücker
2025-03-22 19:07:05 +01:00
committed by GitHub
parent 6d9d0e19f1
commit 6bba4f35c8
10 changed files with 300 additions and 10 deletions

View File

@@ -4,7 +4,7 @@ serve:
python manage.py runserver python manage.py runserver
tasks: tasks:
python manage.py process_tasks python manage.py run_huey
test: test:
pytest -n auto pytest -n auto

View File

@@ -237,6 +237,9 @@ class BookmarkAssetViewSet(
status=status.HTTP_500_INTERNAL_SERVER_ERROR, status=status.HTTP_500_INTERNAL_SERVER_ERROR,
) )
def perform_destroy(self, instance):
assets.remove_asset(instance)
class TagViewSet( class TagViewSet(
viewsets.GenericViewSet, viewsets.GenericViewSet,

View File

@@ -0,0 +1,46 @@
# Generated by Django 5.1.7 on 2025-03-22 12:28
import django.db.models.deletion
from django.db import migrations, models
from django.db.models import OuterRef, Subquery
def forwards(apps, schema_editor):
# Update the latest snapshot for each bookmark
Bookmark = apps.get_model("bookmarks", "bookmark")
BookmarkAsset = apps.get_model("bookmarks", "bookmarkasset")
latest_snapshots = (
BookmarkAsset.objects.filter(
bookmark=OuterRef("pk"), asset_type="snapshot", status="complete"
)
.order_by("-date_created")
.values("id")[:1]
)
Bookmark.objects.update(latest_snapshot_id=Subquery(latest_snapshots))
def reverse(apps, schema_editor):
pass
class Migration(migrations.Migration):
dependencies = [
("bookmarks", "0043_userprofile_collapse_side_panel"),
]
operations = [
migrations.AddField(
model_name="bookmark",
name="latest_snapshot",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="latest_snapshot",
to="bookmarks.bookmarkasset",
),
),
migrations.RunPython(forwards, reverse),
]

View File

@@ -71,6 +71,13 @@ class Bookmark(models.Model):
date_accessed = models.DateTimeField(blank=True, null=True) date_accessed = models.DateTimeField(blank=True, null=True)
owner = models.ForeignKey(User, on_delete=models.CASCADE) owner = models.ForeignKey(User, on_delete=models.CASCADE)
tags = models.ManyToManyField(Tag) tags = models.ManyToManyField(Tag)
latest_snapshot = models.ForeignKey(
"BookmarkAsset",
on_delete=models.SET_NULL,
null=True,
blank=True,
related_name="latest_snapshot",
)
@property @property
def resolved_title(self): def resolved_title(self):

View File

@@ -51,6 +51,9 @@ def create_snapshot(asset: BookmarkAsset):
asset.file = filename asset.file = filename
asset.gzip = True asset.gzip = True
asset.save() asset.save()
asset.bookmark.latest_snapshot = asset
asset.bookmark.save()
except Exception as error: except Exception as error:
asset.status = BookmarkAsset.STATUS_FAILURE asset.status = BookmarkAsset.STATUS_FAILURE
asset.save() asset.save()
@@ -71,6 +74,9 @@ def upload_snapshot(bookmark: Bookmark, html: bytes):
asset.gzip = True asset.gzip = True
asset.save() asset.save()
asset.bookmark.latest_snapshot = asset
asset.bookmark.save()
return asset return asset
@@ -106,6 +112,27 @@ def upload_asset(bookmark: Bookmark, upload_file: UploadedFile):
raise e raise e
def remove_asset(asset: BookmarkAsset):
# If this asset is the latest_snapshot for a bookmark, try to find the next most recent snapshot
bookmark = asset.bookmark
if bookmark and bookmark.latest_snapshot == asset:
latest = (
BookmarkAsset.objects.filter(
bookmark=bookmark,
asset_type=BookmarkAsset.TYPE_SNAPSHOT,
status=BookmarkAsset.STATUS_COMPLETE,
)
.exclude(pk=asset.pk)
.order_by("-date_created")
.first()
)
bookmark.latest_snapshot = latest
bookmark.save()
asset.delete()
def _generate_asset_filename( def _generate_asset_filename(
asset: BookmarkAsset, filename: str, extension: str asset: BookmarkAsset, filename: str, extension: str
) -> str: ) -> str:

View File

@@ -67,9 +67,9 @@
{% endif %} {% endif %}
<div class="actions"> <div class="actions">
{% if bookmark_item.display_date %} {% if bookmark_item.display_date %}
{% if bookmark_item.web_archive_snapshot_url %} {% if bookmark_item.snapshot_url %}
<a href="{{ bookmark_item.web_archive_snapshot_url }}" <a href="{{ bookmark_item.snapshot_url }}"
title="Show snapshot on the Internet Archive Wayback Machine" title="{{ bookmark_item.snapshot_title }}"
target="{{ bookmark_list.link_target }}" target="{{ bookmark_list.link_target }}"
rel="noopener"> rel="noopener">
{{ bookmark_item.display_date }} {{ bookmark_item.display_date }}

View File

@@ -1,6 +1,7 @@
import datetime import datetime
import gzip import gzip
import os import os
from datetime import timedelta
from unittest import mock from unittest import mock
from django.core.files.uploadedfile import SimpleUploadedFile from django.core.files.uploadedfile import SimpleUploadedFile
@@ -236,3 +237,175 @@ class AssetServiceTestCase(TestCase, BookmarkFactoryMixin):
# asset is not saved to the database # asset is not saved to the database
self.assertIsNone(BookmarkAsset.objects.first()) self.assertIsNone(BookmarkAsset.objects.first())
def test_create_snapshot_updates_bookmark_latest_snapshot(self):
bookmark = self.setup_bookmark(url="https://example.com")
first_asset = assets.create_snapshot_asset(bookmark)
first_asset.save()
assets.create_snapshot(first_asset)
bookmark.refresh_from_db()
self.assertEqual(bookmark.latest_snapshot, first_asset)
second_asset = assets.create_snapshot_asset(bookmark)
second_asset.save()
assets.create_snapshot(second_asset)
bookmark.refresh_from_db()
self.assertEqual(bookmark.latest_snapshot, second_asset)
def test_upload_snapshot_updates_bookmark_latest_snapshot(self):
bookmark = self.setup_bookmark(url="https://example.com")
first_asset = assets.upload_snapshot(bookmark, self.html_content.encode())
bookmark.refresh_from_db()
self.assertEqual(bookmark.latest_snapshot, first_asset)
second_asset = assets.upload_snapshot(bookmark, self.html_content.encode())
bookmark.refresh_from_db()
self.assertEqual(bookmark.latest_snapshot, second_asset)
self.assertNotEqual(bookmark.latest_snapshot, first_asset)
def test_create_snapshot_failure_does_not_update_latest_snapshot(self):
# Create a bookmark with an existing latest_snapshot
bookmark = self.setup_bookmark(url="https://example.com")
initial_snapshot = assets.upload_snapshot(bookmark, self.html_content.encode())
bookmark.refresh_from_db()
self.assertEqual(bookmark.latest_snapshot, initial_snapshot)
# Create a new snapshot asset that will fail
failing_asset = assets.create_snapshot_asset(bookmark)
failing_asset.save()
# Make the snapshot creation fail
self.mock_singlefile_create_snapshot.side_effect = Exception(
"Snapshot creation failed"
)
# Attempt to create a snapshot (which will fail)
with self.assertRaises(Exception):
assets.create_snapshot(failing_asset)
# Verify that the bookmark's latest_snapshot is still the initial snapshot
bookmark.refresh_from_db()
self.assertEqual(bookmark.latest_snapshot, initial_snapshot)
def test_upload_snapshot_failure_does_not_update_latest_snapshot(self):
# Create a bookmark with an existing latest_snapshot
bookmark = self.setup_bookmark(url="https://example.com")
initial_snapshot = assets.upload_snapshot(bookmark, self.html_content.encode())
bookmark.refresh_from_db()
self.assertEqual(bookmark.latest_snapshot, initial_snapshot)
# Make the gzip.open function fail
with mock.patch("gzip.open") as mock_gzip_open:
mock_gzip_open.side_effect = Exception("Upload failed")
# Attempt to upload a snapshot (which will fail)
with self.assertRaises(Exception):
assets.upload_snapshot(bookmark, b"New content")
# Verify that the bookmark's latest_snapshot is still the initial snapshot
bookmark.refresh_from_db()
self.assertEqual(bookmark.latest_snapshot, initial_snapshot)
def test_remove_latest_snapshot_updates_bookmark(self):
# Create a bookmark with multiple snapshots
bookmark = self.setup_bookmark()
# Create base time (1 hour ago)
base_time = timezone.now() - timedelta(hours=1)
# Create three snapshots with explicitly different dates
old_asset = self.setup_asset(
bookmark=bookmark,
asset_type=BookmarkAsset.TYPE_SNAPSHOT,
status=BookmarkAsset.STATUS_COMPLETE,
file="old_snapshot.html.gz",
date_created=base_time,
)
self.setup_asset_file(old_asset)
middle_asset = self.setup_asset(
bookmark=bookmark,
asset_type=BookmarkAsset.TYPE_SNAPSHOT,
status=BookmarkAsset.STATUS_COMPLETE,
file="middle_snapshot.html.gz",
date_created=base_time + timedelta(minutes=30),
)
self.setup_asset_file(middle_asset)
latest_asset = self.setup_asset(
bookmark=bookmark,
asset_type=BookmarkAsset.TYPE_SNAPSHOT,
status=BookmarkAsset.STATUS_COMPLETE,
file="latest_snapshot.html.gz",
date_created=base_time + timedelta(minutes=60),
)
self.setup_asset_file(latest_asset)
# Set the latest asset as the bookmark's latest_snapshot
bookmark.latest_snapshot = latest_asset
bookmark.save()
# Delete the latest snapshot
assets.remove_asset(latest_asset)
bookmark.refresh_from_db()
# Verify that middle_asset is now the latest_snapshot
self.assertEqual(bookmark.latest_snapshot, middle_asset)
# Delete the middle snapshot
assets.remove_asset(middle_asset)
bookmark.refresh_from_db()
# Verify that old_asset is now the latest_snapshot
self.assertEqual(bookmark.latest_snapshot, old_asset)
# Delete the last snapshot
assets.remove_asset(old_asset)
bookmark.refresh_from_db()
# Verify that latest_snapshot is now None
self.assertIsNone(bookmark.latest_snapshot)
def test_remove_non_latest_snapshot_does_not_affect_bookmark(self):
# Create a bookmark with multiple snapshots
bookmark = self.setup_bookmark()
# Create base time (1 hour ago)
base_time = timezone.now() - timedelta(hours=1)
# Create two snapshots with explicitly different dates
old_asset = self.setup_asset(
bookmark=bookmark,
asset_type=BookmarkAsset.TYPE_SNAPSHOT,
status=BookmarkAsset.STATUS_COMPLETE,
file="old_snapshot.html.gz",
date_created=base_time,
)
self.setup_asset_file(old_asset)
latest_asset = self.setup_asset(
bookmark=bookmark,
asset_type=BookmarkAsset.TYPE_SNAPSHOT,
status=BookmarkAsset.STATUS_COMPLETE,
file="latest_snapshot.html.gz",
date_created=base_time + timedelta(minutes=30),
)
self.setup_asset_file(latest_asset)
# Set the latest asset as the bookmark's latest_snapshot
bookmark.latest_snapshot = latest_asset
bookmark.save()
# Delete the old snapshot (not the latest)
assets.remove_asset(old_asset)
bookmark.refresh_from_db()
# Verify that latest_snapshot hasn't changed
self.assertEqual(bookmark.latest_snapshot, latest_asset)

View File

@@ -43,7 +43,7 @@ class BookmarkListTemplateTest(TestCase, BookmarkFactoryMixin, HtmlTestMixin):
self.assertInHTML( self.assertInHTML(
f""" f"""
<a href="{url}" <a href="{url}"
title="Show snapshot on the Internet Archive Wayback Machine" target="{link_target}" rel="noopener"> title="View snapshot on the Internet Archive Wayback Machine" target="{link_target}" rel="noopener">
{label_content} {label_content}
</a> </a>
<span>|</span> <span>|</span>
@@ -559,6 +559,31 @@ class BookmarkListTemplateTest(TestCase, BookmarkFactoryMixin, HtmlTestMixin):
html, "1 week ago", bookmark.web_archive_snapshot_url, link_target="_self" html, "1 week ago", bookmark.web_archive_snapshot_url, link_target="_self"
) )
def test_should_render_latest_snapshot_link_if_one_exists(self):
bookmark = self.setup_date_format_test(
UserProfile.BOOKMARK_DATE_DISPLAY_ABSOLUTE
)
bookmark.latest_snapshot = self.setup_asset(bookmark)
bookmark.save()
html = self.render_template()
formatted_date = formats.date_format(bookmark.date_added, "SHORT_DATE_FORMAT")
snapshot_url = reverse(
"linkding:assets.view", args=[bookmark.latest_snapshot.id]
)
# Check that the snapshot link is rendered with the correct URL and title
self.assertInHTML(
f"""
<a href="{snapshot_url}"
title="View latest snapshot" target="_blank" rel="noopener">
{formatted_date}
</a>
<span>|</span>
""",
html,
)
def test_should_reflect_unread_state_as_css_class(self): def test_should_reflect_unread_state_as_css_class(self):
self.setup_bookmark(unread=True) self.setup_bookmark(unread=True)
html = self.render_template() html = self.render_template()

View File

@@ -235,7 +235,7 @@ def upload_asset(request: HttpRequest, bookmark_id: int | str):
def remove_asset(request: HttpRequest, asset_id: int | str): def remove_asset(request: HttpRequest, asset_id: int | str):
asset = access.asset_write(request, asset_id) asset = access.asset_write(request, asset_id)
asset.delete() asset_actions.remove_asset(asset)
def update_state(request: HttpRequest, bookmark_id: int | str): def update_state(request: HttpRequest, bookmark_id: int | str):

View File

@@ -130,11 +130,20 @@ class BookmarkItem:
self.description = bookmark.resolved_description self.description = bookmark.resolved_description
self.notes = bookmark.notes self.notes = bookmark.notes
self.tag_names = bookmark.tag_names self.tag_names = bookmark.tag_names
self.web_archive_snapshot_url = bookmark.web_archive_snapshot_url if bookmark.latest_snapshot_id:
if not self.web_archive_snapshot_url: self.snapshot_url = reverse(
self.web_archive_snapshot_url = generate_fallback_webarchive_url( "linkding:assets.view", args=[bookmark.latest_snapshot_id]
bookmark.url, bookmark.date_added
) )
self.snapshot_title = "View latest snapshot"
else:
self.snapshot_url = bookmark.web_archive_snapshot_url
self.snapshot_title = (
"View snapshot on the Internet Archive Wayback Machine"
)
if not self.snapshot_url:
self.snapshot_url = generate_fallback_webarchive_url(
bookmark.url, bookmark.date_added
)
self.favicon_file = bookmark.favicon_file self.favicon_file = bookmark.favicon_file
self.preview_image_file = bookmark.preview_image_file self.preview_image_file = bookmark.preview_image_file
self.is_archived = bookmark.is_archived self.is_archived = bookmark.is_archived