Add bulk and single bookmark metadata refresh (#999)

* Add url create/edit query parameter to clear cache

* Add refresh bookmark metadata button in create/edit bookmark page

* Fix refresh bookmark metadata when editing existing bookmark

* Add bulk refresh metadata functionality

* Fix test cases for bulk view dropdown selection list

* Allow bulk metadata refresh when background tasks are disabled

* Move load preview image call on refresh metadata

* Update bookmark modified time on metadata refresh

* Rename function to align with convention

* Add tests for refresh task

* Add tests for bookmarks service refresh metadata

* Add tests for bookmarks api disable cache on check

* Remove bulk refresh metadata when background tasks disabled

* Refactor refresh metadata task

* Remove unnecessary call

* Fix testing mock name

* Abstract clearing metadata cache

* Add test to check if load page is called twice when cache disabled

* Remove refresh button for new bookmarks

* Remove strict disable cache is true check

* Refactor refresh metadata form logic into its own function

* Move button and highlight changes

* Polish and update tests

---------

Co-authored-by: Sascha Ißbrücker <sascha.issbruecker@gmail.com>
Author: Josh S
Date: 2025-03-22 06:34:10 -04:00 (committed by GitHub)
Parent: f1acb4f7c9
Commit: a23c357f2f
14 changed files with 300 additions and 10 deletions


@@ -197,6 +197,17 @@ def unshare_bookmarks(bookmark_ids: [Union[int, str]], current_user: User):
    )


def refresh_bookmarks_metadata(bookmark_ids: [Union[int, str]], current_user: User):
    sanitized_bookmark_ids = _sanitize_id_list(bookmark_ids)
    owned_bookmarks = Bookmark.objects.filter(
        owner=current_user, id__in=sanitized_bookmark_ids
    )

    for bookmark in owned_bookmarks:
        tasks.refresh_metadata(bookmark)
        tasks.load_preview_image(current_user, bookmark)


def _merge_bookmark_data(from_bookmark: Bookmark, to_bookmark: Bookmark):
    to_bookmark.title = from_bookmark.title
    to_bookmark.description = from_bookmark.description
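
The bulk action above can be driven from any view or script that has the acting user and the selected IDs. A minimal usage sketch follows; the module path (bookmarks.services.bookmarks) and the calling function are assumptions for illustration, not part of this diff.

# Usage sketch: the service sanitizes string IDs coming from the bulk-edit
# form and filters on ownership, so IDs belonging to other users are skipped.
# The module path is assumed, not shown in the diff.
from bookmarks.services import bookmarks as bookmark_service

def handle_bulk_refresh(request, selected_ids):
    # selected_ids may contain ints or strings; a refresh is queued per bookmark
    bookmark_service.refresh_bookmarks_metadata(selected_ids, request.user)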


@@ -6,6 +6,7 @@ import waybackpy
from django.conf import settings
from django.contrib.auth.models import User
from django.db.models import Q
from django.utils import timezone
from huey import crontab
from huey.contrib.djhuey import HUEY as huey
from huey.exceptions import TaskLockedException
@@ -13,7 +14,7 @@ from waybackpy.exceptions import WaybackError, TooManyRequestsError
from bookmarks.models import Bookmark, BookmarkAsset, UserProfile
from bookmarks.services import assets, favicon_loader, preview_image_loader
from bookmarks.services.website_loader import DEFAULT_USER_AGENT
from bookmarks.services.website_loader import DEFAULT_USER_AGENT, load_website_metadata

logger = logging.getLogger(__name__)

@@ -225,6 +226,31 @@ def _schedule_bookmarks_without_previews_task(user_id: int):
        logging.exception(exc)


def refresh_metadata(bookmark: Bookmark):
    if not settings.LD_DISABLE_BACKGROUND_TASKS:
        _refresh_metadata_task(bookmark.id)


@task()
def _refresh_metadata_task(bookmark_id: int):
    try:
        bookmark = Bookmark.objects.get(id=bookmark_id)
    except Bookmark.DoesNotExist:
        return

    logger.info(f"Refresh metadata for bookmark. url={bookmark.url}")

    metadata = load_website_metadata(bookmark.url)
    if metadata.title:
        bookmark.title = metadata.title
    if metadata.description:
        bookmark.description = metadata.description

    bookmark.date_modified = timezone.now()
    bookmark.save()

    logger.info(f"Successfully refreshed metadata for bookmark. url={bookmark.url}")


def is_html_snapshot_feature_active() -> bool:
    return settings.LD_ENABLE_SNAPSHOTS and not settings.LD_DISABLE_BACKGROUND_TASKS
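
The task only overwrites a field when the scraper returned a non-empty value, and it always bumps date_modified before saving. The following standalone sketch mirrors that merge rule without Django or huey; the dataclass and function names are illustrative, not part of the codebase.

# Standalone sketch of the merge rule in _refresh_metadata_task: scraped
# values replace stored ones only when non-empty. Names here are illustrative.
from dataclasses import dataclass
from typing import Optional, Tuple

@dataclass
class ScrapedMetadata:
    title: Optional[str] = None
    description: Optional[str] = None

def merge_metadata(
    current_title: str, current_description: str, scraped: ScrapedMetadata
) -> Tuple[str, str]:
    title = scraped.title if scraped.title else current_title
    description = scraped.description if scraped.description else current_description
    return title, description

# An empty scraped description keeps the stored one; the new title wins.
assert merge_metadata("Old", "Kept", ScrapedMetadata(title="New")) == ("New", "Kept")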


@@ -27,10 +27,20 @@ class WebsiteMetadata:
        }


def load_website_metadata(url: str, ignore_cache: bool = False):
    if ignore_cache:
        return _load_website_metadata(url)
    return _load_website_metadata_cached(url)


# Caching metadata avoids scraping again when saving bookmarks, in case the
# metadata was already scraped to show preview values in the bookmark form
@lru_cache(maxsize=10)
def load_website_metadata(url: str):
def _load_website_metadata_cached(url: str):
    return _load_website_metadata(url)


def _load_website_metadata(url: str):
    title = None
    description = None
    preview_image = None
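
The cache bypass above is a small dispatch pattern: the public function keeps its lru_cache behaviour for the common preview-then-save flow, while ignore_cache=True forces a fresh scrape during a metadata refresh. A self-contained sketch of the same pattern, with a stand-in fetch function that is not part of the diff:

# Self-contained sketch of the ignore_cache dispatch; _fetch stands in for the
# real scraping code.
from functools import lru_cache

def _fetch(url: str) -> dict:
    # placeholder for the actual HTTP request and HTML parsing
    return {"url": url, "title": None, "description": None}

@lru_cache(maxsize=10)
def _fetch_cached(url: str) -> dict:
    return _fetch(url)

def fetch(url: str, ignore_cache: bool = False) -> dict:
    # ignore_cache=True skips the memoized wrapper entirely, so a refresh
    # always re-scrapes instead of reusing the value cached by the form preview
    return _fetch(url) if ignore_cache else _fetch_cached(url)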