Add bulk and single bookmark metadata refresh (#999)

* Add url create/edit query parameter to clear cache

* Add refresh bookmark metadata button in create/edit bookmark page

* Fix refresh bookmark metadata when editing existing bookmark

* Add bulk refresh metadata functionality

* Fix test cases for bulk view dropdown selection list

* Allow bulk metadata refresh when background tasks are disabled

* Move load preview image call on refresh metadata

* Update bookmark modified time on metadata refresh

* Rename function to align with convention

* Add tests for refresh task

* Add tests for bookmarks service refresh metadata

* Add tests for bookmarks api disable cache on check

* Remove bulk refresh metadata when background tasks disabled

* Refactor refresh metadata task

* Remove unnecessary call

* Fix testing mock name

* Abstract clearing metadata cache

* Add test to check if load page is called twice when cache disabled

* Remove refresh button for new bookmarks

* Remove strict disable cache is true check

* Refactor refresh metadata form logic into its own function

* Move button and highlight changes

* Polish and update tests

---------

Co-authored-by: Sascha Ißbrücker <sascha.issbruecker@gmail.com>
Author: Josh S
Date: 2025-03-22 06:34:10 -04:00 (committed by GitHub)
Parent: f1acb4f7c9
Commit: a23c357f2f
14 changed files with 300 additions and 10 deletions


@@ -197,6 +197,17 @@ def unshare_bookmarks(bookmark_ids: [Union[int, str]], current_user: User):
    )


def refresh_bookmarks_metadata(bookmark_ids: [Union[int, str]], current_user: User):
    sanitized_bookmark_ids = _sanitize_id_list(bookmark_ids)
    owned_bookmarks = Bookmark.objects.filter(
        owner=current_user, id__in=sanitized_bookmark_ids
    )

    for bookmark in owned_bookmarks:
        tasks.refresh_metadata(bookmark)
        tasks.load_preview_image(current_user, bookmark)


def _merge_bookmark_data(from_bookmark: Bookmark, to_bookmark: Bookmark):
    to_bookmark.title = from_bookmark.title
    to_bookmark.description = from_bookmark.description
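
The bulk action above can be driven from any view or script that has the acting user and the selected IDs. A minimal usage sketch follows; the module path (bookmarks.services.bookmarks) and the calling function are assumptions for illustration, not part of this diff.

# Usage sketch: the service sanitizes string IDs coming from the bulk-edit
# form and filters on ownership, so IDs belonging to other users are skipped.
# The module path is assumed, not shown in the diff.
from bookmarks.services import bookmarks as bookmark_service

def handle_bulk_refresh(request, selected_ids):
    # selected_ids may contain ints or strings; a refresh is queued per bookmark
    bookmark_service.refresh_bookmarks_metadata(selected_ids, request.user)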


@@ -6,6 +6,7 @@ import waybackpy
from django.conf import settings
from django.contrib.auth.models import User
from django.db.models import Q
from django.utils import timezone
from huey import crontab
from huey.contrib.djhuey import HUEY as huey
from huey.exceptions import TaskLockedException
@@ -13,7 +14,7 @@ from waybackpy.exceptions import WaybackError, TooManyRequestsError
from bookmarks.models import Bookmark, BookmarkAsset, UserProfile
from bookmarks.services import assets, favicon_loader, preview_image_loader
from bookmarks.services.website_loader import DEFAULT_USER_AGENT
from bookmarks.services.website_loader import DEFAULT_USER_AGENT, load_website_metadata

logger = logging.getLogger(__name__)

@@ -225,6 +226,31 @@ def _schedule_bookmarks_without_previews_task(user_id: int):
        logging.exception(exc)


def refresh_metadata(bookmark: Bookmark):
    if not settings.LD_DISABLE_BACKGROUND_TASKS:
        _refresh_metadata_task(bookmark.id)


@task()
def _refresh_metadata_task(bookmark_id: int):
    try:
        bookmark = Bookmark.objects.get(id=bookmark_id)
    except Bookmark.DoesNotExist:
        return

    logger.info(f"Refresh metadata for bookmark. url={bookmark.url}")

    metadata = load_website_metadata(bookmark.url)
    if metadata.title:
        bookmark.title = metadata.title
    if metadata.description:
        bookmark.description = metadata.description

    bookmark.date_modified = timezone.now()
    bookmark.save()

    logger.info(f"Successfully refreshed metadata for bookmark. url={bookmark.url}")


def is_html_snapshot_feature_active() -> bool:
    return settings.LD_ENABLE_SNAPSHOTS and not settings.LD_DISABLE_BACKGROUND_TASKS
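
The task only overwrites a field when the scraper returned a non-empty value, and it always bumps date_modified before saving. The following standalone sketch mirrors that merge rule without Django or huey; the dataclass and function names are illustrative, not part of the codebase.

# Standalone sketch of the merge rule in _refresh_metadata_task: scraped
# values replace stored ones only when non-empty. Names here are illustrative.
from dataclasses import dataclass
from typing import Optional, Tuple

@dataclass
class ScrapedMetadata:
    title: Optional[str] = None
    description: Optional[str] = None

def merge_metadata(
    current_title: str, current_description: str, scraped: ScrapedMetadata
) -> Tuple[str, str]:
    title = scraped.title if scraped.title else current_title
    description = scraped.description if scraped.description else current_description
    return title, description

# An empty scraped description keeps the stored one; the new title wins.
assert merge_metadata("Old", "Kept", ScrapedMetadata(title="New")) == ("New", "Kept")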


@@ -27,10 +27,20 @@ class WebsiteMetadata:
        }


def load_website_metadata(url: str, ignore_cache: bool = False):
    if ignore_cache:
        return _load_website_metadata(url)
    return _load_website_metadata_cached(url)


# Caching metadata avoids scraping again when saving bookmarks, in case the
# metadata was already scraped to show preview values in the bookmark form
@lru_cache(maxsize=10)
def load_website_metadata(url: str):
def _load_website_metadata_cached(url: str):
    return _load_website_metadata(url)


def _load_website_metadata(url: str):
    title = None
    description = None
    preview_image = None
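
The cache bypass above is a small dispatch pattern: the public function keeps its lru_cache behaviour for the common preview-then-save flow, while ignore_cache=True forces a fresh scrape during a metadata refresh. A self-contained sketch of the same pattern, with a stand-in fetch function that is not part of the diff:

# Self-contained sketch of the ignore_cache dispatch; _fetch stands in for the
# real scraping code.
from functools import lru_cache

def _fetch(url: str) -> dict:
    # placeholder for the actual HTTP request and HTML parsing
    return {"url": url, "title": None, "description": None}

@lru_cache(maxsize=10)
def _fetch_cached(url: str) -> dict:
    return _fetch(url)

def fetch(url: str, ignore_cache: bool = False) -> dict:
    # ignore_cache=True skips the memoized wrapper entirely, so a refresh
    # always re-scrapes instead of reusing the value cached by the form preview
    return _fetch(url) if ignore_cache else _fetch_cached(url)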