Add support for bookmark thumbnails (#721)

* Preview Image

* fix tests

* add test

* download preview image

* relative path

* gst

* details view

* fix tests

* Improve preview image styles

* Remove preview image URL from model

* Revert form changes

* update tests

* make it work in uwsgi

---------

Co-authored-by: Sascha Ißbrücker <sascha.issbruecker@gmail.com>
This commit is contained in:
Viacheslav Slinko
2024-05-07 19:58:52 +03:00
committed by GitHub
parent e2415f652b
commit 87cd4061cb
26 changed files with 640 additions and 147 deletions

View File

@@ -40,6 +40,8 @@ def create_bookmark(bookmark: Bookmark, tag_string: str, current_user: User):
tasks.create_web_archive_snapshot(current_user, bookmark, False)
# Load favicon
tasks.load_favicon(current_user, bookmark)
# Load preview image
tasks.load_preview_image(current_user, bookmark)
# Create HTML snapshot
if current_user.profile.enable_automatic_html_snapshots:
tasks.create_html_snapshot(bookmark)
@@ -58,6 +60,8 @@ def update_bookmark(bookmark: Bookmark, tag_string, current_user: User):
bookmark.save()
# Update favicon
tasks.load_favicon(current_user, bookmark)
# Update preview image
tasks.load_preview_image(current_user, bookmark)
if has_url_changed:
# Update web archive snapshot, if URL changed

View File

@@ -0,0 +1,46 @@
import logging
import mimetypes
import os.path
import hashlib
from pathlib import Path
import requests
from django.conf import settings
from bookmarks.services import website_loader
logger = logging.getLogger(__name__)
def _ensure_preview_folder():
    """Create the preview image folder if it does not exist yet.

    The location is read from ``settings.LD_PREVIEW_FOLDER``; missing parent
    directories are created too, and an already existing folder is accepted.
    """
    preview_folder = Path(settings.LD_PREVIEW_FOLDER)
    preview_folder.mkdir(exist_ok=True, parents=True)
def _url_to_filename(preview_image: str) -> str:
return hashlib.md5(preview_image.encode()).hexdigest()
def _get_image_path(preview_image_file: str) -> Path:
    """Return the path of ``preview_image_file`` inside the preview folder.

    The folder is taken from ``settings.LD_PREVIEW_FOLDER``.
    """
    preview_folder = Path(settings.LD_PREVIEW_FOLDER)
    return preview_folder / preview_image_file
def load_preview_image(url: str) -> str | None:
    """Download the preview image advertised by a web page and store it locally.

    Loads the website metadata for ``url``. When the metadata names a preview
    image, that image is streamed into the preview folder under a file name
    made of a hash of ``url`` plus an extension guessed from the response's
    ``Content-Type`` header.

    Args:
        url: Address of the bookmarked page (not of the image itself).

    Returns:
        The stored file's name (relative to the preview folder), or ``None``
        when the page has no preview image, the download does not answer with
        HTTP 200, or the response is not a recognisable image type.
    """
    _ensure_preview_folder()

    metadata = website_loader.load_website_metadata(url)
    if not metadata.preview_image:
        logger.debug(f"Could not find preview image in metadata: {url}")
        return None

    logger.debug(f"Loading preview image: {metadata.preview_image}")
    with requests.get(metadata.preview_image, stream=True) as response:
        # Do not persist error pages (404, 500, ...) as if they were images.
        if response.status_code != 200:
            logger.debug(
                f"Bad response status code for preview image: {metadata.preview_image} status_code={response.status_code}"
            )
            return None

        # The header may be missing, and may carry parameters such as
        # "image/png; charset=binary" which guess_extension cannot handle.
        content_type = response.headers.get("Content-Type", "")
        mime_type = content_type.split(";", 1)[0].strip().lower()
        file_extension = (
            mimetypes.guess_extension(mime_type)
            if mime_type.startswith("image/")
            else None
        )
        if file_extension is None:
            # Avoids writing files named "<hash>None" for unknown types.
            logger.debug(
                f"Unsupported content type for preview image: {metadata.preview_image} content_type={content_type}"
            )
            return None

        preview_image_hash = _url_to_filename(url)
        preview_image_file = f"{preview_image_hash}{file_extension}"
        preview_image_path = _get_image_path(preview_image_file)

        # Stream to disk in chunks so large images are never held in memory.
        with open(preview_image_path, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)

    logger.debug(f"Saved preview image as: {preview_image_path}")

    return preview_image_file

View File

@@ -15,7 +15,7 @@ from waybackpy.exceptions import WaybackError, TooManyRequestsError, NoCDXRecord
import bookmarks.services.wayback
from bookmarks.models import Bookmark, BookmarkAsset, UserProfile
from bookmarks.services import favicon_loader, singlefile
from bookmarks.services import favicon_loader, singlefile, preview_image_loader
from bookmarks.services.website_loader import DEFAULT_USER_AGENT
logger = logging.getLogger(__name__)
@@ -221,6 +221,30 @@ def _schedule_refresh_favicons_task(user_id: int):
_load_favicon_task(bookmark.id)
def load_preview_image(user: User, bookmark: Bookmark):
    """Trigger loading of the preview image for a bookmark, if allowed.

    Nothing happens unless the user's profile has preview images enabled and
    background tasks are not disabled via ``settings.LD_DISABLE_BACKGROUND_TASKS``.
    """
    if not user.profile.enable_preview_images:
        return
    if settings.LD_DISABLE_BACKGROUND_TASKS:
        return
    _load_preview_image_task(bookmark.id)
@task()
def _load_preview_image_task(bookmark_id: int):
    """Load the preview image for one bookmark and record its file name.

    Registered through the ``@task()`` decorator. Returns silently when the
    bookmark no longer exists. The bookmark is only saved when the loaded
    file name differs from the one already stored.
    """
    try:
        bookmark = Bookmark.objects.get(id=bookmark_id)
    except Bookmark.DoesNotExist:
        # The bookmark was removed before the task ran; nothing to do.
        return

    logger.info(f"Load preview image for bookmark. url={bookmark.url}")

    new_preview_image_file = preview_image_loader.load_preview_image(bookmark.url)

    # Skip the database write when the stored value is already current.
    if new_preview_image_file == bookmark.preview_image_file:
        return

    bookmark.preview_image_file = new_preview_image_file
    bookmark.save(update_fields=["preview_image_file"])
    logger.info(
        f"Successfully updated preview image for bookmark. url={bookmark.url} preview_image_file={new_preview_image_file}"
    )
def is_html_snapshot_feature_active() -> bool:
    """Tell whether HTML snapshots can currently be created.

    The feature is active when ``settings.LD_ENABLE_SNAPSHOTS`` is truthy and
    ``settings.LD_DISABLE_BACKGROUND_TASKS`` is not.
    """
    snapshots_enabled = settings.LD_ENABLE_SNAPSHOTS
    if not snapshots_enabled:
        # Mirror short-circuit semantics: hand back the falsy setting itself.
        return snapshots_enabled
    return not settings.LD_DISABLE_BACKGROUND_TASKS

View File

@@ -1,6 +1,7 @@
import logging
from dataclasses import dataclass
from functools import lru_cache
from urllib.parse import urljoin
import requests
from bs4 import BeautifulSoup
@@ -15,12 +16,14 @@ class WebsiteMetadata:
url: str
title: str
description: str
preview_image: str | None
def to_dict(self):
    """Return the metadata as a plain dict.

    The dict has the keys ``url``, ``title``, ``description`` and
    ``preview_image``, each mapped to the attribute of the same name.
    """
    field_names = ("url", "title", "description", "preview_image")
    return {name: getattr(self, name) for name in field_names}
@@ -30,6 +33,7 @@ class WebsiteMetadata:
def load_website_metadata(url: str):
title = None
description = None
preview_image = None
try:
start = timezone.now()
page_text = load_page(url)
@@ -55,10 +59,21 @@ def load_website_metadata(url: str):
else None
)
image_tag = soup.find("meta", attrs={"property": "og:image"})
preview_image = image_tag["content"].strip() if image_tag else None
if (
preview_image
and not preview_image.startswith("http://")
and not preview_image.startswith("https://")
):
preview_image = urljoin(url, preview_image)
end = timezone.now()
logger.debug(f"Parsing duration: {end - start}")
finally:
return WebsiteMetadata(url=url, title=title, description=description)
return WebsiteMetadata(
url=url, title=title, description=description, preview_image=preview_image
)
CHUNK_SIZE = 50 * 1024