Add REST endpoint for uploading snapshots from the Singlefile extension (#996)

* Extract asset logic

* Allow disabling HTML snapshot when creating bookmark

* Add endpoint for uploading singlefile snapshots

* Add URL parameter to disable HTML snapshots

* Allow using asset list in base Docker image

* Expose app version through profile
This commit is contained in:
Sascha Ißbrücker
2025-02-23 22:58:14 +01:00
committed by GitHub
parent 2e97b13bad
commit 2d81ea6f6e
18 changed files with 723 additions and 314 deletions

View File

@@ -1,6 +1,5 @@
import functools
import logging
import os
from typing import List
import waybackpy
@@ -8,14 +7,13 @@ from django.conf import settings
from django.contrib.auth import get_user_model
from django.contrib.auth.models import User
from django.db.models import Q
from django.utils import timezone, formats
from huey import crontab
from huey.contrib.djhuey import HUEY as huey
from huey.exceptions import TaskLockedException
from waybackpy.exceptions import WaybackError, TooManyRequestsError
from bookmarks.models import Bookmark, BookmarkAsset, UserProfile
from bookmarks.services import favicon_loader, singlefile, preview_image_loader
from bookmarks.services import assets, favicon_loader, preview_image_loader
from bookmarks.services.website_loader import DEFAULT_USER_AGENT
logger = logging.getLogger(__name__)
@@ -236,7 +234,7 @@ def create_html_snapshot(bookmark: Bookmark):
if not is_html_snapshot_feature_active():
return
asset = _create_snapshot_asset(bookmark)
asset = assets.create_snapshot_asset(bookmark)
asset.save()
@@ -246,47 +244,12 @@ def create_html_snapshots(bookmark_list: List[Bookmark]):
assets_to_create = []
for bookmark in bookmark_list:
asset = _create_snapshot_asset(bookmark)
asset = assets.create_snapshot_asset(bookmark)
assets_to_create.append(asset)
BookmarkAsset.objects.bulk_create(assets_to_create)
MAX_SNAPSHOT_FILENAME_LENGTH = 192
def _create_snapshot_asset(bookmark: Bookmark) -> BookmarkAsset:
timestamp = formats.date_format(timezone.now(), "SHORT_DATE_FORMAT")
asset = BookmarkAsset(
bookmark=bookmark,
asset_type=BookmarkAsset.TYPE_SNAPSHOT,
content_type="text/html",
display_name=f"HTML snapshot from {timestamp}",
status=BookmarkAsset.STATUS_PENDING,
)
return asset
def _generate_snapshot_filename(asset: BookmarkAsset) -> str:
def sanitize_char(char):
if char.isalnum() or char in ("-", "_", "."):
return char
else:
return "_"
formatted_datetime = asset.date_created.strftime("%Y-%m-%d_%H%M%S")
sanitized_url = "".join(sanitize_char(char) for char in asset.bookmark.url)
# Calculate the length of the non-URL parts of the filename
non_url_length = len(f"{asset.asset_type}{formatted_datetime}__.html.gz")
# Calculate the maximum length for the URL part
max_url_length = MAX_SNAPSHOT_FILENAME_LENGTH - non_url_length
# Truncate the URL if necessary
sanitized_url = sanitized_url[:max_url_length]
return f"{asset.asset_type}_{formatted_datetime}_{sanitized_url}.html.gz"
# singe-file does not support running multiple instances in parallel, so we can
# not queue up multiple snapshot tasks at once. Instead, schedule a periodic
# task that grabs a number of pending assets and creates snapshots for them in
@@ -313,13 +276,8 @@ def _create_html_snapshot_task(asset_id: int):
logger.info(f"Create HTML snapshot for bookmark. url={asset.bookmark.url}")
try:
filename = _generate_snapshot_filename(asset)
filepath = os.path.join(settings.LD_ASSET_FOLDER, filename)
singlefile.create_snapshot(asset.bookmark.url, filepath)
asset.status = BookmarkAsset.STATUS_COMPLETE
asset.file = filename
asset.gzip = True
asset.save()
assets.create_snapshot(asset)
logger.info(
f"Successfully created HTML snapshot for bookmark. url={asset.bookmark.url}"
)
@@ -328,8 +286,6 @@ def _create_html_snapshot_task(asset_id: int):
f"Failed to HTML snapshot for bookmark. url={asset.bookmark.url}",
exc_info=error,
)
asset.status = BookmarkAsset.STATUS_FAILURE
asset.save()
def create_missing_html_snapshots(user: User) -> int: