mirror of
https://github.com/sissbruecker/linkding.git
synced 2025-08-13 21:49:26 +02:00
Archive snapshots of websites locally (#672)
* Add basic HTML snapshots * Implement asset list * Add snapshot creation tests * Add deletion tests * Show file size * Remove snapshots * Create new snapshots * Switch to single-file * CSS tweak * Remove auto refresh * Show delete link when there is no file yet * Add current date to display name * Add flag for snapshot support * Add option for disabling automatic snapshots * Make snapshots sharable * Document image variants * Update README.md * Add migrations * Fix tests
This commit is contained in:
@@ -34,6 +34,9 @@ def create_bookmark(bookmark: Bookmark, tag_string: str, current_user: User):
|
||||
tasks.create_web_archive_snapshot(current_user, bookmark, False)
|
||||
# Load favicon
|
||||
tasks.load_favicon(current_user, bookmark)
|
||||
# Create HTML snapshot
|
||||
if current_user.profile.enable_automatic_html_snapshots:
|
||||
tasks.create_html_snapshot(bookmark)
|
||||
|
||||
return bookmark
|
||||
|
||||
|
32
bookmarks/services/monolith.py
Normal file
32
bookmarks/services/monolith.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import gzip
|
||||
import shutil
|
||||
import subprocess
|
||||
import os
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
|
||||
class MonolithError(Exception):
    """Raised when the monolith command fails to create a snapshot."""
|
||||
|
||||
|
||||
# Monolith isn't used at the moment, as the local snapshot implementation
# switched to single-file after the prototype. Keeping this around in case
# it turns out to be useful in the future.
def create_snapshot(url: str, filepath: str):
    """Archive ``url`` with monolith and store it gzip-compressed at ``filepath``.

    Monolith writes its output to a temporary ``<filepath>.tmp`` file, which
    is then compressed into ``filepath``. The temporary file is removed
    afterwards, whether or not the snapshot succeeded.

    The command is executed as an argument list without a shell, so
    characters in ``url`` (quotes, ``;``, ``$(...)``, ...) can never be
    interpreted as shell syntax. ``LD_MONOLITH_PATH`` and
    ``LD_MONOLITH_OPTIONS`` are still split using shell-like quoting rules,
    but shell features such as variable expansion or globbing are not applied
    to them.

    Raises:
        MonolithError: if the command cannot be started or exits with a
            non-zero status.
    """
    import shlex  # local import, only needed to tokenize the configured options

    monolith_path = settings.LD_MONOLITH_PATH
    monolith_options = settings.LD_MONOLITH_OPTIONS
    temp_filepath = filepath + ".tmp"

    command = [
        *shlex.split(monolith_path),
        url,
        *shlex.split(monolith_options or ""),
        "-o",
        temp_filepath,
    ]

    try:
        try:
            subprocess.run(command, check=True)
        except (subprocess.CalledProcessError, OSError) as error:
            # stderr is not captured, so report the error itself (command and
            # exit status, or the OS error) instead of an always-empty stderr.
            # OSError covers a missing or non-executable binary, which the
            # shell previously turned into a non-zero exit status.
            raise MonolithError(f"Failed to create snapshot: {error}") from error

        with open(temp_filepath, "rb") as raw_file, gzip.open(
            filepath, "wb"
        ) as gz_file:
            shutil.copyfileobj(raw_file, gz_file)
    finally:
        # Never leave the uncompressed temporary file behind.
        try:
            os.remove(temp_filepath)
        except FileNotFoundError:
            pass
|
33
bookmarks/services/singlefile.py
Normal file
33
bookmarks/services/singlefile.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import gzip
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
|
||||
class SingleFileError(Exception):
    """Raised when the single-file command fails to create a snapshot."""


# The exception was originally published under this misspelled name. Keep it
# as an alias so existing ``raise`` / ``except SingeFileError`` code keeps
# working unchanged.
SingeFileError = SingleFileError
|
||||
|
||||
|
||||
def create_snapshot(url: str, filepath: str):
    """Archive ``url`` with single-file and store it gzip-compressed at ``filepath``.

    single-file writes its output to a temporary ``<filepath>.tmp`` file,
    which is then compressed into ``filepath``. The temporary file is removed
    afterwards, whether or not the snapshot succeeded.

    The command is executed as an argument list without a shell, so
    characters in ``url`` (quotes, ``;``, ``$(...)``, ...) can never be
    interpreted as shell syntax. ``LD_SINGLEFILE_PATH`` and
    ``LD_SINGLEFILE_OPTIONS`` are still split using shell-like quoting rules,
    but shell features such as variable expansion or globbing are not applied
    to them.

    Raises:
        SingeFileError: if the command cannot be started, exits with a
            non-zero status, or does not produce an output file.
    """
    import shlex  # local import, only needed to tokenize the configured options

    singlefile_path = settings.LD_SINGLEFILE_PATH
    singlefile_options = settings.LD_SINGLEFILE_OPTIONS
    temp_filepath = filepath + ".tmp"

    command = [
        *shlex.split(singlefile_path),
        url,
        *shlex.split(singlefile_options or ""),
        temp_filepath,
    ]

    try:
        try:
            subprocess.run(command, check=True)
        except (subprocess.CalledProcessError, OSError) as error:
            # stderr is not captured, so report the error itself (command and
            # exit status, or the OS error) instead of an always-empty stderr.
            # OSError covers a missing or non-executable binary, which the
            # shell previously turned into a non-zero exit status.
            raise SingeFileError(f"Failed to create snapshot: {error}") from error

        # single-file doesn't return exit codes apparently, so check if the file was created
        if not os.path.exists(temp_filepath):
            raise SingeFileError("Failed to create snapshot")

        with open(temp_filepath, "rb") as raw_file, gzip.open(
            filepath, "wb"
        ) as gz_file:
            shutil.copyfileobj(raw_file, gz_file)
    finally:
        # Never leave the uncompressed temporary file behind.
        try:
            os.remove(temp_filepath)
        except FileNotFoundError:
            pass
|
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
import os
|
||||
|
||||
import waybackpy
|
||||
from background_task import background
|
||||
@@ -7,10 +8,11 @@ from django.conf import settings
|
||||
from django.contrib.auth import get_user_model
|
||||
from django.contrib.auth.models import User
|
||||
from waybackpy.exceptions import WaybackError, TooManyRequestsError, NoCDXRecordFound
|
||||
from django.utils import timezone, formats
|
||||
|
||||
import bookmarks.services.wayback
|
||||
from bookmarks.models import Bookmark, UserProfile
|
||||
from bookmarks.services import favicon_loader
|
||||
from bookmarks.models import Bookmark, BookmarkAsset, UserProfile
|
||||
from bookmarks.services import favicon_loader, singlefile
|
||||
from bookmarks.services.website_loader import DEFAULT_USER_AGENT
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -193,3 +195,64 @@ def _schedule_refresh_favicons_task(user_id: int):
|
||||
tasks.append(task)
|
||||
|
||||
Task.objects.bulk_create(tasks)
|
||||
|
||||
|
||||
def is_html_snapshot_feature_active() -> bool:
    """Report whether HTML snapshots can currently be created.

    Snapshots require ``LD_ENABLE_SNAPSHOTS`` to be set and background tasks
    not to be disabled through ``LD_DISABLE_BACKGROUND_TASKS``.
    """
    snapshots_enabled = settings.LD_ENABLE_SNAPSHOTS
    if not snapshots_enabled:
        # Same result as a short-circuiting ``and``: the falsy setting value
        # is returned as-is and the second setting is not consulted.
        return snapshots_enabled
    return not settings.LD_DISABLE_BACKGROUND_TASKS
|
||||
|
||||
|
||||
def create_html_snapshot(bookmark: Bookmark):
    """Create a pending HTML snapshot asset for ``bookmark`` and start its task.

    Does nothing when the snapshot feature is not active. Otherwise a
    ``BookmarkAsset`` in pending state is saved and its id is handed to the
    snapshot background task.
    """
    if not is_html_snapshot_feature_active():
        return

    # The display name carries the current date in the short date format.
    snapshot_date = formats.date_format(timezone.now(), "SHORT_DATE_FORMAT")
    asset_fields = {
        "bookmark": bookmark,
        "asset_type": BookmarkAsset.TYPE_SNAPSHOT,
        "content_type": "text/html",
        "display_name": f"HTML snapshot from {snapshot_date}",
        "status": BookmarkAsset.STATUS_PENDING,
    }
    pending_asset = BookmarkAsset(**asset_fields)
    pending_asset.save()

    _create_html_snapshot_task(pending_asset.id)
|
||||
|
||||
|
||||
def _generate_snapshot_filename(asset: BookmarkAsset) -> str:
    """Build the file name under which the snapshot for ``asset`` is stored.

    The name has the form ``<asset_type>_<creation datetime>_<url>.html.gz``,
    where every character of the bookmark URL that is not alphanumeric,
    ``-``, ``_`` or ``.`` is replaced with ``_``.

    The URL part is truncated so that the whole name stays within a fixed
    byte budget. File systems commonly limit a single file name to 255 bytes,
    and an unbounded name would make snapshot creation fail for long URLs.
    """
    # Stay well below the usual 255-byte limit so that suffixes added later
    # (such as the ".tmp" suffix of the temporary snapshot file) still fit.
    max_filename_bytes = 192

    def sanitize_char(char):
        return char if char.isalnum() or char in ("-", "_", ".") else "_"

    formatted_datetime = asset.date_created.strftime("%Y-%m-%d_%H%M%S")
    prefix = f"{asset.asset_type}_{formatted_datetime}_"
    suffix = ".html.gz"
    sanitized_url = "".join(sanitize_char(char) for char in asset.bookmark.url)

    # Budget in bytes, not characters: isalnum() also accepts non-ASCII
    # letters, which take more than one byte when encoded.
    fixed_length = len((prefix + suffix).encode("utf-8"))
    url_budget = max(max_filename_bytes - fixed_length, 0)
    encoded_url = sanitized_url.encode("utf-8")
    if len(encoded_url) > url_budget:
        # errors="ignore" drops a multi-byte character cut in half by the slice.
        sanitized_url = encoded_url[:url_budget].decode("utf-8", errors="ignore")

    return f"{prefix}{sanitized_url}{suffix}"
|
||||
|
||||
|
||||
@background()
def _create_html_snapshot_task(asset_id: int):
    """Create the HTML snapshot file for the asset with the given id.

    Returns silently when the asset no longer exists. Otherwise the snapshot
    is created with single-file inside ``LD_ASSET_FOLDER``; the asset is then
    marked complete (with file name and gzip flag set) or, when single-file
    reports an error, marked as failed. The asset is saved in both cases.
    """
    try:
        asset = BookmarkAsset.objects.get(id=asset_id)
    except BookmarkAsset.DoesNotExist:
        # The asset was removed before the task ran; nothing to do.
        return

    bookmark_url = asset.bookmark.url
    logger.info(f"Create HTML snapshot for bookmark. url={bookmark_url}")

    try:
        snapshot_filename = _generate_snapshot_filename(asset)
        snapshot_path = os.path.join(settings.LD_ASSET_FOLDER, snapshot_filename)
        singlefile.create_snapshot(bookmark_url, snapshot_path)
    except singlefile.SingeFileError as error:
        asset.status = BookmarkAsset.STATUS_FAILURE
        logger.error(
            f"Failed to create HTML snapshot for bookmark. url={bookmark_url}",
            exc_info=error,
        )
    else:
        asset.status = BookmarkAsset.STATUS_COMPLETE
        asset.file = snapshot_filename
        asset.gzip = True
        logger.info(
            f"Successfully created HTML snapshot for bookmark. url={bookmark_url}"
        )

    # Persist whichever status was set above.
    asset.save()
|
||||
|
Reference in New Issue
Block a user