Create snapshots on web.archive.org for bookmarks (#150)

* Implement initial background tasks concept

* fix property reference

* update requirements.txt

* simplify bookmark null check

* improve web archive url display

* add background tasks test

* add basic supervisor setup

* schedule missing snapshot creation on login

* remove task locks and clear task history before starting background task processor

* batch create snapshots after import

* fix script reference in supervisord.conf

* add option to disable background tasks

* restructure feature overview
This commit is contained in:
Sascha Ißbrücker
2021-09-04 22:31:04 +02:00
committed by GitHub
parent 8d214649b7
commit d87dde6bae
27 changed files with 470 additions and 19 deletions

View File

@@ -6,6 +6,7 @@ from django.utils import timezone
from bookmarks.models import Bookmark, parse_tag_string
from bookmarks.services.tags import get_or_create_tags
from bookmarks.services.website_loader import load_website_metadata
from bookmarks.services import tasks
def create_bookmark(bookmark: Bookmark, tag_string: str, current_user: User):
@@ -27,10 +28,16 @@ def create_bookmark(bookmark: Bookmark, tag_string: str, current_user: User):
# Update tag list
_update_bookmark_tags(bookmark, tag_string, current_user)
bookmark.save()
# Create snapshot on web archive
tasks.create_web_archive_snapshot(bookmark.id, False)
return bookmark
def update_bookmark(bookmark: Bookmark, tag_string, current_user: User):
# Detect URL change
original_bookmark = Bookmark.objects.get(id=bookmark.id)
has_url_changed = original_bookmark.url != bookmark.url
# Update website info
_update_website_metadata(bookmark)
# Update tag list
@@ -38,6 +45,10 @@ def update_bookmark(bookmark: Bookmark, tag_string, current_user: User):
# Update dates
bookmark.date_modified = timezone.now()
bookmark.save()
# Update web archive snapshot, if URL changed
if has_url_changed:
tasks.create_web_archive_snapshot(bookmark.id, True)
return bookmark

View File

@@ -5,6 +5,7 @@ from django.contrib.auth.models import User
from django.utils import timezone
from bookmarks.models import Bookmark, parse_tag_string
from bookmarks.services import tasks
from bookmarks.services.parser import parse, NetscapeBookmark
from bookmarks.services.tags import get_or_create_tags
from bookmarks.utils import parse_timestamp
@@ -38,6 +39,9 @@ def import_netscape_html(html: str, user: User):
logging.exception('Error importing bookmark: ' + shortened_bookmark_tag_str)
result.failed = result.failed + 1
# Create snapshots for newly imported bookmarks
tasks.schedule_bookmarks_without_snapshots(user.id)
return result

View File

@@ -0,0 +1,62 @@
import logging
import waybackpy
from background_task import background
from django.conf import settings
from django.contrib.auth import get_user_model
from waybackpy.exceptions import WaybackError
from bookmarks.models import Bookmark
logger = logging.getLogger(__name__)
def when_background_tasks_enabled(fn):
    """Decorate ``fn`` so that it becomes a no-op when background tasks are disabled.

    The returned wrapper checks ``settings.LD_DISABLE_BACKGROUND_TASKS`` on
    every call. When the setting is truthy the call returns ``None`` without
    invoking ``fn``; otherwise the arguments are forwarded to ``fn`` and its
    result is returned.

    Every entry of ``vars(fn)`` is copied onto the wrapper, so attributes
    carried by the wrapped object stay reachable through the decorated name.
    """

    def wrapper(*args, **kwargs):
        # The setting is read at call time, not at decoration time.
        if not settings.LD_DISABLE_BACKGROUND_TASKS:
            return fn(*args, **kwargs)
        return None

    # Mirror the wrapped object's instance attributes on the wrapper
    # (intended for the TaskProxy produced by the ``background`` decorator).
    for attr_name, attr_value in vars(fn).items():
        setattr(wrapper, attr_name, attr_value)

    return wrapper
@when_background_tasks_enabled
@background()
def create_web_archive_snapshot(bookmark_id: int, force_update: bool):
    """Create a web archive snapshot for a bookmark and store its URL.

    :param bookmark_id: primary key of the bookmark to archive. If no bookmark
        with this id exists, the function returns without doing anything.
    :param force_update: when false, a bookmark that already has a
        ``web_archive_snapshot_url`` is skipped; when true, a new snapshot is
        requested regardless.
    :raises WaybackError: re-raised after logging when saving the snapshot
        through ``waybackpy`` fails.
    """
    try:
        bookmark = Bookmark.objects.get(id=bookmark_id)
    except Bookmark.DoesNotExist:
        return

    # Skip if snapshot exists and update is not explicitly requested
    if bookmark.web_archive_snapshot_url and not force_update:
        return

    logger.debug(f'Create web archive link for bookmark: {bookmark}...')

    wayback = waybackpy.Url(bookmark.url)

    try:
        archive = wayback.save()
    except WaybackError as error:
        logger.exception(f'Error creating web archive link for bookmark: {bookmark}...', exc_info=error)
        # Propagate the failure to the caller instead of swallowing it.
        raise

    bookmark.web_archive_snapshot_url = archive.archive_url
    # Persist only the snapshot URL. The archive request runs between loading
    # and saving the bookmark, so writing back every field of this in-memory
    # copy could overwrite edits made to the bookmark in the meantime.
    bookmark.save(update_fields=['web_archive_snapshot_url'])
    logger.debug(f'Successfully created web archive link for bookmark: {bookmark}...')
@when_background_tasks_enabled
@background()
def schedule_bookmarks_without_snapshots(user_id: int):
    """Request a snapshot for each of a user's bookmarks that has none yet.

    Looks up the user by ``user_id``, selects the bookmarks owned by that user
    whose ``web_archive_snapshot_url`` is the empty string, and calls
    ``create_web_archive_snapshot`` for each of them with ``force_update``
    set to ``False``.

    :param user_id: primary key of the user whose bookmarks are processed.
    """
    owner = get_user_model().objects.get(id=user_id)

    # An empty string marks a bookmark that has no snapshot URL stored.
    for pending in Bookmark.objects.filter(web_archive_snapshot_url__exact='', owner=owner):
        create_web_archive_snapshot(pending.id, False)