diff --git a/bookmarks/migrations/0034_bookmark_preview_image_file_and_more.py b/bookmarks/migrations/0034_bookmark_preview_image_file_and_more.py index 1e3aca7..454d0d1 100644 --- a/bookmarks/migrations/0034_bookmark_preview_image_file_and_more.py +++ b/bookmarks/migrations/0034_bookmark_preview_image_file_and_more.py @@ -1,4 +1,4 @@ -# Generated by Django 5.0.3 on 2024-05-07 07:27 +# Generated by Django 5.0.3 on 2024-05-10 07:01 from django.db import migrations, models @@ -13,7 +13,7 @@ class Migration(migrations.Migration): migrations.AddField( model_name="bookmark", name="preview_image_file", - field=models.CharField(blank=True, max_length=512, null=True), + field=models.CharField(blank=True, max_length=512), ), migrations.AddField( model_name="userprofile", diff --git a/bookmarks/models.py b/bookmarks/models.py index b15963c..199142e 100644 --- a/bookmarks/models.py +++ b/bookmarks/models.py @@ -59,7 +59,7 @@ class Bookmark(models.Model): website_description = models.TextField(blank=True, null=True) web_archive_snapshot_url = models.CharField(max_length=2048, blank=True) favicon_file = models.CharField(max_length=512, blank=True) - preview_image_file = models.CharField(max_length=512, blank=True, null=True) + preview_image_file = models.CharField(max_length=512, blank=True) unread = models.BooleanField(default=False) is_archived = models.BooleanField(default=False) shared = models.BooleanField(default=False) diff --git a/bookmarks/services/importer.py b/bookmarks/services/importer.py index cdef8f5..5a35427 100644 --- a/bookmarks/services/importer.py +++ b/bookmarks/services/importer.py @@ -83,6 +83,8 @@ def import_netscape_html( tasks.schedule_bookmarks_without_snapshots(user) # Load favicons for newly imported bookmarks tasks.schedule_bookmarks_without_favicons(user) + # Load previews for newly imported bookmarks + tasks.schedule_bookmarks_without_previews(user) end = timezone.now() logger.debug(f"Import duration: {end - import_start}") diff --git a/bookmarks/services/preview_image_loader.py b/bookmarks/services/preview_image_loader.py index 2311f65..895f6ba 100644 --- a/bookmarks/services/preview_image_loader.py +++ b/bookmarks/services/preview_image_loader.py @@ -31,16 +31,58 @@ def load_preview_image(url: str) -> str | None: logger.debug(f"Could not find preview image in metadata: {url}") return None - logger.debug(f"Loading preview image: {metadata.preview_image}") - with requests.get(metadata.preview_image, stream=True) as response: - content_type = response.headers["Content-Type"] - preview_image_hash = _url_to_filename(url) + image_url = metadata.preview_image + + logger.debug(f"Loading preview image: {image_url}") + with requests.get(image_url, stream=True) as response: + if response.status_code < 200 or response.status_code >= 300: + logger.debug( + f"Bad response status code for preview image: {image_url} status_code={response.status_code}" + ) + return None + + if "Content-Length" not in response.headers: + logger.debug(f"Empty Content-Length for preview image: {image_url}") + return None + + content_length = int(response.headers["Content-Length"]) + if content_length > settings.LD_PREVIEW_MAX_SIZE: + logger.debug( + f"Content-Length exceeds LD_PREVIEW_MAX_SIZE: {image_url} length={content_length}" + ) + return None + + if "Content-Type" not in response.headers: + logger.debug(f"Empty Content-Type for preview image: {image_url}") + return None + + content_type = response.headers["Content-Type"].split(";", 1)[0] file_extension = mimetypes.guess_extension(content_type) + + if file_extension not in settings.LD_PREVIEW_ALLOWED_EXTENSIONS: + logger.debug( + f"Unsupported Content-Type for preview image: {image_url} content_type={content_type}" + ) + return None + + preview_image_hash = _url_to_filename(url) preview_image_file = f"{preview_image_hash}{file_extension}" preview_image_path = _get_image_path(preview_image_file) + with open(preview_image_path, "wb") as file: + downloaded = 0 for chunk in response.iter_content(chunk_size=8192): + downloaded += len(chunk) + if downloaded > content_length: + logger.debug( + f"Content-Length mismatch for preview image: {image_url} length={content_length} downloaded={downloaded}" + ) + file.close() + preview_image_path.unlink() + return None + file.write(chunk) + logger.debug(f"Saved preview image as: {preview_image_path}") return preview_image_file diff --git a/bookmarks/services/tasks.py b/bookmarks/services/tasks.py index e42c942..ab164f4 100644 --- a/bookmarks/services/tasks.py +++ b/bookmarks/services/tasks.py @@ -7,6 +7,7 @@ import waybackpy from django.conf import settings from django.contrib.auth import get_user_model from django.contrib.auth.models import User +from django.db.models import Q from django.utils import timezone, formats from huey import crontab from huey.contrib.djhuey import HUEY as huey @@ -166,6 +167,12 @@ def is_favicon_feature_active(user: User) -> bool: return background_tasks_enabled and user.profile.enable_favicons +def is_preview_feature_active(user: User) -> bool: + return ( + user.profile.enable_preview_images and not settings.LD_DISABLE_BACKGROUND_TASKS + ) + + def load_favicon(user: User, bookmark: Bookmark): if is_favicon_feature_active(user): _load_favicon_task(bookmark.id) @@ -222,7 +229,7 @@ def _schedule_refresh_favicons_task(user_id: int): def load_preview_image(user: User, bookmark: Bookmark): - if user.profile.enable_preview_images and not settings.LD_DISABLE_BACKGROUND_TASKS: + if is_preview_feature_active(user): _load_preview_image_task(bookmark.id) @@ -245,6 +252,27 @@ def _load_preview_image_task(bookmark_id: int): ) +def schedule_bookmarks_without_previews(user: User): + if is_preview_feature_active(user): + _schedule_bookmarks_without_previews_task(user.id) + + +@task() +def _schedule_bookmarks_without_previews_task(user_id: int): + user = get_user_model().objects.get(id=user_id) + bookmarks = Bookmark.objects.filter( + Q(preview_image_file__exact=""), + owner=user, + ) + + # TODO: Implement bulk task creation + for bookmark in bookmarks: + try: + _load_preview_image_task(bookmark.id) + except Exception as exc: + logging.exception(exc) + + def is_html_snapshot_feature_active() -> bool: return settings.LD_ENABLE_SNAPSHOTS and not settings.LD_DISABLE_BACKGROUND_TASKS diff --git a/bookmarks/tests/test_bookmarks_tasks.py b/bookmarks/tests/test_bookmarks_tasks.py index 1376285..0ac497d 100644 --- a/bookmarks/tests/test_bookmarks_tasks.py +++ b/bookmarks/tests/test_bookmarks_tasks.py @@ -578,6 +578,60 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): self.assertEqual(self.executed_count(), 0) + def test_schedule_bookmarks_without_previews_should_load_preview_for_all_bookmarks_without_preview( + self, + ): + user = self.get_or_create_test_user() + self.setup_bookmark() + self.setup_bookmark() + self.setup_bookmark() + self.setup_bookmark(preview_image_file="test.png") + self.setup_bookmark(preview_image_file="test.png") + self.setup_bookmark(preview_image_file="test.png") + + tasks.schedule_bookmarks_without_previews(user) + + self.assertEqual(self.executed_count(), 4) + self.assertEqual(self.mock_load_preview_image.call_count, 3) + + def test_schedule_bookmarks_without_previews_should_only_update_user_owned_bookmarks( + self, + ): + user = self.get_or_create_test_user() + other_user = User.objects.create_user( + "otheruser", "otheruser@example.com", "password123" + ) + self.setup_bookmark() + self.setup_bookmark() + self.setup_bookmark() + self.setup_bookmark(user=other_user) + self.setup_bookmark(user=other_user) + self.setup_bookmark(user=other_user) + + tasks.schedule_bookmarks_without_previews(user) + + self.assertEqual(self.mock_load_preview_image.call_count, 3) + + @override_settings(LD_DISABLE_BACKGROUND_TASKS=True) + def test_schedule_bookmarks_without_previews_should_not_run_when_background_tasks_are_disabled( + self, + ): + self.setup_bookmark() + tasks.schedule_bookmarks_without_previews(self.get_or_create_test_user()) + + self.assertEqual(self.executed_count(), 0) + + def test_schedule_bookmarks_without_previews_should_not_run_when_preview_feature_is_disabled( + self, + ): + self.user.profile.enable_preview_images = False + self.user.profile.save() + + self.setup_bookmark() + tasks.schedule_bookmarks_without_previews(self.get_or_create_test_user()) + + self.assertEqual(self.executed_count(), 0) + @override_settings(LD_ENABLE_SNAPSHOTS=True) def test_create_html_snapshot_should_create_pending_asset(self): bookmark = self.setup_bookmark() diff --git a/bookmarks/tests/test_importer.py b/bookmarks/tests/test_importer.py index d8e59d6..163b9ad 100644 --- a/bookmarks/tests/test_importer.py +++ b/bookmarks/tests/test_importer.py @@ -465,3 +465,14 @@ class ImporterTestCase(TestCase, BookmarkFactoryMixin, ImportTestMixin): import_netscape_html(test_html, user) mock_schedule_bookmarks_without_favicons.assert_called_once_with(user) + + def test_schedule_preview_loading(self): + user = self.get_or_create_test_user() + test_html = self.render_html(tags_html="") + + with patch.object( + tasks, "schedule_bookmarks_without_previews" + ) as mock_schedule_bookmarks_without_previews: + import_netscape_html(test_html, user) + + mock_schedule_bookmarks_without_previews.assert_called_once_with(user) diff --git a/bookmarks/tests/test_preview_image_loader.py b/bookmarks/tests/test_preview_image_loader.py index 66dcd6c..df61ce5 100644 --- a/bookmarks/tests/test_preview_image_loader.py +++ b/bookmarks/tests/test_preview_image_loader.py @@ -13,9 +13,20 @@ mock_image_data = b"mock_image" class MockStreamingResponse: - def __init__(self, data=mock_image_data, content_type="image/png"): + def __init__( + self, + url, + data=mock_image_data, + content_type="image/png", + content_length=None, + status_code=200, + ): + self.url = url self.chunks = [data] - self.headers = {"Content-Type": content_type} + self.status_code = status_code + if not content_length: + content_length = len(data) + self.headers = {"Content-Type": content_type, "Content-Length": content_length} def iter_content(self, **kwargs): return self.chunks @@ -47,19 +58,29 @@ class PreviewImageLoaderTestCase(TestCase): self.settings_override.disable() self.mock_load_website_metadata_patcher.stop() - def create_mock_response(self, icon_data=mock_image_data, content_type="image/png"): + def create_mock_response( + self, + url="https://example.com/image.png", + icon_data=mock_image_data, + content_type="image/png", + content_length=len(mock_image_data), + status_code=200, + ): mock_response = mock.Mock() mock_response.raw = io.BytesIO(icon_data) - return MockStreamingResponse(icon_data, content_type) + return MockStreamingResponse( + url, icon_data, content_type, content_length, status_code + ) def get_image_path(self, filename): return Path(os.path.join(settings.LD_PREVIEW_FOLDER, filename)) - def image_exists(self, filename): - return self.get_image_path(filename).exists() + def assertImageExists(self, filename, data): + self.assertTrue(self.get_image_path(filename).exists()) + self.assertEqual(self.get_image_path(filename).read_bytes(), data) - def get_image_data(self, filename): - return self.get_image_path(filename).read_bytes() + def assertNoImageExists(self): + self.assertFalse(os.listdir(settings.LD_PREVIEW_FOLDER)) def test_load_preview_image(self): with mock.patch("requests.get") as mock_get: @@ -67,8 +88,8 @@ class PreviewImageLoaderTestCase(TestCase): file = preview_image_loader.load_preview_image("https://example.com") - self.assertTrue(self.image_exists(file)) - self.assertEqual(mock_image_data, self.get_image_data(file)) + self.assertIsNotNone(file) + self.assertImageExists(file, mock_image_data) def test_load_preview_image_returns_none_if_no_preview_image_detected(self): with mock.patch("requests.get") as mock_get: @@ -78,6 +99,80 @@ class PreviewImageLoaderTestCase(TestCase): file = preview_image_loader.load_preview_image("https://example.com") self.assertIsNone(file) + self.assertNoImageExists() + + def test_load_preview_image_returns_none_for_invalid_status_code(self): + invalid_status_codes = [199, 300, 400, 500] + + for status_code in invalid_status_codes: + with mock.patch("requests.get") as mock_get: + mock_get.return_value = self.create_mock_response( + status_code=status_code + ) + + file = preview_image_loader.load_preview_image("https://example.com") + + self.assertIsNone(file) + self.assertNoImageExists() + + def test_load_preview_image_returns_none_if_content_length_exceeds_limit(self): + # exceeds max size + with mock.patch("requests.get") as mock_get: + mock_get.return_value = self.create_mock_response( + content_length=settings.LD_PREVIEW_MAX_SIZE + 1 + ) + + file = preview_image_loader.load_preview_image("https://example.com") + + self.assertIsNone(file) + self.assertNoImageExists() + + # equals max size + with mock.patch("requests.get") as mock_get: + mock_get.return_value = self.create_mock_response( + content_length=settings.LD_PREVIEW_MAX_SIZE + ) + + file = preview_image_loader.load_preview_image("https://example.com") + + self.assertIsNotNone(file) + self.assertImageExists(file, mock_image_data) + + def test_load_preview_image_returns_none_for_invalid_content_type(self): + invalid_content_types = ["text/html", "application/json"] + + for content_type in invalid_content_types: + with mock.patch("requests.get") as mock_get: + mock_get.return_value = self.create_mock_response( + content_type=content_type + ) + + file = preview_image_loader.load_preview_image("https://example.com") + + self.assertIsNone(file) + self.assertNoImageExists() + + valid_content_types = ["image/png", "image/jpeg", "image/gif"] + + for content_type in valid_content_types: + with mock.patch("requests.get") as mock_get: + mock_get.return_value = self.create_mock_response( + content_type=content_type + ) + + file = preview_image_loader.load_preview_image("https://example.com") + + self.assertIsNotNone(file) + self.assertImageExists(file, mock_image_data) + + def test_load_preview_image_returns_none_if_download_exceeds_content_length(self): + with mock.patch("requests.get") as mock_get: + mock_get.return_value = self.create_mock_response(content_length=1) + + file = preview_image_loader.load_preview_image("https://example.com") + + self.assertIsNone(file) + self.assertNoImageExists() def test_load_preview_image_creates_folder_if_not_exists(self): with mock.patch("requests.get") as mock_get: @@ -95,14 +190,16 @@ class PreviewImageLoaderTestCase(TestCase): def test_guess_file_extension(self): with mock.patch("requests.get") as mock_get: mock_get.return_value = self.create_mock_response(content_type="image/png") + file = preview_image_loader.load_preview_image("https://example.com") - self.assertTrue(self.image_exists(file)) + self.assertImageExists(file, mock_image_data) self.assertEqual("png", file.split(".")[-1]) with mock.patch("requests.get") as mock_get: mock_get.return_value = self.create_mock_response(content_type="image/jpeg") + file = preview_image_loader.load_preview_image("https://example.com") - self.assertTrue(self.image_exists(file)) + self.assertImageExists(file, mock_image_data) self.assertEqual("jpg", file.split(".")[-1]) diff --git a/siteroot/settings/base.py b/siteroot/settings/base.py index 3f3ad59..641471f 100644 --- a/siteroot/settings/base.py +++ b/siteroot/settings/base.py @@ -289,6 +289,15 @@ LD_ENABLE_REFRESH_FAVICONS = os.getenv("LD_ENABLE_REFRESH_FAVICONS", True) in ( # Previews settings LD_PREVIEW_FOLDER = os.path.join(BASE_DIR, "data", "previews") +LD_PREVIEW_MAX_SIZE = int(os.getenv("LD_PREVIEW_MAX_SIZE", 5242880)) +LD_PREVIEW_ALLOWED_EXTENSIONS = [ + ".jpg", + ".jpeg", + ".png", + ".gif", + ".svg", + ".webp", +] # Asset / snapshot settings LD_ASSET_FOLDER = os.path.join(BASE_DIR, "data", "assets")