Automatically compress uploads with gzip (#1087)

* Gzip .html uploads; add tests for .html and .gz uploads

* Gzip all file types that aren't already gzipped

* Show filename of what user uploaded before compression

* Remove a line I thought we needed but don't

* Clean up and fix tests

---------

Co-authored-by: kclark <kclark@autoverify.net>
Co-authored-by: Sascha Ißbrücker <sascha.issbruecker@gmail.com>
This commit is contained in:
hkclark
2025-06-20 00:15:25 -04:00
committed by GitHub
parent e87304501f
commit 7a4236d179
4 changed files with 77 additions and 15 deletions

View File

@@ -94,6 +94,20 @@ def upload_asset(bookmark: Bookmark, upload_file: UploadedFile):
gzip=False, gzip=False,
) )
name, extension = os.path.splitext(upload_file.name) name, extension = os.path.splitext(upload_file.name)
# automatically gzip the file if it is not already gzipped
if upload_file.content_type != "application/gzip":
filename = _generate_asset_filename(
asset, name, extension.lstrip(".") + ".gz"
)
filepath = os.path.join(settings.LD_ASSET_FOLDER, filename)
with gzip.open(filepath, "wb", compresslevel=9) as f:
for chunk in upload_file.chunks():
f.write(chunk)
asset.gzip = True
asset.file = filename
asset.file_size = os.path.getsize(filepath)
else:
filename = _generate_asset_filename(asset, name, extension.lstrip(".")) filename = _generate_asset_filename(asset, name, extension.lstrip("."))
filepath = os.path.join(settings.LD_ASSET_FOLDER, filename) filepath = os.path.join(settings.LD_ASSET_FOLDER, filename)
with open(filepath, "wb") as f: with open(filepath, "wb") as f:
@@ -101,6 +115,7 @@ def upload_asset(bookmark: Bookmark, upload_file: UploadedFile):
f.write(chunk) f.write(chunk)
asset.file = filename asset.file = filename
asset.file_size = upload_file.size asset.file_size = upload_file.size
asset.save() asset.save()
asset.bookmark.date_modified = timezone.now() asset.bookmark.date_modified = timezone.now()

View File

@@ -236,9 +236,18 @@ class BookmarkFactoryMixin:
def read_asset_file(self, asset: BookmarkAsset): def read_asset_file(self, asset: BookmarkAsset):
filepath = os.path.join(settings.LD_ASSET_FOLDER, asset.file) filepath = os.path.join(settings.LD_ASSET_FOLDER, asset.file)
if asset.gzip:
with gzip.open(filepath, "rb") as f:
return f.read()
else:
with open(filepath, "rb") as f: with open(filepath, "rb") as f:
return f.read() return f.read()
def get_asset_filesize(self, asset: BookmarkAsset):
filepath = os.path.join(settings.LD_ASSET_FOLDER, asset.file)
return os.path.getsize(filepath) if os.path.exists(filepath) else 0
def has_asset_file(self, asset: BookmarkAsset): def has_asset_file(self, asset: BookmarkAsset):
filepath = os.path.join(settings.LD_ASSET_FOLDER, asset.file) filepath = os.path.join(settings.LD_ASSET_FOLDER, asset.file)
return os.path.exists(filepath) return os.path.exists(filepath)

View File

@@ -207,11 +207,10 @@ class AssetServiceTestCase(TestCase, BookmarkFactoryMixin):
# verify file name # verify file name
self.assertTrue(saved_file_name.startswith("upload_")) self.assertTrue(saved_file_name.startswith("upload_"))
self.assertTrue(saved_file_name.endswith("_test_file.txt")) self.assertTrue(saved_file_name.endswith("_test_file.txt.gz"))
# file should contain the correct content # file should contain the correct content
with open(os.path.join(self.assets_dir, saved_file_name), "rb") as file: self.assertEqual(self.read_asset_file(asset), file_content)
self.assertEqual(file.read(), file_content)
# should create asset # should create asset
self.assertIsNotNone(asset.id) self.assertIsNotNone(asset.id)
@@ -221,6 +220,45 @@ class AssetServiceTestCase(TestCase, BookmarkFactoryMixin):
self.assertEqual(asset.display_name, upload_file.name) self.assertEqual(asset.display_name, upload_file.name)
self.assertEqual(asset.status, BookmarkAsset.STATUS_COMPLETE) self.assertEqual(asset.status, BookmarkAsset.STATUS_COMPLETE)
self.assertEqual(asset.file, saved_file_name) self.assertEqual(asset.file, saved_file_name)
self.assertEqual(asset.file_size, self.get_asset_filesize(asset))
self.assertTrue(asset.gzip)
# should update bookmark modified date
bookmark.refresh_from_db()
self.assertGreater(bookmark.date_modified, initial_modified)
@disable_logging
def test_upload_gzip_asset(self):
initial_modified = timezone.datetime(
2025, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
)
bookmark = self.setup_bookmark(modified=initial_modified)
file_content = gzip.compress(b"<html>test content</html>")
upload_file = SimpleUploadedFile(
"test_file.html.gz", file_content, content_type="application/gzip"
)
asset = assets.upload_asset(bookmark, upload_file)
# should create file in asset folder
saved_file_name = self.get_saved_snapshot_file()
self.assertIsNotNone(upload_file)
# verify file name
self.assertTrue(saved_file_name.startswith("upload_"))
self.assertTrue(saved_file_name.endswith("_test_file.html.gz"))
# file should contain the correct content
self.assertEqual(self.read_asset_file(asset), file_content)
# should create asset
self.assertIsNotNone(asset.id)
self.assertEqual(asset.bookmark, bookmark)
self.assertEqual(asset.asset_type, BookmarkAsset.TYPE_UPLOAD)
self.assertEqual(asset.content_type, "application/gzip")
self.assertEqual(asset.display_name, upload_file.name)
self.assertEqual(asset.status, BookmarkAsset.STATUS_COMPLETE)
self.assertEqual(asset.file, saved_file_name)
self.assertEqual(asset.file_size, len(file_content)) self.assertEqual(asset.file_size, len(file_content))
self.assertFalse(asset.gzip) self.assertFalse(asset.gzip)
@@ -245,7 +283,7 @@ class AssetServiceTestCase(TestCase, BookmarkFactoryMixin):
self.assertEqual(192, len(saved_file)) self.assertEqual(192, len(saved_file))
self.assertTrue(saved_file.startswith("upload_")) self.assertTrue(saved_file.startswith("upload_"))
self.assertTrue(saved_file.endswith("aaaa.txt")) self.assertTrue(saved_file.endswith("aaaa.txt.gz"))
@disable_logging @disable_logging
def test_upload_asset_failure(self): def test_upload_asset_failure(self):

View File

@@ -253,8 +253,8 @@ class BookmarkAssetsApiTestCase(LinkdingApiTestCase, BookmarkFactoryMixin):
self.assertEqual(asset.display_name, file_name) self.assertEqual(asset.display_name, file_name)
self.assertEqual(asset.asset_type, BookmarkAsset.TYPE_UPLOAD) self.assertEqual(asset.asset_type, BookmarkAsset.TYPE_UPLOAD)
self.assertEqual(asset.content_type, "text/plain") self.assertEqual(asset.content_type, "text/plain")
self.assertEqual(asset.file_size, len(file_content)) self.assertEqual(asset.file_size, self.get_asset_filesize(asset))
self.assertFalse(asset.gzip) self.assertTrue(asset.gzip)
content = self.read_asset_file(asset) content = self.read_asset_file(asset)
self.assertEqual(content, file_content) self.assertEqual(content, file_content)