Fix website loader content encoding detection (#482)

2025-09-22 00:49:34 +02:00 · 2023-05-30 22:04:54 +02:00
parent 5d48c64b2b
commit 4220ea0b4c
2 changed files with 16 additions and 2 deletions
--- a/bookmarks/services/website_loader.py
+++ b/bookmarks/services/website_loader.py
@@ -71,8 +71,10 @@ def load_page(url: str):
            logger.debug(f'Loaded chunk (iteration={iteration}, total={size / 1024})')

            # Stop reading if we have parsed end of head tag
-            if '</head>'.encode('utf-8') in content:
+            end_of_head = '</head>'.encode('utf-8')
+            if end_of_head in content:
                logger.debug(f'Found closing head tag after {size} bytes')
+                content = content.split(end_of_head)[0] + end_of_head
                break
            # Stop reading if we exceed limit
            if size > MAX_CONTENT_LIMIT: