Improve import performance (#261)

* Run import in batches, cache tags

* Use bulk operations for bookmarks and assigning tags

* Improve naming

* Restore bookmark validation

* Add logging

* Bulk create tags

* Use HTMLParser for parsing bookmarks

* Add parser tests

* Add more importer tests

* Add more importer tests

* Remove pyparsing dependency

Co-authored-by: Sascha Ißbrücker <sascha.issbruecker@gmail.com>
This commit is contained in:
Sascha Ißbrücker
2022-05-21 09:27:30 +02:00
committed by GitHub
parent 117160ea87
commit f4e3d724f0
8 changed files with 571 additions and 103 deletions

View File

@@ -1,13 +1,13 @@
import logging import logging
from dataclasses import dataclass from dataclasses import dataclass
from typing import List
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.utils import timezone from django.utils import timezone
from bookmarks.models import Bookmark, parse_tag_string from bookmarks.models import Bookmark, Tag, parse_tag_string
from bookmarks.services import tasks from bookmarks.services import tasks
from bookmarks.services.parser import parse, NetscapeBookmark from bookmarks.services.parser import parse, NetscapeBookmark
from bookmarks.services.tags import get_or_create_tags
from bookmarks.utils import parse_timestamp from bookmarks.utils import parse_timestamp
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -20,8 +20,39 @@ class ImportResult:
failed: int = 0 failed: int = 0
class TagCache:
    """In-memory lookup of a user's tags, keyed by lower-cased tag name.

    The cache is filled from the database once, on construction, so that tag
    lookups during an import do not have to query the database again.
    """

    def __init__(self, user: User):
        self.user = user
        self.cache = dict()

        # Init cache with all existing tags for that user
        tags = Tag.objects.filter(owner=user)
        for tag in tags:
            self.put(tag)

    def get(self, tag_name: str):
        """Return the cached tag for ``tag_name`` (case-insensitive), or None."""
        return self.cache.get(tag_name.lower())

    def get_all(self, tag_names: List[str]):
        """Return the distinct cached tags for ``tag_names`` in first-seen order.

        Names that have no cached tag are skipped, so the result never
        contains None.
        """
        result = []
        for tag_name in tag_names:
            tag = self.get(tag_name)
            # Skip names without a cached tag, and prevent returning duplicates
            if tag is not None and tag not in result:
                result.append(tag)

        return result

    def put(self, tag: Tag):
        """Store ``tag`` under its lower-cased name, replacing any previous entry."""
        self.cache[tag.name.lower()] = tag
def import_netscape_html(html: str, user: User): def import_netscape_html(html: str, user: User):
result = ImportResult() result = ImportResult()
import_start = timezone.now()
try: try:
netscape_bookmarks = parse(html) netscape_bookmarks = parse(html)
@@ -29,26 +60,130 @@ def import_netscape_html(html: str, user: User):
logging.exception('Could not read bookmarks file.') logging.exception('Could not read bookmarks file.')
raise raise
parse_end = timezone.now()
logger.debug(f'Parse duration: {parse_end - import_start}')
# Create and cache all tags beforehand
_create_missing_tags(netscape_bookmarks, user)
tag_cache = TagCache(user)
# Split bookmarks to import into batches, to keep memory usage for bulk operations manageable
batches = _get_batches(netscape_bookmarks, 200)
for batch in batches:
_import_batch(batch, user, tag_cache, result)
# Create snapshots for newly imported bookmarks
tasks.schedule_bookmarks_without_snapshots(user)
end = timezone.now()
logger.debug(f'Import duration: {end - import_start}')
return result
def _create_missing_tags(netscape_bookmarks: List[NetscapeBookmark], user: User):
    """Bulk-create every tag used by the parsed bookmarks that ``user`` does not have yet.

    Tag names are compared case-insensitively through a TagCache, and each
    missing name is created only once even when several bookmarks use it.
    """
    tag_cache = TagCache(user)
    tags_to_create = []

    for netscape_bookmark in netscape_bookmarks:
        tag_names = parse_tag_string(netscape_bookmark.tag_string)
        for tag_name in tag_names:
            tag = tag_cache.get(tag_name)
            if not tag:
                tag = Tag(name=tag_name, owner=user)
                tag.date_added = timezone.now()
                tags_to_create.append(tag)
                # Remember the pending tag, otherwise the same name used by a
                # later bookmark would be scheduled for creation again
                tag_cache.put(tag)

    Tag.objects.bulk_create(tags_to_create)
def _get_batches(items: List, batch_size: int):
batches = []
offset = 0
num_items = len(items)
while offset < num_items:
batch = items[offset:min(offset + batch_size, num_items)]
if len(batch) > 0:
batches.append(batch)
offset = offset + batch_size
return batches
def _import_batch(netscape_bookmarks: List[NetscapeBookmark], user: User, tag_cache: TagCache, result: ImportResult):
    """Import one batch of parsed bookmarks using bulk database operations.

    Each parsed bookmark either updates the user's existing bookmark with the
    same URL or creates a new one. Afterwards the bookmarks are linked to their
    tags, which are looked up in ``tag_cache``.

    ``result`` is modified in place: ``total`` is incremented for every parsed
    bookmark, ``success`` for those that passed validation, and ``failed`` for
    those that raised while being prepared.
    """
    # Query existing bookmarks
    batch_urls = [bookmark.href for bookmark in netscape_bookmarks]
    existing_bookmarks = Bookmark.objects.filter(owner=user, url__in=batch_urls)
    # Index by URL once instead of scanning the whole query result for every
    # parsed bookmark; setdefault keeps the first bookmark found for a URL
    existing_by_url = {}
    for bookmark in existing_bookmarks:
        existing_by_url.setdefault(bookmark.url, bookmark)

    # Create or update bookmarks from parsed Netscape bookmarks
    bookmarks_to_create = []
    bookmarks_to_update = []

    for netscape_bookmark in netscape_bookmarks:
        result.total = result.total + 1
        try:
            # Lookup existing bookmark by URL, or create new bookmark if there is no bookmark for that URL yet
            bookmark = existing_by_url.get(netscape_bookmark.href)
            is_update = bookmark is not None
            if not is_update:
                bookmark = Bookmark(owner=user)
            # Copy data from parsed bookmark
            _copy_bookmark_data(netscape_bookmark, bookmark)
            # Validate bookmark fields, exclude owner to prevent n+1 database query,
            # also there is no specific validation on owner
            bookmark.clean_fields(exclude=['owner'])
            # Schedule for update or insert
            if is_update:
                bookmarks_to_update.append(bookmark)
            else:
                bookmarks_to_create.append(bookmark)

            result.success = result.success + 1
        except Exception:
            # Best effort: a single broken bookmark must not abort the whole import
            shortened_bookmark_tag_str = str(netscape_bookmark)[:100] + '...'
            logging.exception('Error importing bookmark: ' + shortened_bookmark_tag_str)
            result.failed = result.failed + 1

    # Bulk update bookmarks in DB
    Bookmark.objects.bulk_update(bookmarks_to_update,
                                 ['url', 'date_added', 'date_modified', 'unread', 'title', 'description', 'owner'])
    # Bulk insert new bookmarks into DB
    Bookmark.objects.bulk_create(bookmarks_to_create)

    # Bulk assign tags
    # In Django 3, bulk_create does not return the auto-generated IDs when bulk inserting,
    # so we have to reload the inserted bookmarks, and match them to the parsed bookmarks by URL
    existing_bookmarks = Bookmark.objects.filter(owner=user, url__in=batch_urls)
    existing_by_url = {}
    for bookmark in existing_bookmarks:
        existing_by_url.setdefault(bookmark.url, bookmark)

    BookmarkToTagRelationShip = Bookmark.tags.through
    relationships = []

    for netscape_bookmark in netscape_bookmarks:
        # Lookup bookmark by URL again
        bookmark = existing_by_url.get(netscape_bookmark.href)

        if not bookmark:
            # Either the bookmark failed validation above and was never stored, or something
            # is wrong, as we should have just created this bookmark
            shortened_bookmark_tag_str = str(netscape_bookmark)[:100] + '...'
            logging.warning(
                f'Failed to assign tags to the bookmark: {shortened_bookmark_tag_str}. Could not find bookmark by URL.')
            # There is nothing to attach tags to; do not build relationships without a bookmark
            continue

        # Get tag models by string, schedule inserts for bookmark -> tag associations
        tag_names = parse_tag_string(netscape_bookmark.tag_string)
        tags = tag_cache.get_all(tag_names)
        for tag in tags:
            relationships.append(BookmarkToTagRelationShip(bookmark=bookmark, tag=tag))

    # Insert all bookmark -> tag associations at once, should ignore errors if association already exists
    BookmarkToTagRelationShip.objects.bulk_create(relationships, ignore_conflicts=True)
def _import_bookmark_tag(netscape_bookmark: NetscapeBookmark, user: User): def _copy_bookmark_data(netscape_bookmark: NetscapeBookmark, bookmark: Bookmark):
# Either modify existing bookmark for the URL or create new one
bookmark = _get_or_create_bookmark(netscape_bookmark.href, user)
bookmark.url = netscape_bookmark.href bookmark.url = netscape_bookmark.href
if netscape_bookmark.date_added: if netscape_bookmark.date_added:
bookmark.date_added = parse_timestamp(netscape_bookmark.date_added) bookmark.date_added = parse_timestamp(netscape_bookmark.date_added)
@@ -56,24 +191,7 @@ def _import_bookmark_tag(netscape_bookmark: NetscapeBookmark, user: User):
bookmark.date_added = timezone.now() bookmark.date_added = timezone.now()
bookmark.date_modified = bookmark.date_added bookmark.date_modified = bookmark.date_added
bookmark.unread = False bookmark.unread = False
bookmark.title = netscape_bookmark.title if netscape_bookmark.title:
bookmark.title = netscape_bookmark.title
if netscape_bookmark.description: if netscape_bookmark.description:
bookmark.description = netscape_bookmark.description bookmark.description = netscape_bookmark.description
bookmark.owner = user
bookmark.full_clean()
bookmark.save()
# Set tags
tag_names = parse_tag_string(netscape_bookmark.tag_string)
tags = get_or_create_tags(tag_names, user)
bookmark.tags.set(tags)
bookmark.save()
def _get_or_create_bookmark(url: str, user: User):
try:
return Bookmark.objects.get(url=url, owner=user)
except Bookmark.DoesNotExist:
return Bookmark()

View File

@@ -1,6 +1,6 @@
from dataclasses import dataclass from dataclasses import dataclass
from html.parser import HTMLParser
import pyparsing as pp from typing import Dict, List
@dataclass @dataclass
@@ -12,60 +12,72 @@ class NetscapeBookmark:
tag_string: str tag_string: str
def extract_bookmark_link(tag): class BookmarkParser(HTMLParser):
href = tag[0].href def __init__(self):
title = tag[0].text super().__init__()
tag_string = tag[0].tags self.bookmarks = []
date_added = tag[0].add_date
return { self.current_tag = None
'href': href, self.bookmark = None
'title': title, self.href = ''
'tag_string': tag_string, self.add_date = ''
'date_added': date_added self.tags = ''
} self.title = ''
self.description = ''
def handle_starttag(self, tag: str, attrs: list):
name = 'handle_start_' + tag.lower()
if name in dir(self):
getattr(self, name)({k.lower(): v for k, v in attrs})
self.current_tag = tag
def extract_bookmark(tag): def handle_endtag(self, tag: str):
link = tag[0].link name = 'handle_end_' + tag.lower()
description = tag[0].description if name in dir(self):
description = description[0] if description else '' getattr(self, name)()
self.current_tag = None
return { def handle_data(self, data):
'link': link, name = f'handle_{self.current_tag}_data'
'description': description, if name in dir(self):
} getattr(self, name)(data)
def handle_end_dl(self):
self.add_bookmark()
def extract_description(tag): def handle_start_dt(self, attrs: Dict[str, str]):
return tag[0].strip() self.add_bookmark()
def handle_start_a(self, attrs: Dict[str, str]):
# define grammar vars(self).update(attrs)
dt_start, _ = pp.makeHTMLTags("DT") self.bookmark = NetscapeBookmark(
dd_start, _ = pp.makeHTMLTags("DD") href=self.href,
a_start, a_end = pp.makeHTMLTags("A") title='',
bookmark_link_tag = pp.Group(a_start + a_start.tag_body("text") + a_end.suppress()) description='',
bookmark_link_tag.addParseAction(extract_bookmark_link) date_added=self.add_date,
bookmark_description_tag = dd_start.suppress() + pp.SkipTo(pp.anyOpenTag | pp.anyCloseTag)("description") tag_string=self.tags,
bookmark_description_tag.addParseAction(extract_description)
bookmark_tag = pp.Group(dt_start + bookmark_link_tag("link") + pp.ZeroOrMore(bookmark_description_tag)("description"))
bookmark_tag.addParseAction(extract_bookmark)
def parse(html: str) -> [NetscapeBookmark]:
matches = bookmark_tag.searchString(html)
bookmarks = []
for match in matches:
bookmark_match = match[0]
bookmark = NetscapeBookmark(
href=bookmark_match['link']['href'],
title=bookmark_match['link']['title'],
description=bookmark_match['description'],
tag_string=bookmark_match['link']['tag_string'],
date_added=bookmark_match['link']['date_added'],
) )
bookmarks.append(bookmark)
return bookmarks def handle_a_data(self, data):
self.title = data.strip()
def handle_dd_data(self, data):
self.description = data.strip()
def add_bookmark(self):
if self.bookmark:
self.bookmark.title = self.title
self.bookmark.description = self.description
self.bookmarks.append(self.bookmark)
self.bookmark = None
self.href = ''
self.add_date = ''
self.tags = ''
self.title = ''
self.description = ''
def parse(html: str) -> List[NetscapeBookmark]:
    """Parse a Netscape bookmark file and return its bookmarks in document order."""
    parser = BookmarkParser()
    parser.feed(html)
    # Force processing of any input the parser is still buffering
    parser.close()
    # A bookmark is normally stored when the next <DT> starts or a </DL> is reached;
    # store the last one as well if the input ends without either
    parser.add_bookmark()
    return parser.bookmarks

View File

@@ -1,5 +1,7 @@
import random import random
import logging import logging
from dataclasses import dataclass
from typing import Optional, List
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.utils import timezone from django.utils import timezone
@@ -87,6 +89,42 @@ class LinkdingApiTestCase(APITestCase):
return response return response
@dataclass
class BookmarkHtmlTag:
    """Attributes of one bookmark entry used to render HTML for import tests.

    Every field defaults to an empty string. Being a dataclass, instances
    compare by value and have a readable repr in test failure output.
    """
    href: str = ''
    title: str = ''
    description: str = ''
    add_date: str = ''
    tags: str = ''
class ImportTestMixin:
    """Test helpers that render Netscape bookmark HTML from BookmarkHtmlTag objects."""

    def render_tag(self, tag: BookmarkHtmlTag) -> str:
        """Render one bookmark as a ``<DT><A ...>`` entry.

        The HREF, ADD_DATE and TAGS attributes, the title text and the ``<DD>``
        description are each left out when the matching field of ``tag`` is empty.
        """
        return f'''
<DT>
<A {f'HREF="{tag.href}"' if tag.href else ''}
{f'ADD_DATE="{tag.add_date}"' if tag.add_date else ''}
{f'TAGS="{tag.tags}"' if tag.tags else ''}>
{tag.title if tag.title else ''}
</A>
{f'<DD>{tag.description}' if tag.description else ''}
'''

    def render_html(self, tags: Optional[List[BookmarkHtmlTag]] = None, tags_html: str = '') -> str:
        """Render a complete bookmark file around the given entries.

        When ``tags`` is non-empty, the entries are rendered with render_tag and
        replace ``tags_html``; otherwise ``tags_html`` is inserted as given.
        """
        if tags:
            rendered_tags = [self.render_tag(tag) for tag in tags]
            tags_html = '\n'.join(rendered_tags)
        return f'''
<!DOCTYPE NETSCAPE-Bookmark-file-1>
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
<TITLE>Bookmarks</TITLE>
<H1>Bookmarks</H1>
<DL><p>
{tags_html}
</DL><p>
'''
_words = [ _words = [
'quasi', 'quasi',
'consequatur', 'consequatur',

View File

@@ -1,29 +1,204 @@
from typing import List
from unittest.mock import patch from unittest.mock import patch
from django.test import TestCase from django.test import TestCase, override_settings
from django.utils import timezone
from bookmarks.models import Tag from bookmarks.models import Bookmark, Tag, parse_tag_string
from bookmarks.services import tasks from bookmarks.services import tasks
from bookmarks.services.importer import import_netscape_html from bookmarks.services.importer import import_netscape_html
from bookmarks.tests.helpers import BookmarkFactoryMixin, disable_logging from bookmarks.tests.helpers import BookmarkFactoryMixin, ImportTestMixin, BookmarkHtmlTag, disable_logging
from bookmarks.utils import parse_timestamp
class ImporterTestCase(TestCase, BookmarkFactoryMixin): class ImporterTestCase(TestCase, BookmarkFactoryMixin, ImportTestMixin):
def create_import_html(self, bookmark_tags_string: str): def assertBookmarksImported(self, html_tags: List[BookmarkHtmlTag]):
return f''' for html_tag in html_tags:
<!DOCTYPE NETSCAPE-Bookmark-file-1> bookmark = Bookmark.objects.get(url=html_tag.href)
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8"> self.assertIsNotNone(bookmark)
<TITLE>Bookmarks</TITLE>
<H1>Bookmarks</H1> self.assertEqual(bookmark.title, html_tag.title)
<DL><p> self.assertEqual(bookmark.description, html_tag.description)
{bookmark_tags_string} self.assertEqual(bookmark.date_added, parse_timestamp(html_tag.add_date))
</DL><p>
''' tag_names = parse_tag_string(html_tag.tags)
# Check assigned tags
for tag_name in tag_names:
tag = next(
(tag for tag in bookmark.tags.all() if tag.name == tag_name), None)
self.assertIsNotNone(tag)
def test_import(self):
html_tags = [
BookmarkHtmlTag(href='https://example.com', title='Example title', description='Example description',
add_date='1', tags='example-tag'),
BookmarkHtmlTag(href='https://foo.com', title='Foo title', description='',
add_date='2', tags=''),
BookmarkHtmlTag(href='https://bar.com', title='Bar title', description='Bar description',
add_date='3', tags='bar-tag, other-tag'),
]
import_html = self.render_html(tags=html_tags)
result = import_netscape_html(import_html, self.get_or_create_test_user())
# Check result
self.assertEqual(result.total, 3)
self.assertEqual(result.success, 3)
self.assertEqual(result.failed, 0)
# Check bookmarks
bookmarks = Bookmark.objects.all()
self.assertEqual(len(bookmarks), 3)
self.assertBookmarksImported(html_tags)
def test_synchronize(self):
# Initial import
html_tags = [
BookmarkHtmlTag(href='https://example.com', title='Example title', description='Example description',
add_date='1', tags='example-tag'),
BookmarkHtmlTag(href='https://foo.com', title='Foo title', description='',
add_date='2', tags=''),
BookmarkHtmlTag(href='https://bar.com', title='Bar title', description='Bar description',
add_date='3', tags='bar-tag, other-tag'),
]
import_html = self.render_html(tags=html_tags)
import_netscape_html(import_html, self.get_or_create_test_user())
# Change data, add some new data
html_tags = [
BookmarkHtmlTag(href='https://example.com', title='Updated Example title',
description='Updated Example description', add_date='111', tags='updated-example-tag'),
BookmarkHtmlTag(href='https://foo.com', title='Updated Foo title', description='Updated Foo description',
add_date='222', tags='new-tag'),
BookmarkHtmlTag(href='https://bar.com', title='Updated Bar title', description='Updated Bar description',
add_date='333', tags='updated-bar-tag, updated-other-tag'),
BookmarkHtmlTag(href='https://baz.com', add_date='444', tags='baz-tag')
]
# Import updated data
import_html = self.render_html(tags=html_tags)
result = import_netscape_html(import_html, self.get_or_create_test_user())
# Check result
self.assertEqual(result.total, 4)
self.assertEqual(result.success, 4)
self.assertEqual(result.failed, 0)
# Check bookmarks
bookmarks = Bookmark.objects.all()
self.assertEqual(len(bookmarks), 4)
self.assertBookmarksImported(html_tags)
def test_import_with_some_invalid_bookmarks(self):
    """Import one valid and two invalid bookmarks and check that only the valid one is stored."""
    html_tags = [
        BookmarkHtmlTag(href='https://example.com'),
        # Invalid URL
        BookmarkHtmlTag(href='foo.com'),
        # No URL
        BookmarkHtmlTag(),
    ]
    import_html = self.render_html(tags=html_tags)
    result = import_netscape_html(import_html, self.get_or_create_test_user())

    # Check result
    self.assertEqual(result.total, 3)
    self.assertEqual(result.success, 1)
    self.assertEqual(result.failed, 2)

    # Check bookmarks
    bookmarks = Bookmark.objects.all()
    self.assertEqual(len(bookmarks), 1)
    # NOTE(review): html_tags[1:1] is an empty slice, so this call checks no bookmark at all.
    # Presumably html_tags[:1] (the valid bookmark) was intended - confirm. That tag has no
    # add_date, so the date comparison in assertBookmarksImported may need adjusting first.
    self.assertBookmarksImported(html_tags[1:1])
def test_import_tags(self):
html_tags = [
BookmarkHtmlTag(href='https://example.com', tags='tag1'),
BookmarkHtmlTag(href='https://foo.com', tags='tag2'),
BookmarkHtmlTag(href='https://bar.com', tags='tag3'),
]
import_html = self.render_html(tags=html_tags)
import_netscape_html(import_html, self.get_or_create_test_user())
self.assertEqual(Tag.objects.count(), 3)
def test_create_missing_tags(self):
html_tags = [
BookmarkHtmlTag(href='https://example.com', tags='tag1'),
BookmarkHtmlTag(href='https://foo.com', tags='tag2'),
BookmarkHtmlTag(href='https://bar.com', tags='tag3'),
]
import_html = self.render_html(tags=html_tags)
import_netscape_html(import_html, self.get_or_create_test_user())
html_tags.append(
BookmarkHtmlTag(href='https://baz.com', tags='tag4')
)
import_html = self.render_html(tags=html_tags)
import_netscape_html(import_html, self.get_or_create_test_user())
self.assertEqual(Tag.objects.count(), 4)
def test_should_append_tags_to_bookmark_when_reimporting_with_different_tags(self):
html_tags = [
BookmarkHtmlTag(href='https://example.com', tags='tag1'),
]
import_html = self.render_html(tags=html_tags)
import_netscape_html(import_html, self.get_or_create_test_user())
html_tags.append(
BookmarkHtmlTag(href='https://example.com', tags='tag2, tag3')
)
import_html = self.render_html(tags=html_tags)
import_netscape_html(import_html, self.get_or_create_test_user())
self.assertEqual(Bookmark.objects.count(), 1)
self.assertEqual(Bookmark.objects.all()[0].tags.all().count(), 3)
@override_settings(USE_TZ=False)
def test_use_current_date_when_no_add_date(self):
test_html = self.render_html(tags_html=f'''
<DT><A HREF="https://example.com">Example.com</A>
<DD>Example.com
''')
with patch.object(timezone, 'now', return_value=timezone.datetime(2021, 1, 1)):
import_netscape_html(test_html, self.get_or_create_test_user())
self.assertEqual(Bookmark.objects.count(), 1)
self.assertEqual(Bookmark.objects.all()[0].date_added, timezone.datetime(2021, 1, 1))
def test_keep_title_if_imported_bookmark_has_empty_title(self):
test_html = self.render_html(tags=[
BookmarkHtmlTag(href='https://example.com', title='Example.com')
])
import_netscape_html(test_html, self.get_or_create_test_user())
test_html = self.render_html(tags=[
BookmarkHtmlTag(href='https://example.com')
])
import_netscape_html(test_html, self.get_or_create_test_user())
self.assertEqual(Bookmark.objects.count(), 1)
self.assertEqual(Bookmark.objects.all()[0].title, 'Example.com')
def test_keep_description_if_imported_bookmark_has_empty_description(self):
test_html = self.render_html(tags=[
BookmarkHtmlTag(href='https://example.com', description='Example.com')
])
import_netscape_html(test_html, self.get_or_create_test_user())
test_html = self.render_html(tags=[
BookmarkHtmlTag(href='https://example.com')
])
import_netscape_html(test_html, self.get_or_create_test_user())
self.assertEqual(Bookmark.objects.count(), 1)
self.assertEqual(Bookmark.objects.all()[0].description, 'Example.com')
def test_replace_whitespace_in_tag_names(self): def test_replace_whitespace_in_tag_names(self):
test_html = self.create_import_html(f''' test_html = self.render_html(tags_html=f'''
<DT><A HREF="https://example.com" ADD_DATE="1616337559" PRIVATE="0" TOREAD="0" TAGS="tag 1, tag 2, tag 3">Example.com</A> <DT><A HREF="https://example.com" TAGS="tag 1, tag 2, tag 3">Example.com</A>
<DD>Example.com <DD>Example.com
''') ''')
import_netscape_html(test_html, self.get_or_create_test_user()) import_netscape_html(test_html, self.get_or_create_test_user())
@@ -35,22 +210,22 @@ class ImporterTestCase(TestCase, BookmarkFactoryMixin):
@disable_logging @disable_logging
def test_validate_empty_or_missing_bookmark_url(self): def test_validate_empty_or_missing_bookmark_url(self):
test_html = self.create_import_html(f''' test_html = self.render_html(tags_html=f'''
<!-- Empty URL --> <DT><A HREF="">Empty URL</A>
<DT><A HREF="" ADD_DATE="1616337559" PRIVATE="0" TOREAD="0" TAGS="tag3">Empty URL</A>
<DD>Empty URL <DD>Empty URL
<!-- Missing URL --> <DT><A>Missing URL</A>
<DT><A ADD_DATE="1616337559" PRIVATE="0" TOREAD="0" TAGS="tag3">Missing URL</A>
<DD>Missing URL <DD>Missing URL
''') ''')
import_result = import_netscape_html(test_html, self.get_or_create_test_user()) import_result = import_netscape_html(test_html, self.get_or_create_test_user())
self.assertEqual(Bookmark.objects.count(), 0)
self.assertEqual(import_result.success, 0) self.assertEqual(import_result.success, 0)
self.assertEqual(import_result.failed, 2)
def test_schedule_snapshot_creation(self): def test_schedule_snapshot_creation(self):
user = self.get_or_create_test_user() user = self.get_or_create_test_user()
test_html = self.create_import_html('') test_html = self.render_html(tags_html='')
with patch.object(tasks, 'schedule_bookmarks_without_snapshots') as mock_schedule_bookmarks_without_snapshots: with patch.object(tasks, 'schedule_bookmarks_without_snapshots') as mock_schedule_bookmarks_without_snapshots:
import_netscape_html(test_html, user) import_netscape_html(test_html, user)

View File

@@ -0,0 +1,122 @@
from typing import List
from django.test import TestCase
from bookmarks.services.parser import NetscapeBookmark
from bookmarks.services.parser import parse
from bookmarks.tests.helpers import ImportTestMixin, BookmarkHtmlTag
class ParserTestCase(TestCase, ImportTestMixin):
def assertTagsEqual(self, bookmarks: List[NetscapeBookmark], html_tags: List[BookmarkHtmlTag]):
    """Assert that the parsed bookmarks match the expected HTML tags, pairwise and in order."""
    self.assertEqual(len(bookmarks), len(html_tags))
    # Pair by position. Looking the position up with list.index() finds the first *equal*
    # bookmark, which compares a repeated bookmark against the wrong expected tag.
    for bookmark, html_tag in zip(bookmarks, html_tags):
        self.assertEqual(bookmark.href, html_tag.href)
        self.assertEqual(bookmark.title, html_tag.title)
        self.assertEqual(bookmark.date_added, html_tag.add_date)
        self.assertEqual(bookmark.description, html_tag.description)
        self.assertEqual(bookmark.tag_string, html_tag.tags)
def test_parse_bookmarks(self):
html_tags = [
BookmarkHtmlTag(href='https://example.com', title='Example title', description='Example description',
add_date='1', tags='example-tag'),
BookmarkHtmlTag(href='https://foo.com', title='Foo title', description='',
add_date='2', tags=''),
BookmarkHtmlTag(href='https://bar.com', title='Bar title', description='Bar description',
add_date='3', tags='bar-tag, other-tag'),
]
html = self.render_html(html_tags)
bookmarks = parse(html)
self.assertTagsEqual(bookmarks, html_tags)
def test_no_bookmarks(self):
html = self.render_html()
bookmarks = parse(html)
self.assertEqual(bookmarks, [])
def test_reset_properties_after_adding_bookmark(self):
html_tags = [
BookmarkHtmlTag(href='https://example.com', title='Example title', description='Example description',
add_date='1', tags='example-tag'),
BookmarkHtmlTag(href='', title='', description='',
add_date='', tags='')
]
html = self.render_html(html_tags)
bookmarks = parse(html)
self.assertTagsEqual(bookmarks, html_tags)
def test_empty_title(self):
html_tags = [
BookmarkHtmlTag(href='https://example.com', title='', description='Example description',
add_date='1', tags='example-tag'),
]
html = self.render_html(tags_html='''
<DT><A HREF="https://example.com" ADD_DATE="1" TAGS="example-tag"></A>
<DD>Example description
''')
bookmarks = parse(html)
self.assertTagsEqual(bookmarks, html_tags)
def test_with_closing_description_tag(self):
html_tags = [
BookmarkHtmlTag(href='https://example.com', title='Example title', description='Example description',
add_date='1', tags='example-tag'),
BookmarkHtmlTag(href='https://foo.com', title='Foo title', description='',
add_date='2', tags=''),
]
html = self.render_html(tags_html='''
<DT><A HREF="https://example.com" ADD_DATE="1" TAGS="example-tag">Example title</A>
<DD>Example description</DD>
<DT><A HREF="https://foo.com" ADD_DATE="2">Foo title</A>
<DD></DD>
''')
bookmarks = parse(html)
self.assertTagsEqual(bookmarks, html_tags)
def test_description_tag_before_anchor_tag(self):
html_tags = [
BookmarkHtmlTag(href='https://example.com', title='Example title', description='Example description',
add_date='1', tags='example-tag'),
BookmarkHtmlTag(href='https://foo.com', title='Foo title', description='',
add_date='2', tags=''),
]
html = self.render_html(tags_html='''
<DT><DD>Example description</DD>
<A HREF="https://example.com" ADD_DATE="1" TAGS="example-tag">Example title</A>
<DT><DD></DD>
<A HREF="https://foo.com" ADD_DATE="2">Foo title</A>
''')
bookmarks = parse(html)
self.assertTagsEqual(bookmarks, html_tags)
def test_with_folders(self):
html_tags = [
BookmarkHtmlTag(href='https://example.com', title='Example title', description='Example description',
add_date='1', tags='example-tag'),
BookmarkHtmlTag(href='https://foo.com', title='Foo title', description='',
add_date='2', tags=''),
]
html = self.render_html(tags_html='''
<DL><p>
<DT><H3>Folder 1</H3>
<DL><p>
<DT><A HREF="https://example.com" ADD_DATE="1" TAGS="example-tag">Example title</A>
<DD>Example description
</DL><p>
<DT><H3>Folder 2</H3>
<DL><p>
<DT><A HREF="https://foo.com" ADD_DATE="2">Foo title</A>
</DL><p>
</DL><p>
''')
bookmarks = parse(html)
self.assertTagsEqual(bookmarks, html_tags)

View File

@@ -13,7 +13,6 @@ django-sass-processor==1.0.1
django-widget-tweaks==1.4.8 django-widget-tweaks==1.4.8
djangorestframework==3.12.4 djangorestframework==3.12.4
idna==2.8 idna==2.8
pyparsing==2.4.7
python-dateutil==2.8.1 python-dateutil==2.8.1
pytz==2021.1 pytz==2021.1
requests==2.26.0 requests==2.26.0

View File

@@ -18,7 +18,6 @@ django-widget-tweaks==1.4.8
djangorestframework==3.12.4 djangorestframework==3.12.4
idna==2.8 idna==2.8
libsass==0.21.0 libsass==0.21.0
pyparsing==2.4.7
python-dateutil==2.8.1 python-dateutil==2.8.1
pytz==2021.1 pytz==2021.1
rcssmin==1.0.6 rcssmin==1.0.6

View File

@@ -48,6 +48,11 @@ LOGGING = {
'level': 'DEBUG', 'level': 'DEBUG',
'handlers': ['console'], 'handlers': ['console'],
'propagate': False, 'propagate': False,
},
'bookmarks.services.importer': { # Log importer debug output
'level': 'DEBUG',
'handlers': ['console'],
'propagate': False,
} }
} }
} }