Group ideographic characters in tag cloud (#613)

* Fix #588: ideographic characters should be grouped together.
This follows the suggestion, from this SO answer, of using a regex to
match the ideographic range: https://stackoverflow.com/a/2718203/554903

We group the ideographic characters together, while keeping other
Chinese, Japanese and Korean characters apart.

* cleanup

---------

Co-authored-by: Sascha Ißbrücker <sascha.issbruecker@gmail.com>
This commit is contained in:
Jonathan Sundqvist
2024-03-16 07:09:37 +01:00
committed by GitHub
parent 38204c87cf
commit 683cf529d7
2 changed files with 74 additions and 10 deletions

View File

@@ -39,7 +39,7 @@ class TagCloudTemplateTest(TestCase, BookmarkFactoryMixin, HtmlTestMixin):
group_element = group_elements[group_index] group_element = group_elements[group_index]
link_elements = group_element.select("a") link_elements = group_element.select("a")
self.assertEqual(len(link_elements), len(tags)) self.assertEqual(len(link_elements), len(tags), tags)
for tag_index, tag in enumerate(tags, start=0): for tag_index, tag in enumerate(tags, start=0):
link_element = link_elements[tag_index] link_element = link_elements[tag_index]
@@ -50,6 +50,59 @@ class TagCloudTemplateTest(TestCase, BookmarkFactoryMixin, HtmlTestMixin):
link_elements = soup.select("p.selected-tags a") link_elements = soup.select("p.selected-tags a")
self.assertEqual(len(link_elements), count) self.assertEqual(len(link_elements), count)
def test_cjk_using_single_group(self):
    """
    Tags starting with an ideographic character share a single group,
    while tags starting with other Japanese or Korean characters
    (hiragana, katakana, hangul) get one group per leading character.

    The expected list below places the shared ideographic group last.
    """
    tags = [
        self.setup_tag(name="Aardvark"),
        self.setup_tag(name="Armadillo"),
        self.setup_tag(name="あひる"),
        self.setup_tag(name="あきらか"),
        self.setup_tag(name="アヒル"),
        self.setup_tag(name="アキラカ"),
        self.setup_tag(name="ひる"),
        self.setup_tag(name="アヒル"),
        self.setup_tag(name="오리"),
        # NOTE(review): this name is empty as shown here; it looks like a
        # character was lost in extraction/encoding. An empty name would
        # presumably break first-character grouping -- confirm against the
        # upstream commit.
        self.setup_tag(name=""),
        self.setup_tag(name="家鴨"),
        self.setup_tag(name="感じ"),
    ]
    self.setup_bookmark(tags=tags)
    rendered_template = self.render_template()

    # Each inner list is one expected tag group, in rendering order.
    self.assertTagGroups(
        rendered_template,
        [
            [
                "Aardvark",
                "Armadillo",
            ],
            [
                "あきらか",
                "あひる",
            ],
            [
                "ひる",
            ],
            [
                "アキラカ",
                "アヒル",
            ],
            [
                "",
            ],
            [
                "오리",
            ],
            [
                "家鴨",
                "感じ",
            ],
        ],
    )
def test_group_alphabetically(self): def test_group_alphabetically(self):
tags = [ tags = [
self.setup_tag(name="Cockatoo"), self.setup_tag(name="Cockatoo"),

View File

@@ -1,5 +1,6 @@
import urllib.parse import urllib.parse
from typing import Set, List from typing import Set, List
import re
from django.core.handlers.wsgi import WSGIRequest from django.core.handlers.wsgi import WSGIRequest
from django.core.paginator import Paginator from django.core.paginator import Paginator
@@ -11,13 +12,13 @@ from bookmarks import utils
from bookmarks.models import ( from bookmarks.models import (
Bookmark, Bookmark,
BookmarkSearch, BookmarkSearch,
BookmarkSearchForm,
User, User,
UserProfile, UserProfile,
Tag, Tag,
) )
DEFAULT_PAGE_SIZE = 30 DEFAULT_PAGE_SIZE = 30
CJK_RE = re.compile(r"[\u4e00-\u9fff]+")
class BookmarkItem: class BookmarkItem:
@@ -123,13 +124,13 @@ class BookmarkListContext:
) )
def get_base_url(self): def get_base_url(self):
raise Exception(f"Must be implemented by subclass") raise Exception("Must be implemented by subclass")
def get_base_action_url(self): def get_base_action_url(self):
raise Exception(f"Must be implemented by subclass") raise Exception("Must be implemented by subclass")
def get_bookmark_query_set(self): def get_bookmark_query_set(self):
raise Exception(f"Must be implemented by subclass") raise Exception("Must be implemented by subclass")
class ActiveBookmarkListContext(BookmarkListContext): class ActiveBookmarkListContext(BookmarkListContext):
@@ -178,23 +179,33 @@ class TagGroup:
self.tags = [] self.tags = []
self.char = char self.char = char
def __repr__(self):
    """Return a short debug label containing this group's ``char``."""
    return f"<{self.char} TagGroup>"
@staticmethod
def create_tag_groups(tags: Set[Tag]):
    """Partition ``tags`` into groups keyed by lower-cased first character.

    Tags are sorted case-insensitively by name, so both the groups and the
    tags inside each group come out in that order. Tags whose first
    character matches ``CJK_RE`` are collected into one shared
    "Ideographic" group instead of a per-character group; that group is
    appended after all other groups, and only when it is non-empty.

    Returns the list of ``TagGroup`` objects.
    """
    ordered = sorted(tags, key=lambda entry: str.lower(entry.name))
    result = []
    current = None
    ideographic = TagGroup("Ideographic")

    for entry in ordered:
        initial = entry.name[0].lower()

        # Ideographic tags bypass per-character grouping entirely and do
        # not affect which group is currently open.
        if CJK_RE.match(initial):
            ideographic.tags.append(entry)
            continue

        # Open a new group whenever the leading character changes.
        if not current or current.char != initial:
            current = TagGroup(initial)
            result.append(current)
        current.tags.append(entry)

    # The shared group only appears when at least one tag landed in it.
    if ideographic.tags:
        result.append(ideographic)
    return result
@@ -224,7 +235,7 @@ class TagCloudContext:
self.has_selected_tags = has_selected_tags self.has_selected_tags = has_selected_tags
def get_tag_query_set(self): def get_tag_query_set(self):
raise Exception(f"Must be implemented by subclass") raise Exception("Must be implemented by subclass")
def get_selected_tags(self, tags: List[Tag]): def get_selected_tags(self, tags: List[Tag]):
parsed_query = queries.parse_query_string(self.search.q) parsed_query = queries.parse_query_string(self.search.q)