Group ideographic characters in tag cloud (#613)

* Fix #588, Ideographic characters should be grouped together.
This follows the suggestion, from the Stack Overflow answer below, of using
a regex to detect the ideographic range: https://stackoverflow.com/a/2718203/554903

We group the ideographic characters together, while keeping other
Chinese, Japanese and Korean characters in separate groups.

* cleanup

---------

Co-authored-by: Sascha Ißbrücker <sascha.issbruecker@gmail.com>
This commit is contained in:
Jonathan Sundqvist
2024-03-16 07:09:37 +01:00
committed by GitHub
parent 38204c87cf
commit 683cf529d7
2 changed files with 74 additions and 10 deletions

View File

@@ -39,7 +39,7 @@ class TagCloudTemplateTest(TestCase, BookmarkFactoryMixin, HtmlTestMixin):
group_element = group_elements[group_index]
link_elements = group_element.select("a")
self.assertEqual(len(link_elements), len(tags))
self.assertEqual(len(link_elements), len(tags), tags)
for tag_index, tag in enumerate(tags, start=0):
link_element = link_elements[tag_index]
@@ -50,6 +50,59 @@ class TagCloudTemplateTest(TestCase, BookmarkFactoryMixin, HtmlTestMixin):
link_elements = soup.select("p.selected-tags a")
self.assertEqual(len(link_elements), count)
def test_cjk_using_single_group(self):
    """
    Check how CJK tag names are grouped in the rendered tag cloud.

    Tags that start with an ideographic character are expected to end up
    in one shared group, whereas the hiragana, katakana and hangul tags
    are expected in separate groups.
    """
    # Tags are created in exactly this order.
    # NOTE(review): the katakana name "アヒル" is listed twice here but is
    # expected only once in the rendered groups -- confirm this is intended.
    # NOTE(review): one tag name is an empty string; possibly a character
    # that was lost in extraction -- confirm against the repository.
    tag_names = (
        "Aardvark",
        "Armadillo",
        "あひる",
        "あきらか",
        "アヒル",
        "アキラカ",
        "ひる",
        "アヒル",
        "오리",
        "",
        "家鴨",
        "感じ",
    )
    tags = [self.setup_tag(name=tag_name) for tag_name in tag_names]
    self.setup_bookmark(tags=tags)

    # One inner list per expected group, each holding the tag names
    # expected in that group.
    expected_groups = [
        ["Aardvark", "Armadillo"],
        ["あきらか", "あひる"],
        ["ひる"],
        ["アキラカ", "アヒル"],
        [""],
        ["오리"],
        ["家鴨", "感じ"],
    ]
    self.assertTagGroups(self.render_template(), expected_groups)
def test_group_alphabetically(self):
tags = [
self.setup_tag(name="Cockatoo"),