From 09058c7c8adc7d127182781ae824866161fff0a5 Mon Sep 17 00:00:00 2001 From: Chesterkxng Date: Tue, 27 May 2025 16:36:00 +0200 Subject: [PATCH 01/10] update top list items functions to (1) replace special characters by symbols when building the dict and (2) take a built array with custom regex as input. --- .../tools/list/find-most-popular/service.ts | 41 ++++++++++++++----- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/src/pages/tools/list/find-most-popular/service.ts b/src/pages/tools/list/find-most-popular/service.ts index 03bd144..1f53c0e 100644 --- a/src/pages/tools/list/find-most-popular/service.ts +++ b/src/pages/tools/list/find-most-popular/service.ts @@ -9,7 +9,20 @@ function dictMaker( ): { [key: string]: number } { const dict: { [key: string]: number } = {}; for (const item of array) { - const key = ignoreItemCase ? item.toLowerCase() : item; + let key = ignoreItemCase ? item.toLowerCase() : item; + + const specialCharMap: { [key: string]: string } = { + ' ': '␣', + '\n': '↲', + '\t': '⇥', + '\r': '␍', + '\f': '␌', + '\v': '␋' + }; + if (key in specialCharMap) { + key = specialCharMap[key]; + } + dict[key] = (dict[key] || 0) + 1; } return dict; @@ -74,21 +87,27 @@ export function TopItemsList( sortingMethod: SortingMethod, displayFormat: DisplayFormat, splitSeparator: string, - input: string, + input: string | string[], deleteEmptyItems: boolean, ignoreItemCase: boolean, trimItems: boolean ): string { + if (!input) return ''; + let array: string[]; - switch (splitOperatorType) { - case 'symbol': - array = input.split(splitSeparator); - break; - case 'regex': - array = input - .split(new RegExp(splitSeparator)) - .filter((item) => item !== ''); - break; + if (typeof input === 'string') { + switch (splitOperatorType) { + case 'symbol': + array = input.split(splitSeparator); + break; + case 'regex': + array = input + .split(new RegExp(splitSeparator)) + .filter((item) => item !== ''); + break; + } + } else { + array = input; } // Trim items 
if required From 4f3977d40ef4ec975a523ad9b01d8f40bfbaab42 Mon Sep 17 00:00:00 2001 From: Chesterkxng Date: Tue, 27 May 2025 16:39:41 +0200 Subject: [PATCH 02/10] feat: text-statistic (types) --- src/pages/tools/string/statistic/types.ts | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 src/pages/tools/string/statistic/types.ts diff --git a/src/pages/tools/string/statistic/types.ts b/src/pages/tools/string/statistic/types.ts new file mode 100644 index 0000000..7dfe089 --- /dev/null +++ b/src/pages/tools/string/statistic/types.ts @@ -0,0 +1,7 @@ +export type InitialValuesType = { + emptyLines: boolean; + sentenceDelimiters?: string[]; + wordDelimiters: string; + characterCount: boolean; + wordCount: boolean; +}; From b1069d507eff0799cdc02b82ed4358b50e08681a Mon Sep 17 00:00:00 2001 From: Chesterkxng Date: Tue, 27 May 2025 16:40:06 +0200 Subject: [PATCH 03/10] feat: text-statistic (service) --- src/pages/tools/string/statistic/service.ts | 110 ++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 src/pages/tools/string/statistic/service.ts diff --git a/src/pages/tools/string/statistic/service.ts b/src/pages/tools/string/statistic/service.ts new file mode 100644 index 0000000..8ff971b --- /dev/null +++ b/src/pages/tools/string/statistic/service.ts @@ -0,0 +1,110 @@ +import { InitialValuesType } from './types'; +import { TopItemsList } from '../../list/find-most-popular/service'; + +function countLines(text: string, options: InitialValuesType): number { + const numberofLines = options.emptyLines + ? text.split('\n').length + : text.split('\n').filter((line) => line.trim() !== '').length; + + return numberofLines; +} + +function countCharacters(text: string): number { + return text.length; +} + +function countSentences(text: string, options: InitialValuesType): number { + const sentenceDelimiters = options.sentenceDelimiters || [ + '.', + '!', + '?', + '...' 
+ ]; + const regex = new RegExp(`[${sentenceDelimiters.join('')}]`, 'g'); + const sentences = text + .split(regex) + .filter((sentence) => sentence.trim() !== ''); + return sentences.length; +} + +function wordsStats( + text: string, + options: InitialValuesType +): [number, string] { + const defaultDelimiters = `\\s.,;:!?"“”«»()…`; + const wordDelimiters = options.wordDelimiters || defaultDelimiters; + const regex = new RegExp(`[${wordDelimiters}]`, 'gu'); + const words = text.split(regex).filter((word) => word.trim() !== ''); + + const wordsFrequency = TopItemsList( + 'regex', + 'count', + 'percentage', + '', + words, + false, + true, + false + ); + + return options.wordCount + ? [words.length, wordsFrequency] + : [words.length, '']; +} + +function countParagraphs(text: string): number { + return text + .split(/\r?\n\s*\r?\n/) + .filter((paragraph) => paragraph.trim() !== '').length; +} + +function charactersStatistic(text: string, options: InitialValuesType): string { + if (!options.characterCount) return ''; + const result = TopItemsList( + 'symbol', + 'count', + 'percentage', + '', + text, + true, + true, + false + ); + return result; +} + +export function textStatistics( + input: string, + options: InitialValuesType +): string { + if (!input) return ''; + + const numberofLines = countLines(input, options); + const numberofCharacters = countCharacters(input); + const numberofSentences = countSentences(input, options); + const [numberofWords, wordsFrequency] = wordsStats(input, options); + const numberofParagraphs = countParagraphs(input); + const characterStats = charactersStatistic(input, options); + + const stats = `Text Statistics +================== +Characters: ${numberofCharacters} +Words: ${numberofWords} +Lines: ${numberofLines} +Sentences: ${numberofSentences} +Paragraphs: ${numberofParagraphs}`; + + const charStats = `Characters Frequency +================== +${characterStats}`; + + const wordStatsOutput = `Words Frequency +================== 
+${wordsFrequency}`; + + let result = stats; + if (options.wordCount) result += `\n\n${wordStatsOutput}`; + if (options.characterCount) result += `\n\n${charStats}`; + + return result; +} From 8740b8f72ea2d557f697941f3960557428f9b930 Mon Sep 17 00:00:00 2001 From: Chesterkxng Date: Tue, 27 May 2025 17:03:18 +0200 Subject: [PATCH 04/10] feat: text-statistic (fixed types) --- src/pages/tools/string/statistic/types.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pages/tools/string/statistic/types.ts b/src/pages/tools/string/statistic/types.ts index 7dfe089..c775910 100644 --- a/src/pages/tools/string/statistic/types.ts +++ b/src/pages/tools/string/statistic/types.ts @@ -1,6 +1,6 @@ export type InitialValuesType = { emptyLines: boolean; - sentenceDelimiters?: string[]; + sentenceDelimiters: string; wordDelimiters: string; characterCount: boolean; wordCount: boolean; From e1f1c6a1c4bfdbc470b9370f4afda88b7e996e8a Mon Sep 17 00:00:00 2001 From: Chesterkxng Date: Tue, 27 May 2025 17:03:58 +0200 Subject: [PATCH 05/10] feat: text-statistic (fixed service count sentences method) --- src/pages/tools/string/statistic/service.ts | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/pages/tools/string/statistic/service.ts b/src/pages/tools/string/statistic/service.ts index 8ff971b..7481739 100644 --- a/src/pages/tools/string/statistic/service.ts +++ b/src/pages/tools/string/statistic/service.ts @@ -14,12 +14,10 @@ function countCharacters(text: string): number { } function countSentences(text: string, options: InitialValuesType): number { - const sentenceDelimiters = options.sentenceDelimiters || [ - '.', - '!', - '?', - '...' - ]; + const sentenceDelimiters = options.sentenceDelimiters + ? 
options.sentenceDelimiters.split(',').map((s) => s.trim()) + : ['.', '!', '?', '...']; + const regex = new RegExp(`[${sentenceDelimiters.join('')}]`, 'g'); const sentences = text .split(regex) From b1ec39860e236e726e97a32a78a9ee2cb8ebbc21 Mon Sep 17 00:00:00 2001 From: Chesterkxng Date: Tue, 27 May 2025 17:04:45 +0200 Subject: [PATCH 06/10] feat: text-statistic (index) --- src/pages/tools/string/statistic/index.tsx | 298 +++++++++++++++++++++ 1 file changed, 298 insertions(+) create mode 100644 src/pages/tools/string/statistic/index.tsx diff --git a/src/pages/tools/string/statistic/index.tsx b/src/pages/tools/string/statistic/index.tsx new file mode 100644 index 0000000..352aa58 --- /dev/null +++ b/src/pages/tools/string/statistic/index.tsx @@ -0,0 +1,298 @@ +import { Box } from '@mui/material'; +import { useState } from 'react'; +import ToolTextResult from '@components/result/ToolTextResult'; +import { GetGroupsType } from '@components/options/ToolOptions'; +import { textStatistics } from './service'; +import ToolTextInput from '@components/input/ToolTextInput'; +import { InitialValuesType } from './types'; +import ToolContent from '@components/ToolContent'; +import { CardExampleType } from '@components/examples/ToolExamples'; +import { ToolComponentProps } from '@tools/defineTool'; +import TextFieldWithDesc from '@components/options/TextFieldWithDesc'; +import CheckboxWithDesc from '@components/options/CheckboxWithDesc'; + +const initialValues: InitialValuesType = { + emptyLines: false, + sentenceDelimiters: '', + wordDelimiters: '', + characterCount: false, + wordCount: false +}; + +const exampleCards: CardExampleType[] = [ + { + title: 'Text Statistics without any Flag', + description: + 'This example shows basic text statistics without any additional flags.', + sampleText: + 'Giraffes have long necks that can be up to 6 feet (1.8 meters) long, but they only have 7 neck vertebrae, the same as humans.', + sampleResult: `Text Statistics +================== 
+Characters: 125 +Words: 26 +Lines: 1 +Sentences: 1 +Paragraphs: 1`, + sampleOptions: initialValues + }, + { + title: 'Text Statistics with Characters Frequency', + description: + 'This example shows basic text statistics with characters frequency.', + sampleText: `The Great Barrier Reef is the world's largest coral reef system, located off the coast of Australia. It consists of over 2,900 individual reefs and 900 islands. The reef is home to thousands of species of marine life, including fish, sea turtles, sharks, and dolphins. It is also a popular tourist destination, attracting millions of visitors every year. However, the reef is facing many threats, including climate change, pollution, and overfishing. Conservation efforts are being made to protect this unique and valuable ecosystem for future generations.`, + sampleResult: `Text Statistics +================== +Characters: 556 +Words: 87 +Lines: 1 +Sentences: 1 +Paragraphs: 1 + +Characters Frequency +================== +0: 4 (0.72%) +2: 1 (0.18%) +9: 2 (0.36%) +␣: 85 (15.29%) +e: 51 (9.17%) +i: 40 (7.19%) +s: 40 (7.19%) +t: 39 (7.01%) +a: 37 (6.65%) +o: 34 (6.12%) +r: 33 (5.94%) +n: 29 (5.22%) +l: 21 (3.78%) +f: 20 (3.60%) +h: 15 (2.70%) +d: 15 (2.70%) +c: 14 (2.52%) +u: 14 (2.52%) +,: 11 (1.98%) +g: 10 (1.80%) +m: 8 (1.44%) +v: 8 (1.44%) +.: 6 (1.08%) +p: 6 (1.08%) +y: 5 (0.90%) +b: 3 (0.54%) +w: 2 (0.36%) +': 1 (0.18%) +k: 1 (0.18%) +q: 1 (0.18%)`, + sampleOptions: { + emptyLines: false, + sentenceDelimiters: '', + wordDelimiters: '', + characterCount: true, + wordCount: false + } + }, + { + title: 'Text Statistics with Characters and Words Frequencies', + description: + 'This example shows basic text statistics with characters and words frequencies.', + sampleText: `The Great Barrier Reef is the world's largest coral reef system, located off the coast of Australia. It consists of over 2,900 individual reefs and 900 islands. 
The reef is home to thousands of species of marine life, including fish, sea turtles, sharks, and dolphins. It is also a popular tourist destination, attracting millions of visitors every year. However, the reef is facing many threats, including climate change, pollution, and overfishing. Conservation efforts are being made to protect this unique and valuable ecosystem for future generations.`, + sampleResult: `Text Statistics +================== +Characters: 556 +Words: 87 +Lines: 1 +Sentences: 1 +Paragraphs: 1 + +Words Frequency +================== +2: 1 (1.15%) +900: 2 (2.30%) +the: 5 (5.75%) +of: 5 (5.75%) +reef: 4 (4.60%) +is: 4 (4.60%) +and: 4 (4.60%) +it: 2 (2.30%) +to: 2 (2.30%) +including: 2 (2.30%) +great: 1 (1.15%) +barrier: 1 (1.15%) +world's: 1 (1.15%) +largest: 1 (1.15%) +coral: 1 (1.15%) +system: 1 (1.15%) +located: 1 (1.15%) +off: 1 (1.15%) +coast: 1 (1.15%) +australia: 1 (1.15%) +consists: 1 (1.15%) +over: 1 (1.15%) +individual: 1 (1.15%) +reefs: 1 (1.15%) +islands: 1 (1.15%) +home: 1 (1.15%) +thousands: 1 (1.15%) +species: 1 (1.15%) +marine: 1 (1.15%) +life: 1 (1.15%) +fish: 1 (1.15%) +sea: 1 (1.15%) +turtles: 1 (1.15%) +sharks: 1 (1.15%) +dolphins: 1 (1.15%) +also: 1 (1.15%) +a: 1 (1.15%) +popular: 1 (1.15%) +tourist: 1 (1.15%) +destination: 1 (1.15%) +attracting: 1 (1.15%) +millions: 1 (1.15%) +visitors: 1 (1.15%) +every: 1 (1.15%) +year: 1 (1.15%) +however: 1 (1.15%) +facing: 1 (1.15%) +many: 1 (1.15%) +threats: 1 (1.15%) +climate: 1 (1.15%) +change: 1 (1.15%) +pollution: 1 (1.15%) +overfishing: 1 (1.15%) +conservation: 1 (1.15%) +efforts: 1 (1.15%) +are: 1 (1.15%) +being: 1 (1.15%) +made: 1 (1.15%) +protect: 1 (1.15%) +this: 1 (1.15%) +unique: 1 (1.15%) +valuable: 1 (1.15%) +ecosystem: 1 (1.15%) +for: 1 (1.15%) +future: 1 (1.15%) +generations: 1 (1.15%) + +Characters Frequency +================== +0: 4 (0.72%) +2: 1 (0.18%) +9: 2 (0.36%) +␣: 85 (15.29%) +e: 51 (9.17%) +i: 40 (7.19%) +s: 40 (7.19%) +t: 39 (7.01%) +a: 37 (6.65%) +o: 34 (6.12%) 
+r: 33 (5.94%) +n: 29 (5.22%) +l: 21 (3.78%) +f: 20 (3.60%) +h: 15 (2.70%) +d: 15 (2.70%) +c: 14 (2.52%) +u: 14 (2.52%) +,: 11 (1.98%) +g: 10 (1.80%) +m: 8 (1.44%) +v: 8 (1.44%) +.: 6 (1.08%) +p: 6 (1.08%) +y: 5 (0.90%) +b: 3 (0.54%) +w: 2 (0.36%) +': 1 (0.18%) +k: 1 (0.18%) +q: 1 (0.18%)`, + sampleOptions: { + emptyLines: false, + sentenceDelimiters: '', + wordDelimiters: '', + characterCount: true, + wordCount: true + } + } +]; + +export default function Truncate({ + title, + longDescription +}: ToolComponentProps) { + const [input, setInput] = useState(''); + const [result, setResult] = useState(''); + + function compute(initialValues: InitialValuesType, input: string) { + setResult(textStatistics(input, initialValues)); + } + + const getGroups: GetGroupsType = ({ + values, + updateField + }) => [ + { + title: 'Delimiters Options', + component: ( + + updateField('sentenceDelimiters', val)} + placeholder="e.g. ., !, ?, ..." + description={ + 'Enter custom characters used to delimit sentences in your language (separated by comma) or leave it blank for default.' + } + /> + updateField('wordDelimiters', val)} + placeholder="eg. \\s.,;:!?\”«»()…" + description={ + 'Enter custom Regex to count Words or leave it blank for default.' 
+ } + /> + + ) + }, + { + title: 'Statistics Options', + component: ( + + updateField('wordCount', value)} + title="Words Frequency" + description="Add the word frequency statistics" + /> + updateField('characterCount', value)} + title="Characters Frequency" + description="Add the character frequency statistics" + /> + updateField('emptyLines', value)} + title="Empty Lines" + description="Include empty lines in the count" + /> + + ) + } + ]; + + return ( + + } + resultComponent={ + + } + toolInfo={{ title: `What is a ${title}?`, description: longDescription }} + exampleCards={exampleCards} + /> + ); +} From d2ffdcf2f69c3fb7887f6d174a23a304a771b745 Mon Sep 17 00:00:00 2001 From: Chesterkxng Date: Tue, 27 May 2025 17:05:04 +0200 Subject: [PATCH 07/10] feat: text-statistic (meta) --- src/pages/tools/string/statistic/meta.ts | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 src/pages/tools/string/statistic/meta.ts diff --git a/src/pages/tools/string/statistic/meta.ts b/src/pages/tools/string/statistic/meta.ts new file mode 100644 index 0000000..18bc663 --- /dev/null +++ b/src/pages/tools/string/statistic/meta.ts @@ -0,0 +1,15 @@ +import { defineTool } from '@tools/defineTool'; +import { lazy } from 'react'; + +export const tool = defineTool('string', { + name: 'Text Statistics', + path: 'statistics', + shortDescription: 'Get statistics about your text', + icon: 'fluent:document-landscape-data-24-filled', + description: + 'Load your text in the input form on the left and you will automatically get statistics about your text on the right.', + longDescription: + 'This tool provides various statistics about the text you input, including the number of lines, words, and characters. You can also choose to include empty lines in the count. it can count words and characters based on custom delimiters, allowing for flexible text analysis. 
Additionally, it can provide frequency statistics for words and characters, helping you understand the distribution of terms in your text.', + keywords: ['text', 'statistics', 'count', 'lines', 'words', 'characters'], + component: lazy(() => import('./index')) +}); From d7d5f81d5794bc22d19dc32cb870f845258ed8c4 Mon Sep 17 00:00:00 2001 From: Chesterkxng Date: Tue, 27 May 2025 17:05:46 +0200 Subject: [PATCH 08/10] feat: text-statistic (ltool added to string tools) --- src/pages/tools/string/index.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pages/tools/string/index.ts b/src/pages/tools/string/index.ts index b2aa502..7f5ba3a 100644 --- a/src/pages/tools/string/index.ts +++ b/src/pages/tools/string/index.ts @@ -14,6 +14,7 @@ import { tool as stringJoin } from './join/meta'; import { tool as stringReplace } from './text-replacer/meta'; import { tool as stringRepeat } from './repeat/meta'; import { tool as stringTruncate } from './truncate/meta'; +import { tool as stringStatistic } from './statistic/meta'; export const stringTools = [ stringSplit, @@ -31,5 +32,6 @@ export const stringTools = [ stringPalindrome, stringQuote, stringRotate, - stringRot13 + stringRot13, + stringStatistic ]; From b5a67499d5d25e9140a32d689091e925fbbc87a7 Mon Sep 17 00:00:00 2001 From: Chesterkxng Date: Tue, 27 May 2025 17:12:05 +0200 Subject: [PATCH 09/10] feat: text-statistic (readability improvements) --- src/pages/tools/string/statistic/index.tsx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/pages/tools/string/statistic/index.tsx b/src/pages/tools/string/statistic/index.tsx index 352aa58..4ca3059 100644 --- a/src/pages/tools/string/statistic/index.tsx +++ b/src/pages/tools/string/statistic/index.tsx @@ -257,20 +257,20 @@ export default function Truncate({ updateField('wordCount', value)} - title="Words Frequency" - description="Add the word frequency statistics" + title="Word Frequency Analysis" + description="Count how 
often each word appears in the text" /> updateField('characterCount', value)} - title="Characters Frequency" - description="Add the character frequency statistics" + title="Character Frequency Analysis" + description="Count how often each character appears in the text" /> updateField('emptyLines', value)} - title="Empty Lines" - description="Include empty lines in the count" + title="Include Empty Lines" + description="Include blank lines when counting lines" /> ) From 80df2eb68f2ac2492778b1c8280663c7f492a015 Mon Sep 17 00:00:00 2001 From: Chesterkxng Date: Thu, 5 Jun 2025 23:36:52 +0200 Subject: [PATCH 10/10] refactor: review related changes --- .../tools/list/find-most-popular/service.ts | 30 ++----------- src/utils/string.ts | 42 +++++++++++++++++++ 2 files changed, 45 insertions(+), 27 deletions(-) diff --git a/src/pages/tools/list/find-most-popular/service.ts b/src/pages/tools/list/find-most-popular/service.ts index 1f53c0e..5fc9d5e 100644 --- a/src/pages/tools/list/find-most-popular/service.ts +++ b/src/pages/tools/list/find-most-popular/service.ts @@ -1,33 +1,9 @@ +import { itemCounter } from '@utils/string'; + export type SplitOperatorType = 'symbol' | 'regex'; export type DisplayFormat = 'count' | 'percentage' | 'total'; export type SortingMethod = 'count' | 'alphabetic'; -// Function that takes the array as arg and returns a dict of element occurrences and handle the ignoreItemCase -function dictMaker( - array: string[], - ignoreItemCase: boolean -): { [key: string]: number } { - const dict: { [key: string]: number } = {}; - for (const item of array) { - let key = ignoreItemCase ? 
item.toLowerCase() : item; - - const specialCharMap: { [key: string]: string } = { - ' ': '␣', - '\n': '↲', - '\t': '⇥', - '\r': '␍', - '\f': '␌', - '\v': '␋' - }; - if (key in specialCharMap) { - key = specialCharMap[key]; - } - - dict[key] = (dict[key] || 0) + 1; - } - return dict; -} - // Function that sorts the dict created with dictMaker based on the chosen sorting method function dictSorter( dict: { [key: string]: number }, @@ -121,7 +97,7 @@ export function TopItemsList( } // Transform the array into dict - const unsortedDict = dictMaker(array, ignoreItemCase); + const unsortedDict = itemCounter(array, ignoreItemCase); // Sort the list if required const sortedDict = dictSorter(unsortedDict, sortingMethod); diff --git a/src/utils/string.ts b/src/utils/string.ts index a0c2894..17088f9 100644 --- a/src/utils/string.ts +++ b/src/utils/string.ts @@ -1,5 +1,24 @@ import { UpdateField } from '@components/options/ToolOptions'; +// Here starting the shared values for string manipulation. + +/** + * This map is used to replace special characters with their visual representations. + * It is useful for displaying strings in a more readable format, especially in tools + **/ + +export const specialCharMap: { [key: string]: string } = { + '': '␀', + ' ': '␣', + '\n': '↲', + '\t': '⇥', + '\r': '␍', + '\f': '␌', + '\v': '␋' +}; + +// Here starting the utility functions for string manipulation. + export function capitalizeFirstLetter(string: string | undefined) { if (!string) return ''; return string.charAt(0).toUpperCase() + string.slice(1); @@ -63,3 +82,26 @@ export function unquoteIfQuoted(value: string, quoteCharacter: string): string { } return value; } + +/** + * Count the occurence of items. + * @param array - array get from user with a custom delimiter. + * @param ignoreItemCase - boolean status to ignore the case i . + * @returns Dict of Items count {[Item]: occcurence}. 
+ */
+export function itemCounter(
+  array: string[],
+  ignoreItemCase: boolean
+): { [key: string]: number } {
+  const counts = new Map<string, number>();
+  for (const item of array) {
+    let key = ignoreItemCase ? item.toLowerCase() : item;
+    // Own-property check: `in` also matches inherited names such as "constructor".
+    if (Object.prototype.hasOwnProperty.call(specialCharMap, key)) {
+      key = specialCharMap[key];
+    }
+    // A Map keeps counts numeric even for keys like "toString" or "__proto__".
+    counts.set(key, (counts.get(key) || 0) + 1);
+  }
+  return Object.fromEntries(counts);
+}