From bc5746d359dc8c54531301bdda18abc17c1151c9 Mon Sep 17 00:00:00 2001 From: AshAnand34 Date: Fri, 18 Jul 2025 17:12:08 -0700 Subject: [PATCH] feat (string): add Hidden Character Detector tool to identify and analyze hidden Unicode characters --- public/locales/en/string.json | 26 ++- .../hidden-character-detector.service.test.ts | 188 ++++++++++++++++++ .../hidden-character-detector/index.tsx | 148 ++++++++++++++ .../string/hidden-character-detector/meta.ts | 24 +++ .../hidden-character-detector/service.ts | 163 +++++++++++++++ .../string/hidden-character-detector/types.ts | 26 +++ src/pages/tools/string/index.ts | 1 + 7 files changed, 575 insertions(+), 1 deletion(-) create mode 100644 src/pages/tools/string/hidden-character-detector/hidden-character-detector.service.test.ts create mode 100644 src/pages/tools/string/hidden-character-detector/index.tsx create mode 100644 src/pages/tools/string/hidden-character-detector/meta.ts create mode 100644 src/pages/tools/string/hidden-character-detector/service.ts create mode 100644 src/pages/tools/string/hidden-character-detector/types.ts diff --git a/public/locales/en/string.json b/public/locales/en/string.json index 2994890..61d0042 100644 --- a/public/locales/en/string.json +++ b/public/locales/en/string.json @@ -280,8 +280,32 @@ "longDescription": "This tool URL-decodes a previously URL-encoded string. URL-decoding is the inverse operation of URL-encoding. All percent-encoded characters get decoded to characters that you can understand. Some of the most well known percent-encoded values are %20 for a space, %3a for a colon, %2f for a slash, and %3f for a question mark. 
The two digits following the percent sign are character's char code values in hex.", "title": "String URL decoder" }, - "inputTitle": "Input String(URL-escaped)", "resultTitle": "Output string" + }, + "hiddenCharacterDetector": { + "title": "Hidden Character Detector", + "description": "Detect hidden Unicode characters, especially RTL Override characters that could be used in attacks.", + "shortDescription": "Find hidden Unicode characters in text", + "longDescription": "This tool helps you detect hidden Unicode characters in text, particularly Right-to-Left (RTL) Override characters that can be used in attacks. It can identify invisible characters, zero-width characters, and other potentially malicious Unicode sequences that might be hidden in seemingly innocent text.", + "inputTitle": "Text to Analyze", + "inputPlaceholder": "Enter text to check for hidden characters...", + "analysisOptions": "Analysis Options", + "optionsDescription": "Configure which types of hidden characters to detect and how to display the results.", + "noHiddenChars": "No hidden characters detected in the text.", + "foundChars": "Found {{count}} hidden character(s):", + "position": "Position", + "unicode": "Unicode", + "category": "Category", + "rtlOverride": "RTL Override Character", + "invisibleChar": "Invisible Character", + "zeroWidthChar": "Zero Width Character", + "rtlWarning": "WARNING: RTL Override characters detected! This could be used in attacks.", + "rtlAlert": "⚠️ RTL Override characters detected! 
This text may contain malicious hidden characters.", + "summary": "Analysis Summary", + "totalChars": "Total hidden characters: {{count}}", + "rtlFound": "RTL Override found", + "invisibleFound": "Invisible characters found", + "zeroWidthFound": "Zero-width characters found" } } diff --git a/src/pages/tools/string/hidden-character-detector/hidden-character-detector.service.test.ts b/src/pages/tools/string/hidden-character-detector/hidden-character-detector.service.test.ts new file mode 100644 index 0000000..35c9961 --- /dev/null +++ b/src/pages/tools/string/hidden-character-detector/hidden-character-detector.service.test.ts @@ -0,0 +1,188 @@ +import { expect, describe, it } from 'vitest'; +import { analyzeHiddenCharacters, main } from './service'; +import { InitialValuesType } from './types'; + +describe('Hidden Character Detector', () => { + const defaultOptions: InitialValuesType = { + showUnicodeCodes: true, + highlightRTL: true, + showInvisibleChars: true, + includeZeroWidthChars: true + }; + + describe('analyzeHiddenCharacters', () => { + it('should detect RTL Override characters', () => { + const text = 'Hello\u202EWorld'; // RTL Override + const result = analyzeHiddenCharacters(text, defaultOptions); + + expect(result.totalHiddenChars).toBe(1); + expect(result.hasRTLOverride).toBe(true); + expect(result.hiddenCharacters[0].isRTL).toBe(true); + expect(result.hiddenCharacters[0].unicode).toBe('U+202E'); + expect(result.hiddenCharacters[0].name).toBe('Right-to-Left Override'); + }); + + it('should detect invisible characters', () => { + const text = 'Hello\u200BWorld'; // Zero Width Space + const result = analyzeHiddenCharacters(text, defaultOptions); + + expect(result.totalHiddenChars).toBe(1); + expect(result.hasInvisibleChars).toBe(true); + expect(result.hiddenCharacters[0].isInvisible).toBe(true); + expect(result.hiddenCharacters[0].unicode).toBe('U+200B'); + expect(result.hiddenCharacters[0].name).toBe('Zero Width Space'); + }); + + it('should detect 
zero-width characters', () => { + const text = 'Hello\u200CWorld'; // Zero Width Non-Joiner + const result = analyzeHiddenCharacters(text, defaultOptions); + + expect(result.totalHiddenChars).toBe(1); + expect(result.hasZeroWidthChars).toBe(true); + expect(result.hiddenCharacters[0].isZeroWidth).toBe(true); + expect(result.hiddenCharacters[0].unicode).toBe('U+200C'); + }); + + it('should detect multiple hidden characters', () => { + const text = 'Hello\u202E\u200BWorld'; // RTL Override + Zero Width Space + const result = analyzeHiddenCharacters(text, defaultOptions); + + expect(result.totalHiddenChars).toBe(2); + expect(result.hasRTLOverride).toBe(true); + expect(result.hasInvisibleChars).toBe(true); + expect(result.hasZeroWidthChars).toBe(true); + }); + + it('should detect control characters', () => { + const text = 'Hello\u0000World'; // Null character + const result = analyzeHiddenCharacters(text, defaultOptions); + + expect(result.totalHiddenChars).toBe(1); + expect(result.hiddenCharacters[0].category).toBe('Control Character'); + expect(result.hiddenCharacters[0].isInvisible).toBe(true); + }); + + it('should not detect regular characters', () => { + const text = 'Hello World'; + const result = analyzeHiddenCharacters(text, defaultOptions); + + expect(result.totalHiddenChars).toBe(0); + expect(result.hasRTLOverride).toBe(false); + expect(result.hasInvisibleChars).toBe(false); + expect(result.hasZeroWidthChars).toBe(false); + }); + + it('should filter based on options', () => { + const text = 'Hello\u202E\u200BWorld'; + const options: InitialValuesType = { + ...defaultOptions, + highlightRTL: false, + showInvisibleChars: true + }; + + const result = analyzeHiddenCharacters(text, options); + + expect(result.totalHiddenChars).toBe(1); // Only invisible chars + expect(result.hasRTLOverride).toBe(false); + expect(result.hasInvisibleChars).toBe(true); + }); + + it('should provide correct character positions', () => { + const text = 'Hello\u202EWorld'; + const result 
= analyzeHiddenCharacters(text, defaultOptions); + + expect(result.hiddenCharacters[0].position).toBe(5); + expect(result.hiddenCharacters[0].char).toBe('\u202E'); + }); + }); + + describe('main function', () => { + it('should return message when no hidden characters found', () => { + const text = 'Hello World'; + const result = main(text, defaultOptions); + + expect(result).toBe('No hidden characters detected in the text.'); + }); + + it('should return detailed analysis when hidden characters found', () => { + const text = 'Hello\u202EWorld'; + const result = main(text, defaultOptions); + + expect(result).toContain('Found 1 hidden character(s):'); + expect(result).toContain('Position 5: Right-to-Left Override (U+202E)'); + expect(result).toContain('Category: RTL Override'); + expect(result).toContain('⚠️ RTL Override Character'); + expect(result).toContain('WARNING: RTL Override characters detected!'); + }); + + it('should include Unicode codes when showUnicodeCodes is true', () => { + const text = 'Hello\u200BWorld'; + const options: InitialValuesType = { + ...defaultOptions, + showUnicodeCodes: true + }; + + const result = main(text, options); + + expect(result).toContain('Unicode: U+200B'); + }); + + it('should not include Unicode codes when showUnicodeCodes is false', () => { + const text = 'Hello\u200BWorld'; + const options: InitialValuesType = { + ...defaultOptions, + showUnicodeCodes: false + }; + + const result = main(text, options); + + expect(result).not.toContain('Unicode: U+200B'); + }); + + it('should handle multiple RTL characters', () => { + const text = 'Hello\u202E\u202DWorld'; + const result = main(text, defaultOptions); + + expect(result).toContain('Found 2 hidden character(s):'); + expect(result).toContain('Right-to-Left Override'); + expect(result).toContain('Left-to-Right Override'); + }); + + it('should handle mixed character types', () => { + const text = 'Hello\u202E\u200B\u200CWorld'; + const result = main(text, defaultOptions); + + 
expect(result).toContain('Found 3 hidden character(s):'); + expect(result).toContain('RTL Override Character'); + expect(result).toContain('Invisible Character'); + expect(result).toContain('Zero Width Character'); + }); + }); + + describe('edge cases', () => { + it('should handle empty string', () => { + const result = analyzeHiddenCharacters('', defaultOptions); + + expect(result.totalHiddenChars).toBe(0); + expect(result.originalText).toBe(''); + }); + + it('should handle string with only hidden characters', () => { + const text = '\u202E\u200B\u200C'; + const result = analyzeHiddenCharacters(text, defaultOptions); + + expect(result.totalHiddenChars).toBe(3); + expect(result.hasRTLOverride).toBe(true); + expect(result.hasInvisibleChars).toBe(true); + expect(result.hasZeroWidthChars).toBe(true); + }); + + it('should handle very long strings', () => { + const text = 'A'.repeat(1000) + '\u202E' + 'B'.repeat(1000); + const result = analyzeHiddenCharacters(text, defaultOptions); + + expect(result.totalHiddenChars).toBe(1); + expect(result.hiddenCharacters[0].position).toBe(1000); + }); + }); +}); diff --git a/src/pages/tools/string/hidden-character-detector/index.tsx b/src/pages/tools/string/hidden-character-detector/index.tsx new file mode 100644 index 0000000..5db12b3 --- /dev/null +++ b/src/pages/tools/string/hidden-character-detector/index.tsx @@ -0,0 +1,148 @@ +import React, { useState } from 'react'; +import { Box, Typography, Alert, Paper } from '@mui/material'; +import { useTranslation } from 'react-i18next'; +import ToolContent from '@components/ToolContent'; +import { ToolComponentProps } from '@tools/defineTool'; +import { InitialValuesType } from './types'; +import { analyzeHiddenCharacters } from './service'; +import ToolTextInput from '@components/input/ToolTextInput'; +import ToolTextResult from '@components/result/ToolTextResult'; +import { GetGroupsType } from '@components/options/ToolOptions'; + +const initialValues: InitialValuesType = { + 
showUnicodeCodes: true, + highlightRTL: true, + showInvisibleChars: true, + includeZeroWidthChars: true +}; + +export default function HiddenCharacterDetector({ + title, + longDescription +}: ToolComponentProps) { + const { t } = useTranslation('string'); + const [input, setInput] = useState(''); + const [result, setResult] = useState(''); + const [analysis, setAnalysis] = useState(null); + + const compute = (values: InitialValuesType, input: string) => { + if (!input.trim()) return; + + try { + const analysisResult = analyzeHiddenCharacters(input, values); + setAnalysis(analysisResult); + + if (analysisResult.totalHiddenChars === 0) { + setResult(t('string:hiddenCharacterDetector.noHiddenChars')); + } else { + let output = t('string:hiddenCharacterDetector.foundChars', { + count: analysisResult.totalHiddenChars + }); + + analysisResult.hiddenCharacters.forEach((char: any) => { + output += `${t('string:hiddenCharacterDetector.position')} ${ + char.position + }: ${char.name} (${char.unicode})\n`; + if (values.showUnicodeCodes) { + output += ` ${t('string:hiddenCharacterDetector.unicode')}: ${ + char.unicode + }\n`; + } + output += ` ${t('string:hiddenCharacterDetector.category')}: ${ + char.category + }\n`; + if (char.isRTL) + output += ` ⚠️ ${t( + 'string:hiddenCharacterDetector.rtlOverride' + )}\n`; + if (char.isInvisible) + output += ` 👁️ ${t( + 'string:hiddenCharacterDetector.invisibleChar' + )}\n`; + if (char.isZeroWidth) + output += ` 📏 ${t( + 'string:hiddenCharacterDetector.zeroWidthChar' + )}\n`; + output += '\n'; + }); + + if (analysisResult.hasRTLOverride) { + output += `⚠️ ${t('string:hiddenCharacterDetector.rtlWarning')}\n`; + } + + setResult(output); + } + } catch (error) { + setResult(`Error: ${error}`); + } + }; + + const getGroups: GetGroupsType = ({ + values, + updateField + }) => [ + { + title: t('string:hiddenCharacterDetector.analysisOptions'), + component: ( + + + {t('string:hiddenCharacterDetector.optionsDescription')} + + + ) + } + ]; + + 
return ( + + + + {analysis && analysis.hasRTLOverride && ( + + {t('string:hiddenCharacterDetector.rtlAlert')} + + )} + + {analysis && analysis.totalHiddenChars > 0 && ( + + + {t('string:hiddenCharacterDetector.summary')} + + + {t('string:hiddenCharacterDetector.totalChars', { + count: analysis.totalHiddenChars + })} + {analysis.hasRTLOverride && + ` • ${t('string:hiddenCharacterDetector.rtlFound')}`} + {analysis.hasInvisibleChars && + ` • ${t('string:hiddenCharacterDetector.invisibleFound')}`} + {analysis.hasZeroWidthChars && + ` • ${t('string:hiddenCharacterDetector.zeroWidthFound')}`} + + + )} + + } + resultComponent={} + initialValues={initialValues} + getGroups={getGroups} + compute={compute} + input={input} + setInput={setInput} + toolInfo={{ + title: `What is ${title}?`, + description: + longDescription || + 'A tool to detect hidden Unicode characters, especially RTL Override characters that could be used in attacks.' + }} + /> + ); +} diff --git a/src/pages/tools/string/hidden-character-detector/meta.ts b/src/pages/tools/string/hidden-character-detector/meta.ts new file mode 100644 index 0000000..6a56609 --- /dev/null +++ b/src/pages/tools/string/hidden-character-detector/meta.ts @@ -0,0 +1,24 @@ +import { defineTool } from '@tools/defineTool'; +import { lazy } from 'react'; + +export const tool = defineTool('string', { + i18n: { + name: 'string:hiddenCharacterDetector.title', + description: 'string:hiddenCharacterDetector.description', + shortDescription: 'string:hiddenCharacterDetector.shortDescription', + longDescription: 'string:hiddenCharacterDetector.longDescription' + }, + path: 'hidden-character-detector', + icon: 'material-symbols:visibility-off', + keywords: [ + 'hidden', + 'character', + 'detector', + 'unicode', + 'rtl', + 'override', + 'security', + 'invisible' + ], + component: lazy(() => import('./index')) +}); diff --git a/src/pages/tools/string/hidden-character-detector/service.ts b/src/pages/tools/string/hidden-character-detector/service.ts 
new file mode 100644
index 0000000..f016eb0
--- /dev/null
+++ b/src/pages/tools/string/hidden-character-detector/service.ts
@@ -0,0 +1,202 @@
+import { InitialValuesType, HiddenCharacter, AnalysisResult } from './types';
+
+/** A character the detector recognises by name. */
+type KnownCharacter = { char: string; name: string; unicode: string };
+
+// Explicit bidirectional formatting characters (see Unicode UAX #9). All of
+// them are reported under the 'RTL Override' category. The isolates
+// (U+2066-U+2069) and U+202C are listed alongside the embeddings and
+// overrides so that none of the explicit bidi controls goes unreported.
+const RTL_CHARS: KnownCharacter[] = [
+  { char: '\u202E', name: 'Right-to-Left Override', unicode: 'U+202E' },
+  { char: '\u202D', name: 'Left-to-Right Override', unicode: 'U+202D' },
+  { char: '\u202B', name: 'Right-to-Left Embedding', unicode: 'U+202B' },
+  { char: '\u202A', name: 'Left-to-Right Embedding', unicode: 'U+202A' },
+  { char: '\u202C', name: 'Pop Directional Formatting', unicode: 'U+202C' },
+  { char: '\u2066', name: 'Left-to-Right Isolate', unicode: 'U+2066' },
+  { char: '\u2067', name: 'Right-to-Left Isolate', unicode: 'U+2067' },
+  { char: '\u2068', name: 'First Strong Isolate', unicode: 'U+2068' },
+  { char: '\u2069', name: 'Pop Directional Isolate', unicode: 'U+2069' },
+  { char: '\u200F', name: 'Right-to-Left Mark', unicode: 'U+200F' },
+  { char: '\u200E', name: 'Left-to-Right Mark', unicode: 'U+200E' },
+  { char: '\u061C', name: 'Arabic Letter Mark', unicode: 'U+061C' }
+];
+
+// Invisible characters
+const INVISIBLE_CHARS: KnownCharacter[] = [
+  { char: '\u200B', name: 'Zero Width Space', unicode: 'U+200B' },
+  { char: '\u200C', name: 'Zero Width Non-Joiner', unicode: 'U+200C' },
+  { char: '\u200D', name: 'Zero Width Joiner', unicode: 'U+200D' },
+  { char: '\u2060', name: 'Word Joiner', unicode: 'U+2060' },
+  { char: '\uFEFF', name: 'Zero Width No-Break Space', unicode: 'U+FEFF' },
+  { char: '\u00A0', name: 'Non-Breaking Space', unicode: 'U+00A0' },
+  { char: '\u2000', name: 'En Quad', unicode: 'U+2000' },
+  { char: '\u2001', name: 'Em Quad', unicode: 'U+2001' },
+  { char: '\u2002', name: 'En Space', unicode: 'U+2002' },
+  { char: '\u2003', name: 'Em Space', unicode: 'U+2003' },
+  { char: '\u2004', name: 'Three-Per-Em Space', unicode: 'U+2004' },
+  { char: '\u2005', name: 'Four-Per-Em Space', unicode: 'U+2005' },
+  { char: '\u2006', name: 'Six-Per-Em Space', unicode: 'U+2006' },
+  { char: '\u2007', name: 'Figure Space', unicode: 'U+2007' },
+  { char: '\u2008', name: 'Punctuation Space', unicode: 'U+2008' },
+  { char: '\u2009', name: 'Thin Space', unicode: 'U+2009' },
+  { char: '\u200A', name: 'Hair Space', unicode: 'U+200A' }
+];
+
+// Members of INVISIBLE_CHARS that are additionally flagged as zero-width.
+const ZERO_WIDTH_CHARS = new Set([
+  '\u200B',
+  '\u200C',
+  '\u200D',
+  '\u2060',
+  '\uFEFF'
+]);
+
+// Lookup tables keyed by character: one map lookup per analysed character
+// instead of a linear scan of the arrays above.
+const RTL_BY_CHAR = new Map(
+  RTL_CHARS.map((c): [string, KnownCharacter] => [c.char, c])
+);
+const INVISIBLE_BY_CHAR = new Map(
+  INVISIBLE_CHARS.map((c): [string, KnownCharacter] => [c.char, c])
+);
+
+/**
+ * Classifies a single UTF-16 code unit of the analysed text.
+ *
+ * @param char - One-character string taken from the text.
+ * @param position - Index of `char` within the text.
+ * @returns The description of `char`. Characters that are neither listed in
+ *   the tables above nor control codes get the 'Regular' category with every
+ *   flag set to false.
+ */
+function getCharacterInfo(char: string, position: number): HiddenCharacter {
+  const rtlChar = RTL_BY_CHAR.get(char);
+  if (rtlChar) {
+    return {
+      char,
+      unicode: rtlChar.unicode,
+      name: rtlChar.name,
+      category: 'RTL Override',
+      position,
+      isRTL: true,
+      isInvisible: false,
+      isZeroWidth: false
+    };
+  }
+
+  const invisibleChar = INVISIBLE_BY_CHAR.get(char);
+  if (invisibleChar) {
+    return {
+      char,
+      unicode: invisibleChar.unicode,
+      name: invisibleChar.name,
+      category: 'Invisible Character',
+      position,
+      isRTL: false,
+      isInvisible: true,
+      isZeroWidth: ZERO_WIDTH_CHARS.has(char)
+    };
+  }
+
+  const code = char.charCodeAt(0);
+  const unicode = `U+${code.toString(16).toUpperCase().padStart(4, '0')}`;
+  // Control codes below U+0020, plus DEL (U+007F).
+  const isControl = code < 32 || code === 127;
+
+  return {
+    char,
+    unicode,
+    name: isControl ? `Control Character (${code})` : 'Regular Character',
+    category: isControl ? 'Control Character' : 'Regular',
+    position,
+    isRTL: false,
+    isInvisible: isControl,
+    isZeroWidth: false
+  };
+}
+
+/**
+ * Decides whether a classified character is reported under `options`.
+ *
+ * Each character is governed by exactly one option, so switching
+ * `includeZeroWidthChars` off hides zero-width characters even while
+ * `showInvisibleChars` is on.
+ */
+function isReported(
+  info: HiddenCharacter,
+  options: InitialValuesType
+): boolean {
+  if (info.isRTL) return options.highlightRTL;
+  if (info.isZeroWidth) return options.includeZeroWidthChars;
+  return info.isInvisible && options.showInvisibleChars;
+}
+
+/**
+ * Scans `text` and collects every hidden character enabled by `options`.
+ *
+ * @param text - Text to analyse.
+ * @param options - Which categories of hidden characters to report.
+ * @returns The reported characters (positions are UTF-16 code unit indexes
+ *   into `text`), one summary flag per category and the total count.
+ */
+export function analyzeHiddenCharacters(
+  text: string,
+  options: InitialValuesType
+): AnalysisResult {
+  const hiddenCharacters: HiddenCharacter[] = [];
+
+  for (let i = 0; i < text.length; i++) {
+    const charInfo = getCharacterInfo(text[i], i);
+    if (isReported(charInfo, options)) {
+      hiddenCharacters.push(charInfo);
+    }
+  }
+
+  return {
+    originalText: text,
+    hiddenCharacters,
+    hasRTLOverride: hiddenCharacters.some((c) => c.isRTL),
+    hasInvisibleChars: hiddenCharacters.some((c) => c.isInvisible),
+    hasZeroWidthChars: hiddenCharacters.some((c) => c.isZeroWidth),
+    totalHiddenChars: hiddenCharacters.length
+  };
+}
+
+/**
+ * Builds the plain-text report for `input`.
+ *
+ * @param input - Text to analyse.
+ * @param options - Detection options; `showUnicodeCodes` adds a separate
+ *   Unicode line to every entry.
+ * @returns A fixed message when nothing is found; otherwise one entry per
+ *   hidden character, followed by a warning when an RTL character is present.
+ */
+export function main(input: string, options: InitialValuesType): string {
+  const result = analyzeHiddenCharacters(input, options);
+
+  if (result.totalHiddenChars === 0) {
+    return 'No hidden characters detected in the text.';
+  }
+
+  let output = `Found ${result.totalHiddenChars} hidden character(s):\n\n`;
+
+  result.hiddenCharacters.forEach((char) => {
+    output += `Position ${char.position}: ${char.name} (${char.unicode})\n`;
+    if (options.showUnicodeCodes) {
+      output += ` Unicode: ${char.unicode}\n`;
+    }
+    output += ` Category: ${char.category}\n`;
+    if (char.isRTL) output += ` ⚠️ RTL Override Character\n`;
+    if (char.isInvisible) output += ` 👁️ Invisible Character\n`;
+    if (char.isZeroWidth) output += ` 📏 Zero Width Character\n`;
+    output += '\n';
+  });
+
+  if (result.hasRTLOverride) {
+    output +=
+      '⚠️ WARNING: RTL Override characters detected! This could be used in attacks.\n';
+  }
+
+  return output;
+}
diff --git a/src/pages/tools/string/hidden-character-detector/types.ts b/src/pages/tools/string/hidden-character-detector/types.ts
new file mode 100644
index 0000000..71e7e00
--- /dev/null
+++ b/src/pages/tools/string/hidden-character-detector/types.ts
@@ -0,0 +1,26 @@
+export type InitialValuesType = {
+  showUnicodeCodes: boolean;
+  highlightRTL: boolean;
+  showInvisibleChars: boolean;
+  includeZeroWidthChars: boolean;
+};
+
+export interface HiddenCharacter {
+  char: string;
+  unicode: string;
+  name: string;
+  category: string;
+  position: number;
+  isRTL: boolean;
+  isInvisible: boolean;
+  isZeroWidth: boolean;
+}
+
+export interface AnalysisResult {
+  originalText: string;
+  hiddenCharacters: HiddenCharacter[];
+  hasRTLOverride: boolean;
+  hasInvisibleChars: boolean;
+  hasZeroWidthChars: boolean;
+  totalHiddenChars: number;
+}
diff --git a/src/pages/tools/string/index.ts b/src/pages/tools/string/index.ts
index bd645f7..3bea002 100644
--- a/src/pages/tools/string/index.ts
+++ b/src/pages/tools/string/index.ts
@@ -1,3 +1,4 @@
+import { tool as stringHiddenCharacterDetector } from './hidden-character-detector/meta';
 import { tool as stringRemoveDuplicateLines } from './remove-duplicate-lines/meta';
 import { tool as stringRotate } from './rotate/meta';
 import { tool as stringQuote } from './quote/meta';