Files
omni-tools/src/pages/tools/string/hidden-character-detector/service.ts

164 lines
5.1 KiB
TypeScript

import { InitialValuesType, HiddenCharacter, AnalysisResult } from './types';
/**
 * Unicode bidirectional (bidi) formatting characters.
 *
 * Despite the name, the table holds controls for both directions: the
 * explicit overrides and embeddings together with their terminator (PDF),
 * the isolates together with their terminator (PDI), and the implicit
 * directional marks. All of them change how surrounding text is ordered on
 * screen without drawing a glyph of their own.
 *
 * `unicode` is the `U+XXXX` label of `char`; the two must be kept in sync
 * when adding entries.
 */
const RTL_CHARS = [
  // Explicit overrides and embeddings
  { char: '\u202E', name: 'Right-to-Left Override', unicode: 'U+202E' },
  { char: '\u202D', name: 'Left-to-Right Override', unicode: 'U+202D' },
  { char: '\u202B', name: 'Right-to-Left Embedding', unicode: 'U+202B' },
  { char: '\u202A', name: 'Left-to-Right Embedding', unicode: 'U+202A' },
  // Implicit directional marks
  { char: '\u200F', name: 'Right-to-Left Mark', unicode: 'U+200F' },
  { char: '\u200E', name: 'Left-to-Right Mark', unicode: 'U+200E' },
  // Terminator for the overrides/embeddings above
  { char: '\u202C', name: 'Pop Directional Formatting', unicode: 'U+202C' },
  // Isolates and their terminator
  { char: '\u2066', name: 'Left-to-Right Isolate', unicode: 'U+2066' },
  { char: '\u2067', name: 'Right-to-Left Isolate', unicode: 'U+2067' },
  { char: '\u2068', name: 'First Strong Isolate', unicode: 'U+2068' },
  { char: '\u2069', name: 'Pop Directional Isolate', unicode: 'U+2069' },
  // Additional implicit mark
  { char: '\u061C', name: 'Arabic Letter Mark', unicode: 'U+061C' }
];
/**
 * Characters that render as nothing or as blank space and are therefore easy
 * to miss when reading text: zero-width format characters, the no-break
 * spaces, and the fixed-width / special-purpose Unicode spaces.
 *
 * The ordinary ASCII space (U+0020) is intentionally not listed.
 *
 * `unicode` is the `U+XXXX` label of `char`; the two must be kept in sync
 * when adding entries.
 */
const INVISIBLE_CHARS = [
  // Zero-width format characters
  { char: '\u200B', name: 'Zero Width Space', unicode: 'U+200B' },
  { char: '\u200C', name: 'Zero Width Non-Joiner', unicode: 'U+200C' },
  { char: '\u200D', name: 'Zero Width Joiner', unicode: 'U+200D' },
  { char: '\u2060', name: 'Word Joiner', unicode: 'U+2060' },
  { char: '\uFEFF', name: 'Zero Width No-Break Space', unicode: 'U+FEFF' },
  // Blank spaces that are not the ASCII space
  { char: '\u00A0', name: 'Non-Breaking Space', unicode: 'U+00A0' },
  { char: '\u2000', name: 'En Quad', unicode: 'U+2000' },
  { char: '\u2001', name: 'Em Quad', unicode: 'U+2001' },
  { char: '\u2002', name: 'En Space', unicode: 'U+2002' },
  { char: '\u2003', name: 'Em Space', unicode: 'U+2003' },
  { char: '\u2004', name: 'Three-Per-Em Space', unicode: 'U+2004' },
  { char: '\u2005', name: 'Four-Per-Em Space', unicode: 'U+2005' },
  { char: '\u2006', name: 'Six-Per-Em Space', unicode: 'U+2006' },
  { char: '\u2007', name: 'Figure Space', unicode: 'U+2007' },
  { char: '\u2008', name: 'Punctuation Space', unicode: 'U+2008' },
  { char: '\u2009', name: 'Thin Space', unicode: 'U+2009' },
  { char: '\u200A', name: 'Hair Space', unicode: 'U+200A' },
  { char: '\u202F', name: 'Narrow No-Break Space', unicode: 'U+202F' },
  { char: '\u205F', name: 'Medium Mathematical Space', unicode: 'U+205F' },
  { char: '\u3000', name: 'Ideographic Space', unicode: 'U+3000' }
];
/** Characters that `getCharacterInfo` reports with `isZeroWidth: true`. */
const ZERO_WIDTH_CHARS: ReadonlySet<string> = new Set([
  '\u200B',
  '\u200C',
  '\u200D',
  '\u2060',
  '\uFEFF'
]);

/**
 * Classifies one character and describes it as a `HiddenCharacter`.
 *
 * Rules are tried in order and the first match wins:
 *  1. an entry of `RTL_CHARS`        -> category 'RTL Override'
 *  2. an entry of `INVISIBLE_CHARS`  -> category 'Invisible Character'
 *  3. a C0 control (U+0000-U+001F), DEL (U+007F) or a C1 control
 *     (U+0080-U+009F)                -> category 'Control Character'
 *  4. anything else                  -> category 'Regular'
 *
 * Note that tab, line feed and carriage return fall in the C0 range and are
 * therefore reported as control characters.
 *
 * @param char - The character to classify. Table lookups compare the whole
 *   string; the control-character test and the generated `U+XXXX` label use
 *   its first code point.
 * @param position - Index of the character in the analysed text; copied into
 *   the result unchanged.
 * @returns The descriptor for `char`. For table hits, `name` and `unicode`
 *   come from the table entry; otherwise `unicode` is generated from the
 *   code point (empty when `char` is the empty string).
 */
function getCharacterInfo(char: string, position: number): HiddenCharacter {
  // codePointAt decodes a full surrogate pair when one is passed in, and
  // returns undefined (instead of NaN) for an empty string, which lets the
  // label below avoid the garbage value "U+0NAN".
  const codePoint = char.codePointAt(0);
  const unicode =
    codePoint === undefined
      ? ''
      : `U+${codePoint.toString(16).toUpperCase().padStart(4, '0')}`;

  // Rule 1: bidi formatting characters.
  const rtlChar = RTL_CHARS.find((c) => c.char === char);
  if (rtlChar) {
    return {
      char,
      unicode: rtlChar.unicode,
      name: rtlChar.name,
      category: 'RTL Override',
      position,
      isRTL: true,
      isInvisible: false,
      isZeroWidth: false
    };
  }

  // Rule 2: invisible / blank characters.
  const invisibleChar = INVISIBLE_CHARS.find((c) => c.char === char);
  if (invisibleChar) {
    return {
      char,
      unicode: invisibleChar.unicode,
      name: invisibleChar.name,
      category: 'Invisible Character',
      position,
      isRTL: false,
      isInvisible: true,
      isZeroWidth: ZERO_WIDTH_CHARS.has(char)
    };
  }

  // Rule 3: C0 controls, DEL, and the C1 controls that directly follow DEL.
  const isControl =
    codePoint !== undefined &&
    (codePoint < 0x20 || (codePoint >= 0x7f && codePoint <= 0x9f));
  if (isControl) {
    return {
      char,
      unicode,
      // The name carries the decimal code point value.
      name: `Control Character (${codePoint})`,
      category: 'Control Character',
      position,
      isRTL: false,
      isInvisible: true,
      isZeroWidth: false
    };
  }

  // Rule 4: nothing special about this character.
  return {
    char,
    unicode,
    name: 'Regular Character',
    category: 'Regular',
    position,
    isRTL: false,
    isInvisible: false,
    isZeroWidth: false
  };
}
/**
 * Scans `text` and collects the characters selected by `options`.
 *
 * The text is walked one UTF-16 code unit at a time, so every reported
 * `position` is a code-unit index into `text`. A character is reported when
 * at least one of these holds:
 *  - `options.highlightRTL` is set and the character is flagged `isRTL`
 *  - `options.showInvisibleChars` is set and it is flagged `isInvisible`
 *  - `options.includeZeroWidthChars` is set and it is flagged `isZeroWidth`
 *
 * @param text - The text to inspect.
 * @param options - Which classes of characters to report.
 * @returns The original text, the reported characters in order of
 *   appearance, their count, and one summary flag per class. The summary
 *   flags are derived from the reported characters only, not from
 *   everything present in `text`.
 */
export function analyzeHiddenCharacters(
  text: string,
  options: InitialValuesType
): AnalysisResult {
  const hiddenCharacters: HiddenCharacter[] = [];

  // split('') yields UTF-16 code units, matching index-based access.
  text.split('').forEach((unit, index) => {
    const info = getCharacterInfo(unit, index);
    const selected =
      (options.highlightRTL && info.isRTL) ||
      (options.showInvisibleChars && info.isInvisible) ||
      (options.includeZeroWidthChars && info.isZeroWidth);
    if (selected) {
      hiddenCharacters.push(info);
    }
  });

  return {
    originalText: text,
    hiddenCharacters,
    hasRTLOverride: hiddenCharacters.some((info) => info.isRTL),
    hasInvisibleChars: hiddenCharacters.some((info) => info.isInvisible),
    hasZeroWidthChars: hiddenCharacters.some((info) => info.isZeroWidth),
    totalHiddenChars: hiddenCharacters.length
  };
}
/**
 * Produces the plain-text report for `input`.
 *
 * Runs `analyzeHiddenCharacters` and formats its result: a count header,
 * then one paragraph per reported character (position, name and code, an
 * extra "Unicode" line when `options.showUnicodeCodes` is set, the category,
 * and one marker line per flag that is set), and finally a warning line when
 * the analysis reports `hasRTLOverride`.
 *
 * @param input - The text to inspect.
 * @param options - Analysis options; `showUnicodeCodes` additionally
 *   controls the extra "Unicode" line of each paragraph.
 * @returns The report, or a fixed "nothing found" sentence when no character
 *   was reported.
 */
export function main(input: string, options: InitialValuesType): string {
  const { hiddenCharacters, totalHiddenChars, hasRTLOverride } =
    analyzeHiddenCharacters(input, options);

  if (totalHiddenChars === 0) {
    return 'No hidden characters detected in the text.';
  }

  // Collect the pieces and join once at the end.
  const pieces: string[] = [
    `Found ${totalHiddenChars} hidden character(s):\n\n`
  ];

  for (const entry of hiddenCharacters) {
    pieces.push(
      `Position ${entry.position}: ${entry.name} (${entry.unicode})\n`
    );
    if (options.showUnicodeCodes) {
      pieces.push(` Unicode: ${entry.unicode}\n`);
    }
    pieces.push(` Category: ${entry.category}\n`);
    if (entry.isRTL) {
      pieces.push(` ⚠️ RTL Override Character\n`);
    }
    if (entry.isInvisible) {
      pieces.push(` 👁️ Invisible Character\n`);
    }
    if (entry.isZeroWidth) {
      pieces.push(` 📏 Zero Width Character\n`);
    }
    // Blank line between paragraphs.
    pieces.push('\n');
  }

  if (hasRTLOverride) {
    pieces.push(
      '⚠️ WARNING: RTL Override characters detected! This could be used in attacks.\n'
    );
  }

  return pieces.join('');
}