mirror of
https://github.com/iib0011/omni-tools.git
synced 2025-12-02 01:44:03 +01:00
57 lines
1.6 KiB
TypeScript
57 lines
1.6 KiB
TypeScript
import { createWorker } from 'tesseract.js';
|
|
import { InitialValuesType } from './types';
|
|
|
|
export const extractTextFromImage = async (
|
|
file: File,
|
|
options: InitialValuesType
|
|
): Promise<string> => {
|
|
try {
|
|
const { language, detectParagraphs } = options;
|
|
|
|
// Create a Tesseract worker
|
|
const worker = await createWorker(language);
|
|
|
|
// Convert file to URL
|
|
const imageUrl = URL.createObjectURL(file);
|
|
|
|
// Recognize text
|
|
const { data } = await worker.recognize(imageUrl);
|
|
|
|
// Clean up
|
|
URL.revokeObjectURL(imageUrl);
|
|
await worker.terminate();
|
|
|
|
// Process the result based on options
|
|
if (detectParagraphs) {
|
|
// Return text with paragraph structure preserved
|
|
return data.text;
|
|
} else {
|
|
// Return plain text with basic formatting
|
|
return data.text;
|
|
}
|
|
} catch (error) {
|
|
console.error('Error extracting text from image:', error);
|
|
throw new Error(
|
|
'Failed to extract text from image. Please try again with a clearer image.'
|
|
);
|
|
}
|
|
};
|
|
|
|
// Helper function to get available languages
|
|
export const getAvailableLanguages = (): { value: string; label: string }[] => {
|
|
return [
|
|
{ value: 'eng', label: 'English' },
|
|
{ value: 'fra', label: 'French' },
|
|
{ value: 'deu', label: 'German' },
|
|
{ value: 'spa', label: 'Spanish' },
|
|
{ value: 'ita', label: 'Italian' },
|
|
{ value: 'por', label: 'Portuguese' },
|
|
{ value: 'rus', label: 'Russian' },
|
|
{ value: 'jpn', label: 'Japanese' },
|
|
{ value: 'chi_sim', label: 'Chinese (Simplified)' },
|
|
{ value: 'chi_tra', label: 'Chinese (Traditional)' },
|
|
{ value: 'kor', label: 'Korean' },
|
|
{ value: 'ara', label: 'Arabic' }
|
|
];
|
|
};
|