mirror of
https://github.com/iib0011/omni-tools.git
synced 2025-09-19 05:59:34 +02:00
feat: find incomplete csv records
This commit is contained in:
198
src/pages/tools/csv/find-incomplete-csv-records/index.tsx
Normal file
198
src/pages/tools/csv/find-incomplete-csv-records/index.tsx
Normal file
@@ -0,0 +1,198 @@
|
||||
import { Box } from '@mui/material';
|
||||
import React, { useState } from 'react';
|
||||
import ToolContent from '@components/ToolContent';
|
||||
import { ToolComponentProps } from '@tools/defineTool';
|
||||
import ToolTextInput from '@components/input/ToolTextInput';
|
||||
import ToolTextResult from '@components/result/ToolTextResult';
|
||||
import { GetGroupsType } from '@components/options/ToolOptions';
|
||||
import { CardExampleType } from '@components/examples/ToolExamples';
|
||||
import { findIncompleteCsvRecords } from './service';
|
||||
import { InitialValuesType } from './types';
|
||||
import TextFieldWithDesc from '@components/options/TextFieldWithDesc';
|
||||
import CheckboxWithDesc from '@components/options/CheckboxWithDesc';
|
||||
|
||||
/**
 * Option values the tool starts with before the user changes anything.
 */
const initialValues: InitialValuesType = {
  // How the CSV input is tokenised.
  csvSeparator: ',',
  quoteCharacter: '"',
  commentCharacter: '#',

  // Which checks are applied to the parsed rows.
  emptyLines: true,
  emptyValues: true,

  // Output capping; the number is only used while the limit is switched on.
  messageLimit: false,
  messageNumber: 10
};
|
||||
|
||||
/**
 * Example cards shown for the tool. Each card pairs a sample CSV input and
 * the option values to apply with the report text expected for that input.
 * Multi-line samples are written as one array entry per line and joined
 * with "\n".
 */
const exampleCards: CardExampleType<InitialValuesType>[] = [
  // 1. A fully populated CSV: nothing to report.
  {
    title: 'CSV Completeness Check',
    description:
      'In this example, we upload a simple CSV file containing names, surnames, and dates of birth. The tool analyzes the data and displays a green "Complete CSV" badge as it finds that there are no missing values or empty records. To say it differently, this check confirms that all rows and columns have the expected number of values in the data and the file is ready for use in any software that imports CSV files without hiccups.',
    sampleText: [
      'name,surname,dob',
      'John,Warner,1990-05-15',
      'Lily,Meadows,1985-12-20',
      'Jaime,Crane,1993-01-23',
      'Jeri,Carroll,2000-11-07',
      'Simon,Harper,2013-04-10'
    ].join('\n'),
    sampleResult: 'The Csv input is complete.',
    sampleOptions: {
      csvSeparator: ',',
      quoteCharacter: '"',
      commentCharacter: '#',
      emptyLines: true,
      emptyValues: true,
      messageLimit: false,
      messageNumber: 10
    }
  },

  // 2. Rows that are shorter than the widest row.
  {
    title: 'Find Missing Fields in Broken CSV',
    description:
      'In this example, we find the missing fields in a CSV file containing city names, time zones, and standard time information. As a result of the analysis, we see a red badge in the output and a text list of missing values in the dataset. The file has missing values on two rows: row 3 lacks standard time data (column 3), and row 5 lacks time zone and standard time data (columns 2 and 3).',
    sampleText: [
      'City,Time Zone,Standard Time',
      'London,UTC+00:00,GMT',
      'Chicago,UTC-06:00',
      'Tokyo,UTC+09:00,JST',
      'Sydney',
      'Berlin,UTC+01:00,CET'
    ].join('\n'),
    sampleResult: [
      'Title: Found missing column(s) on line 3',
      'Message: Line 3 has 1 missing column(s).',
      '',
      'Title: Found missing column(s) on line 5',
      'Message: Line 5 has 2 missing column(s).'
    ].join('\n'),
    sampleOptions: {
      csvSeparator: ',',
      quoteCharacter: '"',
      commentCharacter: '#',
      emptyLines: true,
      emptyValues: false,
      messageLimit: true,
      messageNumber: 10
    }
  },

  // 3. Semicolon-separated input with blank lines, blank cells and a short row.
  {
    title: 'Detect Empty and Missing Values',
    description:
      'This example checks a data file containing information astronomical data about constellations. Not only does it find incomplete records but also detects all empty fields by activating the "Find Empty Values" checkbox. The empty fields are those that have zero length or contain just whitespace. Such fields contain no information. Additionally, since this file uses semicolons instead of commas for separators, we specify the ";" symbol in the options to make the program work with SSV (Semicolon-Separated Values) data. As a result, the program identifies three empty fields and one row with missing data.',
    sampleText: [
      'Abbreviation;Constellation;Main stars',
      '',
      'Cas;Cassiopeia;5',
      'Cep;Cepheus;7',
      ';Andromeda;16',
      '',
      'Cyg;;',
      'Del;Delphinus'
    ].join('\n'),
    sampleResult: [
      'Title: Found missing values on line 4',
      'Message: Empty values on line 4: column 1.',
      '',
      'Title: Found missing values on line 5',
      'Message: Empty values on line 5: column 2, column 3.',
      '',
      'Title: Found missing column(s) on line 6',
      'Message: Line 6 has 1 missing column(s).'
    ].join('\n'),
    sampleOptions: {
      csvSeparator: ';',
      quoteCharacter: '"',
      commentCharacter: '#',
      emptyLines: true,
      emptyValues: true,
      messageLimit: true,
      messageNumber: 10
    }
  }
];
|
||||
/**
 * Page component of the "find incomplete CSV records" tool.
 *
 * Keeps the raw CSV text and the generated report in local state and hands
 * both, together with the option groups and example cards, to `ToolContent`.
 *
 * @param title - Tool title; also used to build the info heading.
 * @param longDescription - Text shown in the tool info section.
 */
export default function FindIncompleteCsvRecords({
  title,
  longDescription
}: ToolComponentProps) {
  const [input, setInput] = useState<string>('');
  const [result, setResult] = useState<string>('');

  // Runs the check for the given options and text and stores the report.
  function compute(optionValues: InitialValuesType, csvText: string) {
    const report = findIncompleteCsvRecords(csvText, optionValues);
    setResult(report);
  }

  const getGroups: GetGroupsType<InitialValuesType> | null = ({
    values,
    updateField
  }) => {
    // Controls describing how the CSV text is delimited, quoted and commented.
    const inputFormatControls = (
      <Box>
        <TextFieldWithDesc
          value={values.csvSeparator}
          onOwnChange={(separator) => updateField('csvSeparator', separator)}
          description="Enter the character used to delimit columns in the CSV input file."
        />
        <TextFieldWithDesc
          value={values.quoteCharacter}
          onOwnChange={(quote) => updateField('quoteCharacter', quote)}
          description="Enter the quote character used to quote the CSV input fields."
        />
        <TextFieldWithDesc
          value={values.commentCharacter}
          onOwnChange={(comment) => updateField('commentCharacter', comment)}
          description="Enter the character indicating the start of a comment line. Lines starting with this symbol will be skipped."
        />
      </Box>
    );

    // Controls selecting which checks run and how many messages are shown.
    const checkingControls = (
      <Box>
        <CheckboxWithDesc
          checked={values.emptyLines}
          onChange={(checked) => updateField('emptyLines', checked)}
          title="Delete Lines with No Data"
          description="Remove empty lines from CSV input file."
        />

        <CheckboxWithDesc
          checked={values.emptyValues}
          onChange={(checked) => updateField('emptyValues', checked)}
          title="Find Empty Values"
          description="Display a message about CSV fields that are empty (These are not missing fields but fields that contain nothing)."
        />

        <CheckboxWithDesc
          checked={values.messageLimit}
          onChange={(checked) => updateField('messageLimit', checked)}
          title="Limit number of messages"
        />

        {/* The numeric limit is only offered while limiting is enabled. */}
        {values.messageLimit && (
          <TextFieldWithDesc
            value={values.messageNumber}
            onOwnChange={(limit) => updateField('messageNumber', Number(limit))}
            type="number"
            inputProps={{ min: 1 }}
            description="Set the limit of number of messages in the output."
          />
        )}
      </Box>
    );

    return [
      { title: 'Csv input Options', component: inputFormatControls },
      { title: 'Checking Options', component: checkingControls }
    ];
  };

  const csvInput = (
    <ToolTextInput title="Input CSV" value={input} onChange={setInput} />
  );
  const statusOutput = <ToolTextResult title="CSV Status" value={result} />;
  const info = { title: `What is a ${title}?`, description: longDescription };

  return (
    <ToolContent
      title={title}
      input={input}
      inputComponent={csvInput}
      resultComponent={statusOutput}
      initialValues={initialValues}
      exampleCards={exampleCards}
      getGroups={getGroups}
      setInput={setInput}
      compute={compute}
      toolInfo={info}
    />
  );
}
|
16
src/pages/tools/csv/find-incomplete-csv-records/meta.ts
Normal file
16
src/pages/tools/csv/find-incomplete-csv-records/meta.ts
Normal file
@@ -0,0 +1,16 @@
|
||||
import { defineTool } from '@tools/defineTool';
|
||||
import { lazy } from 'react';
|
||||
|
||||
/**
 * Registration of the "find incomplete CSV records" tool in the `csv`
 * category. The page component is loaded lazily from `./index`.
 */
export const tool = defineTool('csv', {
  name: 'Find incomplete csv records',
  path: 'find-incomplete-csv-records',
  icon: 'tdesign:search-error',

  // Descriptive texts.
  description:
    'Just upload your CSV file in the form below and this tool will automatically check if none of the rows or columns are missing values. In the tool options, you can adjust the input file format (specify the delimiter, quote character, and comment character). Additionally, you can enable checking for empty values, skip empty lines, and set a limit on the number of error messages in the output.',
  shortDescription:
    'Quickly find rows and columns in CSV that are missing values.',
  keywords: ['find', 'incomplete', 'csv', 'records'],
  longDescription:
    'This tool checks the completeness of CSV (Comma Separated Values) files and identifies incomplete records within the data. It finds rows and columns where one or more values are missing and displays their positions in the output so that you can quickly find and fix your CSV file. A valid CSV file has the same number of values (fields) in all rows and the same number of values (fields) in all columns. If the CSV you load in this tool is complete, the program will notify you with a green badge. If at least one value is missing in any row or column, the program will show a red badge and indicate the exact location of the missing value. If the CSV file has a field with no characters in it, then such a field is called an empty field. It is not a missing field, just empty as it contains nothing. You can activate the "Find Empty Values" checkbox in the options to identify all such fields in the CSV. If the file contains empty lines, you can ignore them with the "Skip Empty Lines" option or check them for completeness along with other lines. You can also configure the delimiter, quote, and comment characters in the options. This allows you to adapt to other file formats besides CSV, such as TSV (Tab Separated Values), SSV (Semicolon Separated Values), or PSV (Pipe Separated Values). If the file has too many incomplete or empty records, you can set a limit on the output messages to display, for example, 5, 10, or 20 messages. If you want to quickly fill in the missing data with default values, you can use our Fill Incomplete CSV Records tool. Csv-abulous!',

  // Deferred import of the page component.
  component: lazy(() => import('./index'))
});
|
80
src/pages/tools/csv/find-incomplete-csv-records/service.ts
Normal file
80
src/pages/tools/csv/find-incomplete-csv-records/service.ts
Normal file
@@ -0,0 +1,80 @@
|
||||
import { InitialValuesType } from './types';
|
||||
import { splitCsv } from '@utils/csv';
|
||||
|
||||
/**
 * Inspects one parsed CSV row and describes the first problem found in it.
 *
 * Checks run in this order and stop at the first hit:
 *  1. an empty line (only reported when `emptyLines` is false),
 *  2. fewer columns than `maxLength`,
 *  3. blank cells (only when `emptyValues` is true and the row has exactly
 *     `maxLength` columns).
 *
 * @param row - Cells of the row being checked.
 * @param lineIndex - Zero-based position of the row; reported one-based.
 * @param maxLength - Number of columns a complete row is expected to have.
 * @param emptyLines - When false, a row made of a single empty cell is
 *   reported as a missing line.
 * @param emptyValues - When true, cells that are empty after trimming
 *   whitespace are reported.
 * @returns A `{ title, message }` pair, or `null` when nothing is wrong.
 */
function generateMessage(
  row: string[],
  lineIndex: number,
  maxLength: number,
  emptyLines: boolean,
  emptyValues: boolean
) {
  const lineNumber = lineIndex + 1;

  // A row holding one empty cell is what an empty input line looks like.
  if (!emptyLines && row.length === 1 && row[0] === '')
    return { title: 'Missing Line', message: `Line ${lineNumber} is empty.` };

  // Fewer cells than the expected width means trailing columns are missing.
  if (row.length < maxLength)
    return {
      title: `Found missing column(s) on line ${lineNumber}`,
      message: `Line ${lineNumber} has ${
        maxLength - row.length
      } missing column(s).`
    };

  // Full-width row: optionally look for cells that contain nothing.
  if (row.length === maxLength && emptyValues) {
    const emptyColumns = row
      .map((cell, index) => (cell.trim() === '' ? `column ${index + 1}` : null))
      .filter((label): label is string => label !== null);

    if (emptyColumns.length > 0)
      return {
        title: `Found missing values on line ${lineNumber}`,
        message: `Empty values on line ${lineNumber}: ${emptyColumns.join(
          ', '
        )}.`
      };
  }

  return null;
}
|
||||
/**
 * Checks CSV text for incomplete records and returns a plain-text report.
 *
 * The text is parsed with `splitCsv` using the separator, quote and comment
 * characters from `options`. The widest parsed row defines the expected
 * column count, and every row is then checked by `generateMessage`. Line
 * numbers in the report are positions within the parsed rows.
 *
 * @param input - Raw CSV text. An empty string yields an empty report.
 * @param options - Parsing and checking options.
 * @returns One "Title/Message" entry per problematic row, separated by a
 *   blank line and capped at `options.messageNumber` entries when
 *   `options.messageLimit` is set; or a completion notice when no row has a
 *   problem.
 * @throws Error when the message limit is enabled with a number <= 0.
 */
export function findIncompleteCsvRecords(
  input: string,
  options: InitialValuesType
): string {
  if (!input) return '';

  if (options.messageLimit && options.messageNumber <= 0)
    throw new Error('Message number must be greater than 0');

  // NOTE(review): the meaning of the literal `true` argument is defined by
  // splitCsv and is not visible from this file.
  const rows = splitCsv(
    input,
    true,
    options.commentCharacter,
    options.emptyLines,
    options.csvSeparator,
    options.quoteCharacter
  );

  // Fold instead of Math.max(...lengths): spreading passes one call argument
  // per row, which throws a RangeError on very large inputs, and an empty
  // argument list would give -Infinity rather than 0.
  const maxLength = rows.reduce(
    (widest, row) => Math.max(widest, row.length),
    0
  );

  const messages: string[] = [];
  rows.forEach((row, index) => {
    const problem = generateMessage(
      row,
      index,
      maxLength,
      options.emptyLines,
      options.emptyValues
    );
    if (problem)
      messages.push(`Title: ${problem.title}\nMessage: ${problem.message}`);
  });

  if (messages.length === 0) return 'The Csv input is complete.';

  const shown = options.messageLimit
    ? messages.slice(0, options.messageNumber)
    : messages;
  return shown.join('\n\n');
}
|
9
src/pages/tools/csv/find-incomplete-csv-records/types.ts
Normal file
9
src/pages/tools/csv/find-incomplete-csv-records/types.ts
Normal file
@@ -0,0 +1,9 @@
|
||||
/**
 * Option values of the "find incomplete CSV records" tool.
 */
export type InitialValuesType = {
  /** Character that delimits columns in the CSV input. */
  csvSeparator: string;

  /** Character used to quote fields in the CSV input. */
  quoteCharacter: string;

  /** Character marking the start of a comment line; such lines are skipped. */
  commentCharacter: string;

  /** Whether empty lines are removed from the input before checking. */
  emptyLines: boolean;

  /** Whether fields that contain nothing are reported as well. */
  emptyValues: boolean;

  /** Whether the number of messages in the output is capped. */
  messageLimit: boolean;

  /** Maximum number of messages shown while `messageLimit` is on. */
  messageNumber: number;
};
|
@@ -1,3 +1,4 @@
|
||||
import { tool as findIncompleteCsvRecords } from './find-incomplete-csv-records/meta';
|
||||
import { tool as ChangeCsvDelimiter } from './change-csv-separator/meta';
|
||||
import { tool as csvToYaml } from './csv-to-yaml/meta';
|
||||
import { tool as csvToJson } from './csv-to-json/meta';
|
||||
@@ -13,5 +14,6 @@ export const csvTools = [
|
||||
csvToTsv,
|
||||
swapCsvColumns,
|
||||
csvToYaml,
|
||||
ChangeCsvDelimiter
|
||||
ChangeCsvDelimiter,
|
||||
findIncompleteCsvRecords
|
||||
];
|
||||
|
Reference in New Issue
Block a user