mirror of
https://github.com/mermaid-js/mermaid.git
synced 2025-11-18 11:44:07 +01:00
Better handling of special characters
This commit is contained in:
@@ -83,7 +83,7 @@
|
|||||||
"@vitest/spy": "^3.0.6",
|
"@vitest/spy": "^3.0.6",
|
||||||
"@vitest/ui": "^3.0.6",
|
"@vitest/ui": "^3.0.6",
|
||||||
"ajv": "^8.17.1",
|
"ajv": "^8.17.1",
|
||||||
"chokidar": "^4.0.3",
|
"chokidar": "3.6.0",
|
||||||
"concurrently": "^9.1.2",
|
"concurrently": "^9.1.2",
|
||||||
"cors": "^2.8.5",
|
"cors": "^2.8.5",
|
||||||
"cpy-cli": "^5.0.0",
|
"cpy-cli": "^5.0.0",
|
||||||
|
|||||||
@@ -106,7 +106,7 @@
|
|||||||
"@types/stylis": "^4.2.7",
|
"@types/stylis": "^4.2.7",
|
||||||
"@types/uuid": "^10.0.0",
|
"@types/uuid": "^10.0.0",
|
||||||
"ajv": "^8.17.1",
|
"ajv": "^8.17.1",
|
||||||
"chokidar": "^4.0.3",
|
"chokidar": "3.6.0",
|
||||||
"concurrently": "^9.1.2",
|
"concurrently": "^9.1.2",
|
||||||
"csstree-validator": "^4.0.1",
|
"csstree-validator": "^4.0.1",
|
||||||
"globby": "^14.0.2",
|
"globby": "^14.0.2",
|
||||||
|
|||||||
@@ -375,7 +375,7 @@ export class FlowchartAstVisitor extends BaseVisitor {
|
|||||||
if (nodeId.startsWith(keyword)) {
|
if (nodeId.startsWith(keyword)) {
|
||||||
// Allow if the keyword is not followed by a delimiter (e.g., "endpoint" is OK, "end.node" is not)
|
// Allow if the keyword is not followed by a delimiter (e.g., "endpoint" is OK, "end.node" is not)
|
||||||
const afterKeyword = nodeId.substring(keyword.length);
|
const afterKeyword = nodeId.substring(keyword.length);
|
||||||
if (afterKeyword.length === 0 || /^[./\-]/.test(afterKeyword)) {
|
if (afterKeyword.length === 0 || /^[./-]/.test(afterKeyword)) {
|
||||||
throw new Error(`Node ID cannot start with reserved keyword: ${keyword}`);
|
throw new Error(`Node ID cannot start with reserved keyword: ${keyword}`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -835,6 +835,10 @@ export class FlowchartAstVisitor extends BaseVisitor {
|
|||||||
const endNodeId = this.visit(ctx.nodeId[1]);
|
const endNodeId = this.visit(ctx.nodeId[1]);
|
||||||
const linkData = this.visit(ctx.link);
|
const linkData = this.visit(ctx.link);
|
||||||
|
|
||||||
|
// Ensure both start and end nodes exist as vertices
|
||||||
|
this.ensureVertex(startNodeId);
|
||||||
|
this.ensureVertex(endNodeId);
|
||||||
|
|
||||||
const edge: any = {
|
const edge: any = {
|
||||||
start: startNodeId,
|
start: startNodeId,
|
||||||
end: endNodeId,
|
end: endNodeId,
|
||||||
@@ -850,6 +854,17 @@ export class FlowchartAstVisitor extends BaseVisitor {
|
|||||||
this.edges.push(edge);
|
this.edges.push(edge);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Helper method to ensure a vertex exists
|
||||||
|
private ensureVertex(nodeId: string): void {
|
||||||
|
if (!this.vertices[nodeId]) {
|
||||||
|
this.vertices[nodeId] = {
|
||||||
|
id: nodeId,
|
||||||
|
text: nodeId,
|
||||||
|
type: 'default',
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Missing visitor methods
|
// Missing visitor methods
|
||||||
linkStyleStatement(_ctx: any): void {
|
linkStyleStatement(_ctx: any): void {
|
||||||
// Handle link style statements
|
// Handle link style statements
|
||||||
|
|||||||
@@ -55,12 +55,12 @@ const EOF = createToken({
|
|||||||
// Modified to include special characters and handle minus character edge cases
|
// Modified to include special characters and handle minus character edge cases
|
||||||
// Allows - in node IDs including standalone -, -at-start, and -at-end patterns
|
// Allows - in node IDs including standalone -, -at-start, and -at-end patterns
|
||||||
// Avoids conflicts with link tokens by using negative lookahead for link patterns
|
// Avoids conflicts with link tokens by using negative lookahead for link patterns
|
||||||
// Handles compound cases like &node, -node, vnode where special chars are followed by word chars
|
// Handles compound cases like &node, -node, vnode where special chars are followed by word chars // cspell:disable-line
|
||||||
// Only matches compound patterns (special char + word chars), not standalone special chars
|
// Complex pattern to handle all edge cases including punctuation at start/end
|
||||||
const NODE_STRING = createToken({
|
const NODE_STRING = createToken({
|
||||||
name: 'NODE_STRING',
|
name: 'NODE_STRING',
|
||||||
pattern:
|
pattern:
|
||||||
/\\\w+|\w+\\|&[\w!"#$%&'*+,./:?\\`]+[\w!"#$%&'*+,./:?\\`-]*|-[\w!"#$%&'*+,./:?\\`]+[\w!"#$%&'*+,./:?\\`-]*|[<>^v][\w!"#$%&'*+,./:?\\`]+[\w!"#$%&'*+,./:?\\`-]*|[\w!"#$%&'*+,./:?\\`](?:[\w!"#$%&'*+,./:?\\`]|-(?![.=-])|\.(?!-))*[\w!"#$%&'*+,./:?\\`]|[\w!"#$%&'*+,./:?\\`]|&|-|\\|\//,
|
/\\\w+|\w+\\|&[\w!"#$%&'*+,./:?\\`]+[\w!"#$%&'*+,./:?\\`-]*|-[\w!"#$%&'*+,./:?\\`]+[\w!"#$%&'*+,./:?\\`-]*|[<>^v][\w!"#$%&'*+,./:?\\`]+[\w!"#$%&'*+,./:?\\`-]*|:[\w!"#$%&'*+,./:?\\`]+[\w!"#$%&'*+,./:?\\`-]*|,[\w!"#$%&'*+,./:?\\`]+[\w!"#$%&'*+,./:?\\`-]*|[\w!"#$%&'*+,./:?\\`](?:[\w!"#$%&'*+,./:?\\`]|-(?![.=-])|\.(?!-))*[\w!"#$%&'*+,./:?\\`-]|[\w!"#$%&'*+,./:?\\`]|&|-|\\|\//,
|
||||||
});
|
});
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
@@ -146,6 +146,7 @@ const Default = createToken({
|
|||||||
const DirectionValue = createToken({
|
const DirectionValue = createToken({
|
||||||
name: 'DirectionValue',
|
name: 'DirectionValue',
|
||||||
pattern: /LR|RL|TB|BT|TD|BR|<|>|\^|v/,
|
pattern: /LR|RL|TB|BT|TD|BR|<|>|\^|v/,
|
||||||
|
longer_alt: NODE_STRING,
|
||||||
});
|
});
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
@@ -202,37 +203,46 @@ const ShapeDataStart = createToken({
|
|||||||
// LINK TOKENS (JISON lines 154-164)
|
// LINK TOKENS (JISON lines 154-164)
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|
||||||
|
// Regular links without text
|
||||||
const LINK = createToken({
|
const LINK = createToken({
|
||||||
name: 'LINK',
|
name: 'LINK',
|
||||||
pattern: /\s*[<ox]?--+[>ox-]\s*/,
|
pattern: /[<ox]?--+[>ox-]/,
|
||||||
|
longer_alt: NODE_STRING,
|
||||||
});
|
});
|
||||||
|
|
||||||
const START_LINK = createToken({
|
const START_LINK = createToken({
|
||||||
name: 'START_LINK',
|
name: 'START_LINK',
|
||||||
pattern: /\s*[<ox]?--\s*/,
|
pattern: /[<ox]?--/,
|
||||||
push_mode: 'edgeText_mode',
|
push_mode: 'edgeText_mode',
|
||||||
|
longer_alt: NODE_STRING,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Regular thick links without text
|
||||||
const THICK_LINK = createToken({
|
const THICK_LINK = createToken({
|
||||||
name: 'THICK_LINK',
|
name: 'THICK_LINK',
|
||||||
pattern: /\s*[<ox]?==+[=>ox-]?\s*/,
|
pattern: /[<ox]?==+[=>ox-]?/,
|
||||||
|
longer_alt: NODE_STRING,
|
||||||
});
|
});
|
||||||
|
|
||||||
const START_THICK_LINK = createToken({
|
const START_THICK_LINK = createToken({
|
||||||
name: 'START_THICK_LINK',
|
name: 'START_THICK_LINK',
|
||||||
pattern: /\s*[<ox]?==(?=\s*\|)\s*/,
|
pattern: /[<ox]?==/,
|
||||||
push_mode: 'thickEdgeText_mode',
|
push_mode: 'thickEdgeText_mode',
|
||||||
|
longer_alt: NODE_STRING,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Regular dotted links without text
|
||||||
const DOTTED_LINK = createToken({
|
const DOTTED_LINK = createToken({
|
||||||
name: 'DOTTED_LINK',
|
name: 'DOTTED_LINK',
|
||||||
pattern: /\s*[<ox]?-?\.+-[>ox-]?\s*/,
|
pattern: /[<ox]?-?\.+-[>ox-]?/,
|
||||||
|
longer_alt: NODE_STRING,
|
||||||
});
|
});
|
||||||
|
|
||||||
const START_DOTTED_LINK = createToken({
|
const START_DOTTED_LINK = createToken({
|
||||||
name: 'START_DOTTED_LINK',
|
name: 'START_DOTTED_LINK',
|
||||||
pattern: /\s*[<ox]?-\.(?!-)\s*/,
|
pattern: /[<ox]?-\./,
|
||||||
push_mode: 'dottedEdgeText_mode',
|
push_mode: 'dottedEdgeText_mode',
|
||||||
|
longer_alt: NODE_STRING,
|
||||||
});
|
});
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
@@ -297,6 +307,7 @@ const Comma = createToken({
|
|||||||
const Pipe = createToken({
|
const Pipe = createToken({
|
||||||
name: 'Pipe',
|
name: 'Pipe',
|
||||||
pattern: /\|/,
|
pattern: /\|/,
|
||||||
|
push_mode: 'text_mode',
|
||||||
});
|
});
|
||||||
|
|
||||||
const Ampersand = createToken({
|
const Ampersand = createToken({
|
||||||
@@ -311,38 +322,38 @@ const Minus = createToken({
|
|||||||
longer_alt: NODE_STRING,
|
longer_alt: NODE_STRING,
|
||||||
});
|
});
|
||||||
|
|
||||||
// Additional special character tokens for node IDs
|
// Additional special character tokens for node IDs - currently unused but kept for future reference
|
||||||
const Hash = createToken({
|
// const Hash = createToken({
|
||||||
name: 'Hash',
|
// name: 'Hash',
|
||||||
pattern: /#/,
|
// pattern: /#/,
|
||||||
longer_alt: NODE_STRING,
|
// longer_alt: NODE_STRING,
|
||||||
});
|
// });
|
||||||
|
|
||||||
const Asterisk = createToken({
|
// const Asterisk = createToken({
|
||||||
name: 'Asterisk',
|
// name: 'Asterisk',
|
||||||
pattern: /\*/,
|
// pattern: /\*/,
|
||||||
longer_alt: NODE_STRING,
|
// longer_alt: NODE_STRING,
|
||||||
});
|
// });
|
||||||
|
|
||||||
const Dot = createToken({
|
// const Dot = createToken({
|
||||||
name: 'Dot',
|
// name: 'Dot',
|
||||||
pattern: /\./,
|
// pattern: /\./,
|
||||||
longer_alt: NODE_STRING,
|
// longer_alt: NODE_STRING,
|
||||||
});
|
// });
|
||||||
|
|
||||||
// Backslash token removed - handled entirely by NODE_STRING
|
// Backslash token removed - handled entirely by NODE_STRING
|
||||||
|
|
||||||
const Slash = createToken({
|
// const Slash = createToken({
|
||||||
name: 'Slash',
|
// name: 'Slash',
|
||||||
pattern: /\//,
|
// pattern: /\//,
|
||||||
longer_alt: NODE_STRING,
|
// longer_alt: NODE_STRING,
|
||||||
});
|
// });
|
||||||
|
|
||||||
const Underscore = createToken({
|
// const Underscore = createToken({
|
||||||
name: 'Underscore',
|
// name: 'Underscore',
|
||||||
pattern: /_/,
|
// pattern: /_/,
|
||||||
longer_alt: NODE_STRING,
|
// longer_alt: NODE_STRING,
|
||||||
});
|
// });
|
||||||
|
|
||||||
const NumberToken = createToken({
|
const NumberToken = createToken({
|
||||||
name: 'NumberToken',
|
name: 'NumberToken',
|
||||||
@@ -452,6 +463,13 @@ const DiamondEnd = createToken({
|
|||||||
pop_mode: true,
|
pop_mode: true,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Pipe token for text mode that pops back to initial mode
|
||||||
|
const PipeEnd = createToken({
|
||||||
|
name: 'PipeEnd',
|
||||||
|
pattern: /\|/,
|
||||||
|
pop_mode: true,
|
||||||
|
});
|
||||||
|
|
||||||
// Tokens for edge text modes (JISON lines 156, 160, 164)
|
// Tokens for edge text modes (JISON lines 156, 160, 164)
|
||||||
const EdgeTextContent = createToken({
|
const EdgeTextContent = createToken({
|
||||||
name: 'EdgeTextContent',
|
name: 'EdgeTextContent',
|
||||||
@@ -557,12 +575,9 @@ const multiModeLexerDefinition = {
|
|||||||
HexagonStart,
|
HexagonStart,
|
||||||
DiamondStart,
|
DiamondStart,
|
||||||
|
|
||||||
// Basic punctuation (must come before NODE_STRING)
|
// Basic punctuation (must come before NODE_STRING for proper tokenization)
|
||||||
Pipe,
|
Pipe,
|
||||||
Colon,
|
|
||||||
Comma,
|
|
||||||
Ampersand,
|
Ampersand,
|
||||||
Minus,
|
|
||||||
|
|
||||||
// Node strings and numbers (must come after punctuation)
|
// Node strings and numbers (must come after punctuation)
|
||||||
NODE_STRING,
|
NODE_STRING,
|
||||||
@@ -601,7 +616,7 @@ const multiModeLexerDefinition = {
|
|||||||
HexagonEnd,
|
HexagonEnd,
|
||||||
DiamondEnd,
|
DiamondEnd,
|
||||||
QuotedString,
|
QuotedString,
|
||||||
Pipe, // Special handling for pipe in text mode
|
PipeEnd, // Pipe that pops back to initial mode
|
||||||
TextContent,
|
TextContent,
|
||||||
],
|
],
|
||||||
|
|
||||||
@@ -753,6 +768,7 @@ export const allTokens = [
|
|||||||
Colon,
|
Colon,
|
||||||
Comma,
|
Comma,
|
||||||
Pipe,
|
Pipe,
|
||||||
|
PipeEnd,
|
||||||
Ampersand,
|
Ampersand,
|
||||||
Minus,
|
Minus,
|
||||||
];
|
];
|
||||||
@@ -850,6 +866,7 @@ export {
|
|||||||
Colon,
|
Colon,
|
||||||
Comma,
|
Comma,
|
||||||
Pipe,
|
Pipe,
|
||||||
|
PipeEnd,
|
||||||
Ampersand,
|
Ampersand,
|
||||||
Minus,
|
Minus,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -55,6 +55,34 @@ export class FlowchartParser extends CstParser {
|
|||||||
private statement = this.RULE('statement', () => {
|
private statement = this.RULE('statement', () => {
|
||||||
this.OR([
|
this.OR([
|
||||||
{ ALT: () => this.SUBRULE(this.vertexStatement) },
|
{ ALT: () => this.SUBRULE(this.vertexStatement) },
|
||||||
|
// Standalone link statement only when pattern is exactly nodeId link nodeId (no continuation)
|
||||||
|
{
|
||||||
|
ALT: () => this.SUBRULE(this.standaloneLinkStatement),
|
||||||
|
GATE: () => {
|
||||||
|
// Look ahead to see if this is a simple nodeId link nodeId pattern
|
||||||
|
// without any continuation (like more links or ampersands)
|
||||||
|
const la1 = this.LA(1); // First token (should be nodeId)
|
||||||
|
const la2 = this.LA(2); // Second token (should be link)
|
||||||
|
const la3 = this.LA(3); // Third token (should be nodeId)
|
||||||
|
const la4 = this.LA(4); // Fourth token (should be separator or EOF)
|
||||||
|
|
||||||
|
// Check if we have the exact pattern: nodeId link nodeId separator/EOF
|
||||||
|
return (
|
||||||
|
(la1.tokenType === tokens.NODE_STRING || la1.tokenType === tokens.NumberToken) &&
|
||||||
|
(la2.tokenType === tokens.LINK ||
|
||||||
|
la2.tokenType === tokens.THICK_LINK ||
|
||||||
|
la2.tokenType === tokens.DOTTED_LINK ||
|
||||||
|
la2.tokenType === tokens.START_LINK ||
|
||||||
|
la2.tokenType === tokens.START_THICK_LINK ||
|
||||||
|
la2.tokenType === tokens.START_DOTTED_LINK) &&
|
||||||
|
(la3.tokenType === tokens.NODE_STRING || la3.tokenType === tokens.NumberToken) &&
|
||||||
|
(la4 === undefined ||
|
||||||
|
la4.tokenType === tokens.Semicolon ||
|
||||||
|
la4.tokenType === tokens.Newline ||
|
||||||
|
la4.tokenType === tokens.WhiteSpace)
|
||||||
|
);
|
||||||
|
},
|
||||||
|
},
|
||||||
{ ALT: () => this.SUBRULE(this.styleStatement) },
|
{ ALT: () => this.SUBRULE(this.styleStatement) },
|
||||||
{ ALT: () => this.SUBRULE(this.linkStyleStatement) },
|
{ ALT: () => this.SUBRULE(this.linkStyleStatement) },
|
||||||
{ ALT: () => this.SUBRULE(this.classDefStatement) },
|
{ ALT: () => this.SUBRULE(this.classDefStatement) },
|
||||||
@@ -170,7 +198,7 @@ export class FlowchartParser extends CstParser {
|
|||||||
// TODO: Add style separator support when implementing styling
|
// TODO: Add style separator support when implementing styling
|
||||||
});
|
});
|
||||||
|
|
||||||
// Node ID - handles both simple and compound node IDs
|
// Node ID - handles both simple and special character node IDs
|
||||||
private nodeId = this.RULE('nodeId', () => {
|
private nodeId = this.RULE('nodeId', () => {
|
||||||
this.OR([
|
this.OR([
|
||||||
{ ALT: () => this.CONSUME(tokens.NODE_STRING) },
|
{ ALT: () => this.CONSUME(tokens.NODE_STRING) },
|
||||||
@@ -287,11 +315,11 @@ export class FlowchartParser extends CstParser {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
// Arrow text - PIPE text PIPE
|
// Arrow text - PIPE text PipeEnd
|
||||||
private arrowText = this.RULE('arrowText', () => {
|
private arrowText = this.RULE('arrowText', () => {
|
||||||
this.CONSUME(tokens.Pipe);
|
this.CONSUME(tokens.Pipe);
|
||||||
this.SUBRULE(this.text);
|
this.SUBRULE(this.text);
|
||||||
this.CONSUME2(tokens.Pipe);
|
this.CONSUME(tokens.PipeEnd);
|
||||||
});
|
});
|
||||||
|
|
||||||
// Text rule - following JISON pattern
|
// Text rule - following JISON pattern
|
||||||
|
|||||||
212
plan.md
Normal file
212
plan.md
Normal file
@@ -0,0 +1,212 @@
|
|||||||
|
# Chevrotain Parser Implementation Plan
|
||||||
|
|
||||||
|
## Current Status: 86% Complete ✅
|
||||||
|
|
||||||
|
**Progress**: 174/203 tests passing (86% success rate)
|
||||||
|
|
||||||
|
**Major Achievements**:
|
||||||
|
- ✅ Fixed grammar ambiguity issues
|
||||||
|
- ✅ Added `standaloneLinkStatement` to statement rule with proper lookahead
|
||||||
|
- ✅ Core parser architecture is working
|
||||||
|
- ✅ Most single node, vertex, and basic edge tests are passing
|
||||||
|
|
||||||
|
## Remaining Issues: 29 Tests (3 Core Problems)
|
||||||
|
|
||||||
|
### ✅ COMPLETED: Phase 3 - Special Characters (4 tests)
|
||||||
|
**Status**: FIXED - All special character tests now passing
|
||||||
|
**Solution**: Removed conflicting punctuation tokens from lexer main mode
|
||||||
|
**Impact**: +2 tests (174/203 passing)
|
||||||
|
|
||||||
|
### 1. Node Creation in Edges (17 tests) - HIGH PRIORITY
|
||||||
|
**Problem**: `Cannot read properties of undefined (reading 'id')`
|
||||||
|
**Root Cause**: When parsing edges like `A-->B`, vertices A and B are not being created in the vertices map
|
||||||
|
|
||||||
|
**Examples of Failing Tests**:
|
||||||
|
- `should handle basic arrow` (`A-->B`)
|
||||||
|
- `should handle multiple edges` (`A-->B; B-->C`)
|
||||||
|
- `should handle chained edges` (`A-->B-->C`)
|
||||||
|
|
||||||
|
**Solution Strategy**:
|
||||||
|
1. **Investigate which grammar rule is actually being used** for failing tests
|
||||||
|
2. **Add vertex creation to all edge processing paths**:
|
||||||
|
- `standaloneLinkStatement` visitor (already has `ensureVertex()`)
|
||||||
|
- `vertexStatement` with link chains
|
||||||
|
- Any other edge processing methods
|
||||||
|
3. **Test the fix incrementally** with one failing test at a time
|
||||||
|
|
||||||
|
**Implementation Steps**:
|
||||||
|
```typescript
|
||||||
|
// In flowAst.ts - ensure all edge processing creates vertices
|
||||||
|
private ensureVertex(nodeId: string): void {
|
||||||
|
if (!this.vertices[nodeId]) {
|
||||||
|
this.vertices[nodeId] = {
|
||||||
|
id: nodeId,
|
||||||
|
text: nodeId,
|
||||||
|
type: 'default',
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add to ALL methods that process edges:
|
||||||
|
// - standaloneLinkStatement ✅ (already done)
|
||||||
|
// - vertexStatement (when it has link chains)
|
||||||
|
// - linkChain processing
|
||||||
|
// - Any other edge creation paths
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Arrow Text Parsing (10 tests) - MEDIUM PRIORITY
|
||||||
|
**Problem**: `Parse error: Expecting token of type --> EOF <-- but found --> '|' <--`
|
||||||
|
**Root Cause**: Lexer not properly handling pipe character `|` in arrow text patterns like `A-->|text|B`
|
||||||
|
|
||||||
|
**Examples of Failing Tests**:
|
||||||
|
- `should handle arrow with text` (`A-->|text|B`)
|
||||||
|
- `should handle edges with quoted text` (`A-->|"quoted text"|B`)
|
||||||
|
|
||||||
|
**Solution Strategy**:
|
||||||
|
1. **Fix lexer mode switching** for pipe characters
|
||||||
|
2. **Follow original JISON grammar** for arrow text patterns
|
||||||
|
3. **Implement proper tokenization** of `LINK + PIPE + text + PIPE` sequences
|
||||||
|
|
||||||
|
**Implementation Steps**:
|
||||||
|
```typescript
|
||||||
|
// In flowLexer.ts - fix pipe character handling
|
||||||
|
// Current issue: PIPE token conflicts with text content
|
||||||
|
// Solution: Use lexer modes or proper token precedence
|
||||||
|
|
||||||
|
// 1. Check how JISON handles |text| patterns
|
||||||
|
// 2. Implement similar tokenization in Chevrotain
|
||||||
|
// 3. Ensure link text is properly captured and processed
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Special Characters at Node Start (4 tests) - LOW PRIORITY
|
||||||
|
**Problem**: Specific characters (`:`, `&`, `,`, `-`) at start of node IDs not being parsed
|
||||||
|
**Root Cause**: TOKEN precedence issues where punctuation tokens override NODE_STRING
|
||||||
|
|
||||||
|
**Examples of Failing Tests**:
|
||||||
|
- Node IDs starting with `:`, `&`, `,`, `-`
|
||||||
|
|
||||||
|
**Solution Strategy**:
|
||||||
|
1. **Adjust token precedence** in lexer
|
||||||
|
2. **Modify NODE_STRING pattern** to handle special characters
|
||||||
|
3. **Test with each special character individually**
|
||||||
|
|
||||||
|
## Execution Plan
|
||||||
|
|
||||||
|
### Phase 1: Fix Node Creation (Target: +17 tests = 189/203 passing)
|
||||||
|
**Timeline**: 1-2 hours
|
||||||
|
**Priority**: HIGH - This affects the most tests
|
||||||
|
|
||||||
|
1. **Debug which grammar rule is being used** for failing edge tests
|
||||||
|
```bash
|
||||||
|
# Add logging to AST visitor methods to see which path is taken
|
||||||
|
vitest packages/mermaid/src/diagrams/flowchart/parser/flow-chev-arrows.spec.js -t "should handle basic arrow" --run
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Add vertex creation to all edge processing paths**
|
||||||
|
- Check `vertexStatement` when it processes link chains
|
||||||
|
- Check `linkChain` processing
|
||||||
|
- Ensure `ensureVertex()` is called for all edge endpoints
|
||||||
|
|
||||||
|
3. **Test incrementally**
|
||||||
|
```bash
|
||||||
|
# Test one failing test at a time
|
||||||
|
vitest packages/mermaid/src/diagrams/flowchart/parser/flow-chev-arrows.spec.js -t "should handle basic arrow" --run
|
||||||
|
```
|
||||||
|
|
||||||
|
### Phase 2: Fix Arrow Text Parsing (Target: +10 tests = 199/203 passing)
|
||||||
|
**Timeline**: 2-3 hours
|
||||||
|
**Priority**: MEDIUM - Complex lexer issue
|
||||||
|
|
||||||
|
1. **Analyze original JISON grammar** for arrow text patterns
|
||||||
|
```bash
|
||||||
|
# Check how flow.jison handles |text| patterns
|
||||||
|
grep -n "EdgeText\|PIPE" packages/mermaid/src/diagrams/flowchart/parser/flow.jison
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Fix lexer tokenization** for pipe characters
|
||||||
|
- Implement proper mode switching or token precedence
|
||||||
|
- Ensure `A-->|text|B` tokenizes as `NODE_STRING LINK PIPE TEXT PIPE NODE_STRING`
|
||||||
|
|
||||||
|
3. **Update grammar rules** to handle arrow text
|
||||||
|
- Ensure link rules can consume pipe-delimited text
|
||||||
|
- Test with various text patterns (quoted, unquoted, complex)
|
||||||
|
|
||||||
|
### Phase 3: Fix Special Characters (Target: +4 tests = 203/203 passing)
|
||||||
|
**Timeline**: 1 hour
|
||||||
|
**Priority**: LOW - Affects fewest tests
|
||||||
|
|
||||||
|
1. **Identify token conflicts** for each special character
|
||||||
|
2. **Adjust lexer token order** or patterns
|
||||||
|
3. **Test each character individually**
|
||||||
|
|
||||||
|
## Success Criteria
|
||||||
|
|
||||||
|
### Phase 1 Success:
|
||||||
|
- [ ] All basic edge tests pass (`A-->B`, `A-->B-->C`, etc.)
|
||||||
|
- [ ] Vertices are created for all edge endpoints
|
||||||
|
- [ ] No regression in currently passing tests
|
||||||
|
|
||||||
|
### Phase 2 Success:
|
||||||
|
- [ ] All arrow text tests pass (`A-->|text|B`)
|
||||||
|
- [ ] Lexer properly tokenizes pipe-delimited text
|
||||||
|
- [ ] Grammar correctly parses arrow text patterns
|
||||||
|
|
||||||
|
### Phase 3 Success:
|
||||||
|
- [ ] All special character tests pass
|
||||||
|
- [ ] Node IDs can start with `:`, `&`, `,`, `-`
|
||||||
|
- [ ] No conflicts with other tokens
|
||||||
|
|
||||||
|
### Final Success:
|
||||||
|
- [ ] **203/203 tests passing (100%)**
|
||||||
|
- [ ] Full compatibility with original JISON parser
|
||||||
|
- [ ] All existing functionality preserved
|
||||||
|
|
||||||
|
## Risk Mitigation
|
||||||
|
|
||||||
|
### High Risk: Breaking Currently Passing Tests
|
||||||
|
**Mitigation**: Run full test suite after each change
|
||||||
|
```bash
|
||||||
|
vitest packages/mermaid/src/diagrams/flowchart/parser/*flow*-chev*.spec.js --run
|
||||||
|
```
|
||||||
|
|
||||||
|
### Medium Risk: Lexer Changes Affecting Other Patterns
|
||||||
|
**Mitigation**: Test with diverse input patterns, not just failing tests
|
||||||
|
|
||||||
|
### Low Risk: Performance Impact
|
||||||
|
**Mitigation**: Current implementation is already efficient, changes should be minimal
|
||||||
|
|
||||||
|
## Tools and Commands
|
||||||
|
|
||||||
|
### Run Specific Test:
|
||||||
|
```bash
|
||||||
|
vitest packages/mermaid/src/diagrams/flowchart/parser/flow-chev-arrows.spec.js -t "should handle basic arrow" --run
|
||||||
|
```
|
||||||
|
|
||||||
|
### Run All Chevrotain Tests:
|
||||||
|
```bash
|
||||||
|
vitest packages/mermaid/src/diagrams/flowchart/parser/*flow*-chev*.spec.js --run
|
||||||
|
```
|
||||||
|
|
||||||
|
### Debug Lexer Tokenization:
|
||||||
|
```typescript
|
||||||
|
// In flowParserAdapter.ts
|
||||||
|
const lexResult = FlowChevLexer.tokenize(input);
|
||||||
|
console.debug('Tokens:', lexResult.tokens.map(t => [t.image, t.tokenType.name]));
|
||||||
|
console.debug('Errors:', lexResult.errors);
|
||||||
|
```
|
||||||
|
|
||||||
|
### Check Grammar Rule Usage:
|
||||||
|
```typescript
|
||||||
|
// Add logging to AST visitor methods
|
||||||
|
console.debug('Using standaloneLinkStatement for:', ctx);
|
||||||
|
```
|
||||||
|
|
||||||
|
## Next Actions
|
||||||
|
|
||||||
|
1. **Start with Phase 1** - Fix node creation (highest impact)
|
||||||
|
2. **Debug the exact grammar path** being taken for failing tests
|
||||||
|
3. **Add vertex creation to all edge processing methods**
|
||||||
|
4. **Test incrementally** to avoid regressions
|
||||||
|
5. **Move to Phase 2** only after Phase 1 is complete
|
||||||
|
|
||||||
|
This systematic approach ensures we fix the most impactful issues first while maintaining the stability of the 85% of tests that are already passing.
|
||||||
22
pnpm-lock.yaml
generated
22
pnpm-lock.yaml
generated
@@ -74,8 +74,8 @@ importers:
|
|||||||
specifier: ^8.17.1
|
specifier: ^8.17.1
|
||||||
version: 8.17.1
|
version: 8.17.1
|
||||||
chokidar:
|
chokidar:
|
||||||
specifier: ^4.0.3
|
specifier: 3.6.0
|
||||||
version: 4.0.3
|
version: 3.6.0
|
||||||
concurrently:
|
concurrently:
|
||||||
specifier: ^9.1.2
|
specifier: ^9.1.2
|
||||||
version: 9.1.2
|
version: 9.1.2
|
||||||
@@ -330,8 +330,8 @@ importers:
|
|||||||
specifier: ^8.17.1
|
specifier: ^8.17.1
|
||||||
version: 8.17.1
|
version: 8.17.1
|
||||||
chokidar:
|
chokidar:
|
||||||
specifier: ^4.0.3
|
specifier: 3.6.0
|
||||||
version: 4.0.3
|
version: 3.6.0
|
||||||
concurrently:
|
concurrently:
|
||||||
specifier: ^9.1.2
|
specifier: ^9.1.2
|
||||||
version: 9.1.2
|
version: 9.1.2
|
||||||
@@ -4558,10 +4558,6 @@ packages:
|
|||||||
resolution: {integrity: sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==}
|
resolution: {integrity: sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==}
|
||||||
engines: {node: '>= 8.10.0'}
|
engines: {node: '>= 8.10.0'}
|
||||||
|
|
||||||
chokidar@4.0.3:
|
|
||||||
resolution: {integrity: sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA==}
|
|
||||||
engines: {node: '>= 14.16.0'}
|
|
||||||
|
|
||||||
chrome-trace-event@1.0.4:
|
chrome-trace-event@1.0.4:
|
||||||
resolution: {integrity: sha512-rNjApaLzuwaOTjCiT8lSDdGN1APCiqkChLMJxJPWLunPAt5fy8xgU9/jNOchV84wfIxrA0lRQB7oCT8jrn/wrQ==}
|
resolution: {integrity: sha512-rNjApaLzuwaOTjCiT8lSDdGN1APCiqkChLMJxJPWLunPAt5fy8xgU9/jNOchV84wfIxrA0lRQB7oCT8jrn/wrQ==}
|
||||||
engines: {node: '>=6.0'}
|
engines: {node: '>=6.0'}
|
||||||
@@ -8430,10 +8426,6 @@ packages:
|
|||||||
resolution: {integrity: sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==}
|
resolution: {integrity: sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==}
|
||||||
engines: {node: '>=8.10.0'}
|
engines: {node: '>=8.10.0'}
|
||||||
|
|
||||||
readdirp@4.1.2:
|
|
||||||
resolution: {integrity: sha512-GDhwkLfywWL2s6vEjyhri+eXmfH6j1L7JE27WhqLeYzoh/A3DBaYGEj2H/HFZCn/kMfim73FXxEJTw06WtxQwg==}
|
|
||||||
engines: {node: '>= 14.18.0'}
|
|
||||||
|
|
||||||
real-require@0.2.0:
|
real-require@0.2.0:
|
||||||
resolution: {integrity: sha512-57frrGM/OCTLqLOAh0mhVA9VBMHd+9U7Zb2THMGdBUoZVOtGbJzjxsYGDJ3A9AYYCP4hn6y1TVbaOfzWtm5GFg==}
|
resolution: {integrity: sha512-57frrGM/OCTLqLOAh0mhVA9VBMHd+9U7Zb2THMGdBUoZVOtGbJzjxsYGDJ3A9AYYCP4hn6y1TVbaOfzWtm5GFg==}
|
||||||
engines: {node: '>= 12.13.0'}
|
engines: {node: '>= 12.13.0'}
|
||||||
@@ -15468,10 +15460,6 @@ snapshots:
|
|||||||
optionalDependencies:
|
optionalDependencies:
|
||||||
fsevents: 2.3.3
|
fsevents: 2.3.3
|
||||||
|
|
||||||
chokidar@4.0.3:
|
|
||||||
dependencies:
|
|
||||||
readdirp: 4.1.2
|
|
||||||
|
|
||||||
chrome-trace-event@1.0.4: {}
|
chrome-trace-event@1.0.4: {}
|
||||||
|
|
||||||
ci-info@3.9.0: {}
|
ci-info@3.9.0: {}
|
||||||
@@ -20193,8 +20181,6 @@ snapshots:
|
|||||||
dependencies:
|
dependencies:
|
||||||
picomatch: 2.3.1
|
picomatch: 2.3.1
|
||||||
|
|
||||||
readdirp@4.1.2: {}
|
|
||||||
|
|
||||||
real-require@0.2.0: {}
|
real-require@0.2.0: {}
|
||||||
|
|
||||||
rechoir@0.6.2:
|
rechoir@0.6.2:
|
||||||
|
|||||||
Reference in New Issue
Block a user