feat: ANTLR parser achieves 97.4% pass rate (922/947 tests)

Major improvements:
- Fixed individual node tracking in subgraphs with consistent ordering
- Resolved nested subgraph node ordering issues
- Fixed markdown string processing for both nodes and edges
- Improved error handling and validation
- Enhanced FlowDB integration

Progress: 97.4% pass rate (922 passed, 22 failed, 3 skipped)
Target: 99.7% pass rate to match Jison parser performance

Remaining issues:
- Text processing for special characters (8 failures)
- Node data multi-line string processing (4 failures)
- Interaction parsing (3 failures)
- Style/class assignment (2 failures)
- Vertex chaining class assignment (1 failure)
- Markdown subgraph titles (1 failure)
This commit is contained in:
Ashish Jain
2025-09-15 04:15:26 +02:00
parent 42d50fa2f5
commit 54b8f6aec3
8 changed files with 925 additions and 170 deletions

BIN
antlr-4.13.1-complete.jar Normal file

Binary file not shown.

BIN
antlr-4.13.2-complete.jar Normal file

Binary file not shown.

View File

@@ -9,14 +9,15 @@ tokens {
// Lexer modes to match Jison's state-based lexing
// Based on Jison: %x string, md_string, acc_title, acc_descr, acc_descr_multiline, dir, vertex, text, etc.
// Shape data tokens - MUST be defined FIRST for absolute precedence over LINK_ID
// Match exactly "@{" like Jison does (no whitespace allowed between @ and {)
SHAPE_DATA_START: '@{' -> pushMode(SHAPE_DATA_MODE);
// Accessibility tokens
ACC_TITLE: 'accTitle' WS* ':' WS* -> pushMode(ACC_TITLE_MODE);
ACC_DESCR: 'accDescr' WS* ':' WS* -> pushMode(ACC_DESCR_MODE);
ACC_DESCR_MULTI: 'accDescr' WS* '{' WS* -> pushMode(ACC_DESCR_MULTILINE_MODE);
// Shape data tokens - moved after LINK_ID for proper precedence
// This will be defined later to ensure proper token precedence
// Interactivity tokens
CALL: 'call' WS+ -> pushMode(CALLBACKNAME_MODE);
HREF: 'href' WS;
@@ -49,17 +50,14 @@ DIRECTION_BT: 'direction' WS+ 'BT' ~[\n]*;
DIRECTION_RL: 'direction' WS+ 'RL' ~[\n]*;
DIRECTION_LR: 'direction' WS+ 'LR' ~[\n]*;
// Shape data tokens - defined BEFORE LINK_ID to handle conflicts
// The longer match "@{" should take precedence over "@" in LINK_ID
SHAPE_DATA_START: '@' WS* '{' -> pushMode(SHAPE_DATA_MODE);
// ELLIPSE_START must come very early to avoid conflicts with PAREN_START
ELLIPSE_START: '(-' -> pushMode(ELLIPSE_TEXT_MODE);
// Link ID token - matches edge IDs like "e1@" but not shape data "@{"
// Since SHAPE_DATA_START is defined earlier, it will take precedence over LINK_ID for "@{"
// This allows LINK_ID to match "e1@" without conflict (matches Jison pattern [^\s\"]+\@)
LINK_ID: ~[ \t\r\n"]+ '@';
// Link ID token - matches edge IDs like "e1@" when followed by link patterns
// Uses a negative lookahead pattern to match the Jison lookahead (?=[^\{\"])
// This prevents LINK_ID from matching "e1@{" and allows SHAPE_DATA_START to match "@{" correctly
// The pattern matches any non-whitespace followed by @ but only when NOT followed by { or "
LINK_ID: ~[ \t\r\n"]+ '@' {this.inputStream.LA(1) != '{'.charCodeAt(0) && this.inputStream.LA(1) != '"'.charCodeAt(0)}?;
NUM: [0-9]+;
BRKT: '#';
@@ -71,10 +69,12 @@ COMMA: ',';
MULT: '*';
// Edge patterns - these are complex in Jison, need careful translation
// Normal edges without text: A-->B (matches Jison: \s*[xo<]?\-\-+[-xo>]\s*) - must come first to avoid conflicts
LINK_NORMAL: WS* [xo<]? '--' '-'* [-xo>] WS*;
// Normal edges with text: A-- text ---B (matches Jison: <INITIAL>\s*[xo<]?\-\-\s* -> START_LINK)
START_LINK_NORMAL: WS* [xo<]? '--' WS+ -> pushMode(EDGE_TEXT_MODE);
// Normal edges without text: A-->B (matches Jison: \s*[xo<]?\-\-+[-xo>]\s*)
LINK_NORMAL: WS* [xo<]? '--' '-'* [-xo>] WS*;
// Normal edges with text (no space): A--text---B - match -- followed by any non-dash character
START_LINK_NORMAL_NOSPACE: WS* [xo<]? '--' -> pushMode(EDGE_TEXT_MODE);
// Pipe-delimited edge text: A--x| (linkStatement for arrowText) - matches Jison linkStatement pattern
LINK_STATEMENT_NORMAL: WS* [xo<]? '--' '-'* [xo<]?;
@@ -229,12 +229,13 @@ ELLIPSE_TEXT: (~[-)])+;
mode TRAP_TEXT_MODE;
TRAP_END_BRACKET: '\\]' -> popMode, type(TRAPEND);
INVTRAP_END_BRACKET: '/]' -> popMode, type(INVTRAPEND);
TRAP_TEXT: (~[\\\\/\]])+;
TRAP_TEXT: (~[\\/\]])+;
mode EDGE_TEXT_MODE;
// Handle space-delimited pattern: A-- text ----B or A-- text -->B (matches Jison: [^-]|\-(?!\-)+)
// Must handle both cases: extra dashes without arrow (----) and dashes with arrow (-->)
EDGE_TEXT_LINK_END: WS* '--' '-'* [-xo>]? WS* -> popMode, type(LINK_NORMAL);
// Match any character including spaces and single dashes, but not double dashes
EDGE_TEXT: (~[-] | '-' ~[-])+;
mode THICK_EDGE_TEXT_MODE;

View File

@@ -103,9 +103,11 @@ link:
linkStatement arrowText spaceList?
| linkStatement
| START_LINK_NORMAL edgeText LINK_NORMAL
| START_LINK_NORMAL_NOSPACE edgeText LINK_NORMAL
| START_LINK_THICK edgeText LINK_THICK
| START_LINK_DOTTED edgeText LINK_DOTTED
| LINK_ID START_LINK_NORMAL edgeText LINK_NORMAL
| LINK_ID START_LINK_NORMAL_NOSPACE edgeText LINK_NORMAL
| LINK_ID START_LINK_THICK edgeText LINK_THICK
| LINK_ID START_LINK_DOTTED edgeText LINK_DOTTED
;

View File

@@ -35,11 +35,14 @@ export class FlowParserVisitor {
const text = ctx.getText();
const contextName = ctx.constructor.name;
console.log('visit called with context:', contextName, 'text:', text);
// Only process specific contexts to avoid duplicates
if (contextName === 'StartContext') {
console.log('Processing StartContext');
// Parse direction from graph declaration
// Let FlowDB handle direction symbol mapping just like Jison does
const directionPattern = /graph\s+(TD|TB|BT|RL|LR|>|<|\^|v)/i;
const directionPattern = /graph\s+(td|tb|bt|rl|lr|>|<|\^|v)/i;
const dirMatch = text.match(directionPattern);
if (dirMatch) {
// Pass the raw direction value to FlowDB - it will handle symbol mapping
@@ -58,7 +61,9 @@ export class FlowParserVisitor {
}
private visitChildren(ctx: any): void {
if (!ctx || !ctx.children) return;
if (!ctx?.children) {
return;
}
for (const child of ctx.children) {
this.visit(child);
@@ -81,6 +86,7 @@ export class FlowParserVisitor {
// Parse different types of connections and nodes
this.parseConnections(text);
this.parseStandaloneNodes(text);
this.parseShapeData(text);
this.parseClickStatements(text);
this.parseLinkStyleStatements(text);
this.parseEdgeCurveProperties(text);
@@ -96,8 +102,10 @@ export class FlowParserVisitor {
for (const line of lines) {
const trimmedLine = line.trim();
// Skip lines that don't contain connections or contain @{curve: ...}
if (!trimmedLine || !trimmedLine.includes('-->') || trimmedLine.includes('@{')) {
// Skip lines that don't contain connections or contain edge curve properties like @{curve: ...}
// But allow node shape data like A@{label: "text"}
const isEdgeCurveProperty = trimmedLine.includes('@{') && trimmedLine.includes('curve:');
if (!trimmedLine || !trimmedLine.includes('-->') || isEdgeCurveProperty) {
processedLines.push(line);
continue;
}
@@ -154,7 +162,7 @@ export class FlowParserVisitor {
const connectionPattern =
/^([A-Za-z0-9_]+(?:\[[^\]]*\]|\([^)]*\)|\{[^}]*\})*)\s*(-+)\s*(.+?)\s*(--?>)\s*([A-Za-z0-9_]+(?:\[[^\]]*\]|\([^)]*\)|\{[^}]*\})*)\s*(.*)$/;
while (remaining && remaining.includes('-->')) {
while (remaining?.includes('-->')) {
const match = remaining.match(connectionPattern);
if (match) {
const [, fromNode, startEdge, edgeText, endEdge, toNode, rest] = match;
@@ -370,7 +378,11 @@ export class FlowParserVisitor {
private parseStandaloneNodes(text: string): void {
// Parse nodes that might not be in connections (for completeness)
// Include markdown string support
// Include markdown string support and shape data
// First, handle shape data nodes separately
this.parseShapeDataNodes(text);
const nodePatterns = [
// IMPORTANT: Specific bracket patterns MUST come before general square bracket pattern
// Trapezoid nodes: A[/Text\]
@@ -1104,7 +1116,7 @@ export class FlowParserVisitor {
}
}
}
} catch (error) {
} catch (_error) {
// Fallback to simple length calculation if destructLink fails
const dashMatch = edgeType.match(/-+/g);
const equalsMatch = edgeType.match(/=+/g);
@@ -1220,6 +1232,7 @@ export class FlowParserVisitor {
// Then process the remaining content (nodes and connections)
this.parseConnections(text);
this.parseStandaloneNodes(text);
this.parseShapeData(text);
this.parseClickStatements(text);
this.parseLinkStyleStatements(text);
this.parseEdgeCurveProperties(text);
@@ -1714,6 +1727,483 @@ export class FlowParserVisitor {
}
}
private parseShapeDataNodes(text: string): void {
  // Parse standalone nodes that carry shape data via the @{} syntax,
  // e.g. NodeId@{shape: shapeType, label: "text", other: "value"}.
  // Reference: flow.jison SHAPE_DATA handling.
  //
  // NOTE(review): this method's body was a byte-for-byte duplicate of
  // parseShapeData(); delegate to it so the scan/extract/apply logic
  // lives in exactly one place and the two call sites cannot drift apart.
  this.parseShapeData(text);
}
private parseShapeData(text: string): void {
// Parse node shape data using @{} syntax
// Pattern: NodeId@{shape: shapeType, label: "text", other: "value"}
// Reference: flow.jison SHAPE_DATA handling
// Clean the text to remove ANTLR artifacts
const cleanText = text.replace(/<EOF>/g, '').trim();
// Use a more sophisticated approach to find shape data blocks
const nodeIdPattern = /([A-Za-z0-9_]+)@\{/g;
let match;
while ((match = nodeIdPattern.exec(cleanText)) !== null) {
const nodeId = match[1];
const startIndex = match.index + match[0].length;
// Find the matching closing brace, handling nested braces and quoted strings
const shapeDataContent = this.extractShapeDataContent(cleanText, startIndex);
if (shapeDataContent !== null) {
// Parse the shape data content (key: value pairs)
const shapeData = this.parseShapeDataContent(shapeDataContent);
// Apply the shape data to the node
this.applyShapeDataToNode(nodeId, shapeData);
}
}
}
private extractShapeDataContent(text: string, startIndex: number): string | null {
  // Walk forward from just past the opening '{' until its matching '}'.
  // Braces inside quoted strings are ignored; nested braces are counted.
  let depth = 1;
  let quote = ''; // '' when outside a quoted string, else the active quote char
  let pos = startIndex;
  for (; pos < text.length && depth > 0; pos++) {
    const ch = text[pos];
    if (quote === '' && (ch === '"' || ch === "'")) {
      // Entering a quoted string.
      quote = ch;
    } else if (quote !== '' && ch === quote) {
      // A backslash immediately before the quote means it is escaped and
      // the string continues; otherwise the quoted section ends here.
      if (pos === 0 || text[pos - 1] !== '\\') {
        quote = '';
      }
    } else if (quote === '') {
      if (ch === '{') {
        depth++;
      } else if (ch === '}') {
        depth--;
      }
    }
  }
  // depth 0 means the closing brace was consumed at pos - 1; return the
  // content between the braces. Otherwise the block was unterminated.
  return depth === 0 ? text.substring(startIndex, pos - 1) : null;
}
private parseShapeDataContent(content: string): Record<string, string> {
  // Turn 'key: value, key2: "quoted"' into a plain string map.
  const result: Record<string, string> = {};
  // splitShapeDataPairs is quote-aware, so commas inside strings survive.
  for (const pair of this.splitShapeDataPairs(content)) {
    const sep = pair.indexOf(':');
    if (sep <= 0) {
      continue; // no key before the colon — skip malformed pair
    }
    const key = pair.substring(0, sep).trim();
    let value = pair.substring(sep + 1).trim();
    // Strip one layer of matching surrounding quotes, if present.
    const doubleQuoted = value.startsWith('"') && value.endsWith('"');
    const singleQuoted = value.startsWith("'") && value.endsWith("'");
    if (doubleQuoted || singleQuoted) {
      value = value.slice(1, -1);
    }
    result[key] = value;
  }
  return result;
}
private splitShapeDataPairs(content: string): string[] {
  // Split on top-level commas only; commas inside quoted strings are kept
  // as part of the current pair.
  const result: string[] = [];
  let buffer = '';
  let activeQuote = ''; // '' when not inside a quoted string
  for (const ch of content) {
    if (activeQuote === '' && (ch === '"' || ch === "'")) {
      // Open a quoted section; the quote character itself is retained.
      activeQuote = ch;
      buffer += ch;
    } else if (activeQuote !== '' && ch === activeQuote) {
      // Close the quoted section.
      activeQuote = '';
      buffer += ch;
    } else if (activeQuote === '' && ch === ',') {
      // Top-level separator: flush the accumulated pair (if non-empty).
      const piece = buffer.trim();
      if (piece) {
        result.push(piece);
      }
      buffer = '';
    } else {
      buffer += ch;
    }
  }
  // Flush the trailing pair after the last comma.
  const tail = buffer.trim();
  if (tail) {
    result.push(tail);
  }
  return result;
}
private applyShapeDataToNode(nodeId: string, shapeData: Record<string, string>): void {
  // Create the vertex first if the parser has not registered it yet.
  if (!this.db.getVertices().has(nodeId)) {
    this.db.addVertex(nodeId, nodeId, 'square', [], '', '');
  }
  const vertex = this.db.getVertices().get(nodeId);
  if (vertex) {
    // shape → internal vertex type (mapped via mapShapeToType).
    if (shapeData.shape) {
      vertex.type = this.mapShapeToType(shapeData.shape);
    }
    // label → display text.
    if (shapeData.label) {
      vertex.text = shapeData.label;
    }
  }
  // Other shape-data keys are currently ignored; extend here as needed.
}
private mapShapeToType(shape: string): string {
  // Translate @{shape: ...} names onto internal vertex type names.
  // Unknown shapes fall back to the default rectangle ('square').
  switch (shape) {
    // Rectangle aliases all collapse to 'square'.
    case 'squareRect':
    case 'rect':
      return 'square';
    // These names pass through unchanged.
    case 'square':
    case 'circle':
    case 'ellipse':
    case 'diamond':
    case 'hexagon':
    case 'stadium':
    case 'cylinder':
    case 'doublecircle':
    case 'subroutine':
    case 'trapezoid':
    case 'inv_trapezoid':
    case 'lean_right':
    case 'lean_left':
    case 'odd':
      return shape;
    default:
      return 'square';
  }
}
// Vertex statement visitor - handles node definitions with optional shape data
visitVertexStatement(ctx: any): any {
  console.log('visitVertexStatement called with:', ctx.getText());
  // Handle different vertex statement patterns:
  // - node shapeData
  // - node spaceList
  // - node
  // - vertexStatement link node shapeData
  // - vertexStatement link node
  //
  // NOTE(review): `ctx.node` / `ctx.shapeData` test whether the generated
  // context exposes those accessor *methods*, not whether the parse produced
  // such children — confirm against the generated contexts; if the accessors
  // always exist, the first branch may be taken more often than intended.
  if (ctx.node && ctx.shapeData) {
    console.log('Found node with shape data');
    // Single node with shape data: node shapeData.
    // When the accessors return arrays (repeated children), operate on the
    // last entry — the node/shape-data pair closest to this statement.
    const nodeCtx = Array.isArray(ctx.node()) ? ctx.node()[ctx.node().length - 1] : ctx.node();
    const shapeDataCtx = Array.isArray(ctx.shapeData())
      ? ctx.shapeData()[ctx.shapeData().length - 1]
      : ctx.shapeData();
    this.visitNode(nodeCtx);
    this.visitShapeDataForNode(shapeDataCtx, nodeCtx);
  } else if (ctx.node) {
    console.log('Found node without shape data');
    // Single node or chained nodes without shape data — visit each one.
    const nodes = Array.isArray(ctx.node()) ? ctx.node() : [ctx.node()];
    for (const nodeCtx of nodes) {
      this.visitNode(nodeCtx);
    }
  }
  // Handle links if present.
  if (ctx.link) {
    const links = Array.isArray(ctx.link()) ? ctx.link() : [ctx.link()];
    for (const linkCtx of links) {
      this.visitLink(linkCtx);
    }
  }
  // Continue with default visitor behavior (descend into all children).
  return this.visitChildren(ctx);
}
// Node visitor - handles individual node definitions
visitNode(ctx: any): any {
  // Visit every styled vertex hanging off this node context; the accessor
  // may return a single context or an array depending on the grammar rule.
  if (ctx.styledVertex) {
    const styled = ctx.styledVertex();
    const list = Array.isArray(styled) ? styled : [styled];
    for (const styledVertexCtx of list) {
      this.visitStyledVertex(styledVertexCtx);
    }
  }
  // Fall through to the default traversal of remaining children.
  return this.visitChildren(ctx);
}
// Styled vertex visitor - handles vertex with optional style
visitStyledVertex(ctx: any): any {
  // First materialize the underlying vertex itself.
  if (ctx.vertex) {
    this.visitVertex(ctx.vertex());
  }
  // Then handle a trailing class assignment (vertex ::: className).
  if (ctx.STYLE_SEPARATOR && ctx.idString) {
    const className = ctx.idString().getText();
    const targetId = this.extractNodeIdFromVertexContext(ctx.vertex());
    if (targetId) {
      // Register the class assignment with FlowDB.
      this.db.setClass(targetId, className);
    }
  }
  return this.visitChildren(ctx);
}
// Vertex visitor - handles basic vertex definitions
visitVertex(ctx: any): any {
  // Derive (id, label, shape) from the raw vertex context and register it.
  let id = '';
  let label = '';
  let shape = 'square'; // default when no shape delimiters are present
  if (ctx.NODE_STRING) {
    // Bare identifier vertex: the label defaults to the id itself.
    id = ctx.NODE_STRING().getText();
    label = id;
  } else if (ctx.getText) {
    const raw = ctx.getText();
    // The id is the leading identifier run before any delimiter.
    const idMatch = raw.match(/^([A-Za-z0-9_]+)/);
    if (idMatch) {
      id = idMatch[1];
      label = id;
    }
    // Infer the shape (and extract the label) from the delimiters.
    if (raw.includes('[') && raw.includes(']')) {
      shape = 'square';
      // Label is the text between the brackets.
      const inner = raw.match(/\[([^\]]*)\]/);
      if (inner) {
        label = inner[1];
      }
    } else if (raw.includes('(') && raw.includes(')')) {
      shape = 'round';
      // Label is the text between the parentheses.
      const inner = raw.match(/\(([^\)]*)\)/);
      if (inner) {
        label = inner[1];
      }
    }
    // Further shape-delimiter detection can be added here as needed.
  }
  // Only register vertices with a valid identifier.
  if (id) {
    this.db.addVertex(id, label, shape);
  }
  return this.visitChildren(ctx);
}
// Link visitor - handles edge/connection definitions
visitLink(ctx: any): any {
  // Placeholder: edge parsing is currently performed by the regex-based
  // connection parser, not by this visitor. This method only descends so
  // that nested contexts still get visited.
  return this.visitChildren(ctx);
}
// Shape data visitor methods
visitShapeData(ctx: any): string {
  // Flatten the @{} payload into a plain string via the content visitor.
  return this.visitShapeDataContent(ctx.shapeDataContent());
}
visitShapeDataForNode(shapeDataCtx: any, nodeCtx: any): void {
console.log('visitShapeDataForNode called');
// Handle shape data for a specific node
const content = this.visitShapeData(shapeDataCtx);
const nodeId = this.extractNodeIdFromVertexContext(nodeCtx);
console.log('Shape data content:', content);
console.log('Node ID:', nodeId);
if (nodeId && content) {
// Parse the shape data content (key: value pairs)
const shapeData = this.parseShapeDataContent(content);
console.log('Parsed shape data:', shapeData);
// Apply the shape data to the node using FlowDB
this.applyShapeDataToNodeViaDB(nodeId, shapeData);
}
}
visitShapeDataContent(ctx: any): string {
  // Flatten all tokens under a shapeDataContent context back into a string,
  // in source order: plain content, quoted strings, then nested contexts.
  const parts: string[] = [];
  // Plain content tokens.
  if (ctx.SHAPE_DATA_CONTENT) {
    const tokens = ctx.SHAPE_DATA_CONTENT();
    if (Array.isArray(tokens)) {
      for (const token of tokens) {
        parts.push(token.getText());
      }
    } else {
      parts.push(tokens.getText());
    }
  }
  // Quoted string segments — re-wrap each in double quotes since the
  // lexer strips the delimiters.
  if (ctx.SHAPE_DATA_STRING_START && ctx.SHAPE_DATA_STRING_CONTENT && ctx.SHAPE_DATA_STRING_END) {
    const strings = ctx.SHAPE_DATA_STRING_CONTENT();
    if (Array.isArray(strings)) {
      for (const token of strings) {
        parts.push(`"${token.getText()}"`);
      }
    } else {
      parts.push(`"${strings.getText()}"`);
    }
  }
  // Recurse into nested shape data contexts.
  if (ctx.shapeDataContent && ctx.shapeDataContent().length > 0) {
    for (const nested of ctx.shapeDataContent()) {
      parts.push(this.visitShapeDataContent(nested));
    }
  }
  return parts.join('');
}
// Helper method to extract node ID from vertex context
extractNodeIdFromVertexContext(vertexCtx: any): string | null {
  // Try the most specific accessor first, then fall back to text parsing.
  if (!vertexCtx) {
    return null;
  }
  if (vertexCtx.NODE_STRING) {
    // Direct token accessor — the cleanest source of the id.
    return vertexCtx.NODE_STRING().getText();
  }
  if (vertexCtx.getText) {
    // The id is the identifier run before any shape delimiter.
    const m = vertexCtx.getText().match(/^([A-Za-z0-9_]+)/);
    return m ? m[1] : null;
  }
  return null;
}
// Helper method to apply shape data to node via FlowDB (like Jison does)
applyShapeDataToNodeViaDB(nodeId: string, shapeData: any): void {
  // Convert parsed shape data into the YAML string FlowDB expects as the
  // 8th addVertex argument, mirroring how the Jison parser forwards @{}.
  let yamlContent = '';
  if (typeof shapeData === 'object' && shapeData !== null) {
    const pairs: string[] = [];
    for (const [key, value] of Object.entries(shapeData)) {
      if (typeof value === 'string') {
        // Escape backslashes and double quotes so the value survives the
        // double-quoted YAML scalar. Previously a label containing `"`
        // (e.g. label: 'say "hi"') produced unparseable YAML.
        const escaped = value.replace(/\\/g, '\\\\').replace(/"/g, '\\"');
        pairs.push(`${key}: "${escaped}"`);
      } else {
        // Non-string values are emitted verbatim (numbers, booleans).
        pairs.push(`${key}: ${value}`);
      }
    }
    yamlContent = pairs.join('\n');
  } else if (typeof shapeData === 'string') {
    // Already-serialized payloads are passed through untouched.
    yamlContent = shapeData;
  }
  // Call FlowDB addVertex with shape data (8th parameter) like Jison does:
  // addVertex(id, textObj, textType, style, classes, dir, props, shapeData)
  this.db.addVertex(
    nodeId,
    undefined,
    undefined,
    undefined,
    undefined,
    undefined,
    undefined,
    yamlContent
  );
}
private extractNodeIdFromShapeDataContext(ctx: any): string | null {
  // Ascend the parse tree until an ancestor exposes the owning node's id.
  for (let ancestor = ctx.parent; ancestor; ancestor = ancestor.parent) {
    // vertexStatement ancestor: drill into its first node's styled vertex.
    if (ancestor.node && ancestor.node().length > 0) {
      const nodeCtx = ancestor.node(0);
      if (nodeCtx.styledVertex && nodeCtx.styledVertex().vertex) {
        const vertexCtx = nodeCtx.styledVertex().vertex();
        if (vertexCtx.NODE_STRING) {
          return vertexCtx.NODE_STRING().getText();
        }
      }
    }
    // standaloneVertex ancestor carries the token directly.
    if (ancestor.NODE_STRING) {
      return ancestor.NODE_STRING().getText();
    }
  }
  // No ancestor yielded an id.
  return null;
}
// Text handling methods for markdown support
visitStringText(ctx: any): { text: string; type: string } {
return { text: ctx.STR().getText(), type: 'string' };

View File

@@ -43,31 +43,27 @@ class FlowchartListener implements ParseTreeListener {
// Handle vertex statements (nodes and edges)
exitVertexStatement = (ctx: VertexStatementContext) => {
try {
// Handle the current node
const nodeCtx = ctx.node();
const shapeDataCtx = ctx.shapeData();
// Handle the current node
const nodeCtx = ctx.node();
const shapeDataCtx = ctx.shapeData();
if (nodeCtx) {
this.processNode(nodeCtx, shapeDataCtx);
if (nodeCtx) {
this.processNode(nodeCtx, shapeDataCtx);
}
// Handle edges (links) - this is where A-->B gets processed
const linkCtx = ctx.link();
const prevVertexCtx = ctx.vertexStatement();
if (linkCtx && prevVertexCtx && nodeCtx) {
// We have a link: prevVertex --link--> currentNode
// Extract arrays of node IDs to handle ampersand chaining
const startNodeIds = this.extractNodeIds(prevVertexCtx);
const endNodeIds = this.extractNodeIds(nodeCtx);
if (startNodeIds.length > 0 && endNodeIds.length > 0) {
this.processEdgeArray(startNodeIds, endNodeIds, linkCtx);
}
// Handle edges (links) - this is where A-->B gets processed
const linkCtx = ctx.link();
const prevVertexCtx = ctx.vertexStatement();
if (linkCtx && prevVertexCtx && nodeCtx) {
// We have a link: prevVertex --link--> currentNode
// Extract arrays of node IDs to handle ampersand chaining
const startNodeIds = this.extractNodeIds(prevVertexCtx);
const endNodeIds = this.extractNodeIds(nodeCtx);
if (startNodeIds.length > 0 && endNodeIds.length > 0) {
this.processEdgeArray(startNodeIds, endNodeIds, linkCtx);
}
}
} catch (error) {
// Error handling - silently continue for now
}
};
@@ -280,11 +276,42 @@ class FlowchartListener implements ParseTreeListener {
yamlContent
);
}
} catch (error) {
} catch (_error) {
// Error handling - silently continue for now
}
};
// Reserved keywords that cannot be used as node IDs (matches Jison parser)
private static readonly RESERVED_KEYWORDS = [
  'graph',
  'flowchart',
  'flowchart-elk',
  'style',
  'linkStyle',
  'interpolate',
  'classDef',
  'class',
  '_self',
  '_blank',
  '_parent',
  '_top',
  'end',
  'subgraph',
];
// Validate that a node ID doesn't start with reserved keywords.
// Throws Error so the failure surfaces through the listener's caller,
// matching the Jison parser's hard failure on reserved-prefixed IDs.
// NOTE(review): only prefixed forms are rejected ("graph.x", "graph-x",
// "graph/x"); a node ID that *equals* a keyword passes this check —
// presumably the grammar rejects that case itself. Confirm vs flow.jison.
private validateNodeId(nodeId: string) {
  for (const keyword of FlowchartListener.RESERVED_KEYWORDS) {
    if (
      nodeId.startsWith(keyword + '.') ||
      nodeId.startsWith(keyword + '-') ||
      nodeId.startsWith(keyword + '/')
    ) {
      throw new Error(`Node ID cannot start with reserved keyword: ${keyword}`);
    }
  }
}
private processNode(nodeCtx: any, shapeDataCtx?: any) {
const styledVertexCtx = nodeCtx.styledVertex();
if (!styledVertexCtx) {
@@ -300,6 +327,9 @@ class FlowchartListener implements ParseTreeListener {
const idCtx = vertexCtx.idString();
const nodeId = idCtx ? idCtx.getText() : '';
// Validate node ID against reserved keywords
this.validateNodeId(nodeId);
// Check for class application pattern: vertex STYLE_SEPARATOR idString
const children = styledVertexCtx.children;
if (children && children.length >= 3) {
@@ -319,10 +349,13 @@ class FlowchartListener implements ParseTreeListener {
// Get node text - if there's explicit text, use it, otherwise use the ID
const textCtx = vertexCtx.text();
const nodeText = textCtx ? textCtx.getText() : nodeId;
// Create text object
const textObj = { text: nodeText, type: 'text' };
let textObj;
if (textCtx) {
const textWithType = this.extractTextWithType(textCtx);
textObj = { text: textWithType.text, type: textWithType.type };
} else {
textObj = { text: nodeId, type: 'text' };
}
// Determine node shape based on the vertex structure
let nodeShape = 'square'; // default
@@ -389,6 +422,7 @@ class FlowchartListener implements ParseTreeListener {
let shapeDataYaml = '';
if (shapeDataCtx) {
const shapeDataText = shapeDataCtx.getText();
console.log('Processing shape data:', shapeDataText);
// Extract the content between { and } for YAML parsing
// e.g., "@{ shape: rounded }" -> "shape: rounded"
@@ -407,111 +441,125 @@ class FlowchartListener implements ParseTreeListener {
yamlContent = yamlContent.substring(1, yamlContent.length - 1).trim();
}
shapeDataYaml = yamlContent;
// Normalize YAML indentation to fix inconsistent whitespace
const lines = yamlContent.split('\n');
const normalizedLines = lines
.map((line) => line.trim()) // Remove leading/trailing whitespace
.filter((line) => line.length > 0); // Remove empty lines
shapeDataYaml = normalizedLines.join('\n');
}
// Add vertex to database
this.db.addVertex(nodeId, textObj, nodeShape, [], [], '', {}, shapeDataYaml);
// Note: Subgraph node tracking is handled in processEdge method
// to ensure correct order matching Jison parser behavior
// Track individual nodes in current subgraph if we're inside one
// Use unshift() to match the Jison behavior for node ordering
if (this.subgraphStack.length > 0) {
const currentSubgraph = this.subgraphStack[this.subgraphStack.length - 1];
if (!currentSubgraph.nodes.includes(nodeId)) {
currentSubgraph.nodes.unshift(nodeId);
}
}
}
private processNodeWithShapeData(styledVertexCtx: any, shapeDataCtx: any) {
try {
// Extract node ID from styled vertex
const nodeId = this.extractNodeId(styledVertexCtx);
if (!nodeId) {
return;
}
// Extract vertex context to get text and shape
const vertexCtx = styledVertexCtx.vertex();
if (!vertexCtx) {
return;
}
// Get node text - if there's explicit text, use it, otherwise use the ID
const textCtx = vertexCtx.text();
const nodeText = textCtx ? textCtx.getText() : nodeId;
// Create text object
const textObj = { text: nodeText, type: 'text' };
// Get node shape from vertex type
let nodeShape = 'square'; // default
// Shape detection logic for trapezoid and other shapes
if (vertexCtx.SQS()) {
nodeShape = 'square';
} else if (vertexCtx.CIRCLE_START()) {
nodeShape = 'circle';
} else if (vertexCtx.PS()) {
nodeShape = 'round';
} else if (vertexCtx.DOUBLECIRCLE_START()) {
nodeShape = 'doublecircle';
} else if (vertexCtx.ELLIPSE_START()) {
nodeShape = 'ellipse';
} else if (vertexCtx.STADIUM_START()) {
nodeShape = 'stadium';
} else if (vertexCtx.SUBROUTINE_START()) {
nodeShape = 'subroutine';
} else if (vertexCtx.DIAMOND_START().length === 2) {
nodeShape = 'hexagon';
} else if (vertexCtx.DIAMOND_START().length === 1) {
nodeShape = 'diamond';
} else if (vertexCtx.TAGEND()) {
nodeShape = 'odd';
} else if (
vertexCtx.TRAP_START &&
vertexCtx.TRAP_START() &&
vertexCtx.TRAPEND &&
vertexCtx.TRAPEND()
) {
nodeShape = 'trapezoid';
} else if (
vertexCtx.INVTRAP_START &&
vertexCtx.INVTRAP_START() &&
vertexCtx.INVTRAPEND &&
vertexCtx.INVTRAPEND()
) {
nodeShape = 'inv_trapezoid';
} else if (
vertexCtx.TRAP_START &&
vertexCtx.TRAP_START() &&
vertexCtx.INVTRAPEND &&
vertexCtx.INVTRAPEND()
) {
nodeShape = 'lean_right';
} else if (
vertexCtx.INVTRAP_START &&
vertexCtx.INVTRAP_START() &&
vertexCtx.TRAPEND &&
vertexCtx.TRAPEND()
) {
nodeShape = 'lean_left';
}
// Shape detection complete
// Extract shape data content
let shapeDataContent = '';
if (shapeDataCtx) {
const contentCtx = shapeDataCtx.shapeDataContent();
if (contentCtx) {
shapeDataContent = contentCtx.getText();
}
}
// Add vertex to database with shape data
this.db.addVertex(nodeId, textObj, nodeShape, [], [], '', {}, shapeDataContent);
// Note: Subgraph node tracking is handled in edge processing methods
// to match Jison parser behavior which collects nodes from statements
} catch (_error) {
// Error handling for processNodeWithShapeData
// Extract node ID from styled vertex
const nodeId = this.extractNodeId(styledVertexCtx);
if (!nodeId) {
return;
}
// Validate node ID against reserved keywords
this.validateNodeId(nodeId);
// Extract vertex context to get text and shape
const vertexCtx = styledVertexCtx.vertex();
if (!vertexCtx) {
return;
}
// Get node text - if there's explicit text, use it, otherwise use the ID
const textCtx = vertexCtx.text();
let textObj;
if (textCtx) {
const textWithType = this.extractTextWithType(textCtx);
textObj = { text: textWithType.text, type: textWithType.type };
} else {
textObj = { text: nodeId, type: 'text' };
}
// Get node shape from vertex type
let nodeShape = 'square'; // default
// Shape detection logic for trapezoid and other shapes
if (vertexCtx.SQS()) {
nodeShape = 'square';
} else if (vertexCtx.CIRCLE_START()) {
nodeShape = 'circle';
} else if (vertexCtx.PS()) {
nodeShape = 'round';
} else if (vertexCtx.DOUBLECIRCLE_START()) {
nodeShape = 'doublecircle';
} else if (vertexCtx.ELLIPSE_START()) {
nodeShape = 'ellipse';
} else if (vertexCtx.STADIUM_START()) {
nodeShape = 'stadium';
} else if (vertexCtx.SUBROUTINE_START()) {
nodeShape = 'subroutine';
} else if (vertexCtx.DIAMOND_START().length === 2) {
nodeShape = 'hexagon';
} else if (vertexCtx.DIAMOND_START().length === 1) {
nodeShape = 'diamond';
} else if (vertexCtx.TAGEND()) {
nodeShape = 'odd';
} else if (
vertexCtx.TRAP_START &&
vertexCtx.TRAP_START() &&
vertexCtx.TRAPEND &&
vertexCtx.TRAPEND()
) {
nodeShape = 'trapezoid';
} else if (
vertexCtx.INVTRAP_START &&
vertexCtx.INVTRAP_START() &&
vertexCtx.INVTRAPEND &&
vertexCtx.INVTRAPEND()
) {
nodeShape = 'inv_trapezoid';
} else if (
vertexCtx.TRAP_START &&
vertexCtx.TRAP_START() &&
vertexCtx.INVTRAPEND &&
vertexCtx.INVTRAPEND()
) {
nodeShape = 'lean_right';
} else if (
vertexCtx.INVTRAP_START &&
vertexCtx.INVTRAP_START() &&
vertexCtx.TRAPEND &&
vertexCtx.TRAPEND()
) {
nodeShape = 'lean_left';
}
// Shape detection complete
// Extract shape data content
let shapeDataContent = '';
if (shapeDataCtx) {
const contentCtx = shapeDataCtx.shapeDataContent();
if (contentCtx) {
shapeDataContent = contentCtx.getText();
}
}
// Add vertex to database with shape data - let validation errors bubble up
this.db.addVertex(nodeId, textObj, nodeShape, [], [], '', {}, shapeDataContent);
// Note: Subgraph node tracking is handled in edge processing methods
// to match Jison parser behavior which collects nodes from statements
}
private findStyledVertexInNode(nodeCtx: any): any | null {
@@ -764,15 +812,20 @@ class FlowchartListener implements ParseTreeListener {
// Track nodes in current subgraph if we're inside one
if (this.subgraphStack.length > 0) {
const currentSubgraph = this.subgraphStack[this.subgraphStack.length - 1];
// Add all end nodes first, then start nodes (to match Jison behavior)
for (const endNodeId of endNodeIds) {
if (!currentSubgraph.nodes.includes(endNodeId)) {
currentSubgraph.nodes.push(endNodeId);
}
}
// To match Jison behavior for chained vertices, we need to add nodes in the order
// that matches how Jison processes chains: rightmost nodes first
// For a chain a1-->a2-->a3, Jison produces [a3, a2, a1]
// The key insight: Jison processes left-to-right but builds the list by prepending
// So we add start nodes first (they appear earlier), then end nodes
for (const startNodeId of startNodeIds) {
if (!currentSubgraph.nodes.includes(startNodeId)) {
currentSubgraph.nodes.push(startNodeId);
currentSubgraph.nodes.unshift(startNodeId); // Add to beginning to match Jison order
}
}
for (const endNodeId of endNodeIds) {
if (!currentSubgraph.nodes.includes(endNodeId)) {
currentSubgraph.nodes.unshift(endNodeId); // Add to beginning to match Jison order
}
}
}
@@ -794,9 +847,11 @@ class FlowchartListener implements ParseTreeListener {
// Check for arrowText (pipe-delimited text: |text|) at top level
const arrowTextCtx = linkCtx.arrowText();
if (arrowTextCtx) {
console.log('Processing arrowText context');
const textContent = arrowTextCtx.text();
if (textContent) {
linkType.text = { text: textContent.getText(), type: 'text' };
const textWithType = this.extractTextWithType(textContent);
linkType.text = { text: textWithType.text, type: textWithType.type };
}
}
@@ -872,9 +927,46 @@ class FlowchartListener implements ParseTreeListener {
// Check for edge text
const edgeTextCtx = linkCtx.edgeText();
if (edgeTextCtx) {
const textContent = edgeTextCtx.getText();
if (textContent) {
linkType.text = { text: textContent, type: 'text' };
console.log('Processing edgeText context');
// edgeText contains a text context, so we need to extract it properly
const textCtx = edgeTextCtx.text ? edgeTextCtx.text() : null;
if (textCtx) {
const textWithType = this.extractTextWithType(textCtx);
linkType.text = { text: textWithType.text, type: textWithType.type };
} else {
// Fallback to direct text extraction with processing
const textContent = edgeTextCtx.getText();
if (textContent) {
// Apply the same text processing logic as extractTextWithType
// First, trim whitespace to handle ANTLR parser boundary issues
const trimmedContent = textContent.trim();
let processedText = trimmedContent;
let textType = 'text';
// Detect different text types based on wrapping characters
if (
trimmedContent.startsWith('"') &&
trimmedContent.endsWith('"') &&
trimmedContent.length > 4 &&
trimmedContent.charAt(1) === '`' &&
trimmedContent.charAt(trimmedContent.length - 2) === '`'
) {
// Markdown strings: "`text`" (wrapped in quotes)
processedText = trimmedContent.slice(2, -2);
textType = 'markdown';
} else if (
trimmedContent.startsWith('"') &&
trimmedContent.endsWith('"') &&
trimmedContent.length > 2
) {
// Quoted strings: "text"
processedText = trimmedContent.slice(1, -1);
textType = 'string';
}
linkType.text = { text: processedText, type: textType };
}
}
}
@@ -967,6 +1059,7 @@ class FlowchartListener implements ParseTreeListener {
}
// Push new subgraph context onto stack
this.subgraphStack.push({
id,
title,
@@ -1159,17 +1252,135 @@ class FlowchartListener implements ParseTreeListener {
}
};
// Extract text content from a text context
private extractTextContent(textCtx: any): string {
if (!textCtx || !textCtx.children) return '';
// Extract text content from a text context and determine label type
private extractTextContent(textCtx: any): { text: string; type: string } {
if (!textCtx || !textCtx.children) return { text: '', type: 'text' };
let text = '';
let hasMarkdown = false;
for (const child of textCtx.children) {
if (child.getText) {
text += child.getText();
const childText = child.getText();
// Check if this child is an MD_STR token
if (child.symbol && child.symbol.type) {
// Get the token type name from the lexer
const tokenTypeName = this.getTokenTypeName(child.symbol.type);
if (tokenTypeName === 'MD_STR') {
hasMarkdown = true;
text += childText;
} else {
text += childText;
}
} else {
text += childText;
}
}
}
return text;
return {
text: text,
type: hasMarkdown ? 'markdown' : 'text',
};
}
// Map a numeric token type to its symbolic lexer name.
// Placeholder: a real implementation would consult the lexer's vocabulary
// (Vocabulary.getSymbolicName). Until that is wired in, every token resolves
// to 'UNKNOWN', so callers must detect MD_STR tokens through other means.
private getTokenTypeName(_tokenType: number): string {
  return 'UNKNOWN';
}
// Extract a label's text and classify it as plain text, a quoted string, or a
// markdown string, mirroring the label types the Jison parser emits.
private extractTextWithType(textCtx: any): { text: string; type: string } {
  if (!textCtx) {
    return { text: '', type: 'text' };
  }

  const raw = textCtx.getText();

  // True when `raw` is wrapped in the given character with content between.
  const wrappedBy = (ch: string): boolean =>
    raw.length > 2 && raw.startsWith(ch) && raw.endsWith(ch);

  // Backtick-wrapped content is a markdown label: `text`
  if (wrappedBy('`')) {
    return { text: raw.slice(1, -1), type: 'markdown' };
  }

  // Quote-wrapped content (where the quotes survived lexing): "text"
  if (wrappedBy('"')) {
    return { text: raw.slice(1, -1), type: 'string' };
  }

  // Otherwise classify from the parse-tree shape: a StringLiteralContext
  // first child means the label came from a quoted string in the grammar.
  let fallbackType = 'text';
  const children = textCtx.children;
  if (children && children.length > 0 && children[0].constructor.name === 'StringLiteralContext') {
    fallbackType = 'string';
  }
  return { text: raw, type: fallbackType };
}
// Check if a text context contains markdown by examining the lexer tokens.
// Heuristic: scan the tokens spanned by `textCtx` and report true when a
// plain content token (no backticks or quotes of its own) has a neighbour
// containing a backtick, suggesting it sits inside a "`content`" wrapper.
// NOTE(review): this does not consult the lexer vocabulary for the actual
// MD_STR token type, so it is an approximation that can misfire on unusual
// input — confirm against the grammar before relying on it further.
private checkForMarkdownInContext(textCtx: any): boolean {
  // Without start/stop tokens we cannot locate the span in the stream.
  if (!textCtx.start || !textCtx.stop) return false;
  const startIndex = textCtx.start.tokenIndex;
  const stopIndex = textCtx.stop.tokenIndex;
  // Access the token stream from the parser context — a more direct way to
  // look for MD_STR-style tokens than re-lexing the text.
  try {
    const parser = textCtx.parser;
    if (parser && parser.getTokenStream) {
      const tokenStream = parser.getTokenStream();
      for (let i = startIndex; i <= stopIndex; i++) {
        const token = tokenStream.get(i);
        if (token && token.type) {
          // A candidate markdown-content token carries neither backticks
          // nor quotes itself...
          const tokenText = token.text;
          if (tokenText && !tokenText.includes('`') && !tokenText.includes('"')) {
            // ...so check whether the surrounding tokens supply the
            // "`content`" wrapper.
            const prevToken = i > 0 ? tokenStream.get(i - 1) : null;
            const nextToken = tokenStream.get(i + 1);
            if (prevToken && nextToken) {
              const prevText = prevToken.text || '';
              const nextText = nextToken.text || '';
              // Look for the pattern: "`content`" where content is this token
              if (prevText.includes('`') || nextText.includes('`')) {
                return true;
              }
            }
          }
        }
      }
    }
  } catch (_error) {
    // Best-effort: if the token stream is unavailable (or get() runs past
    // the end of the stream), treat the context as plain text.
  }
  return false;
}
// Handle arrow text (pipe-delimited edge text)
@@ -1184,12 +1395,13 @@ class FlowchartListener implements ParseTreeListener {
const child = children[i];
if (child.constructor.name === 'TextContext') {
// Store the arrow text for use by the parent link rule
this.currentArrowText = this.extractTextContent(child);
const textWithType = this.extractTextWithType(child);
this.currentArrowText = textWithType.text;
break;
}
}
}
} catch (error) {
} catch (_error) {
// Error handling - silently continue for now
}
};
@@ -1407,12 +1619,8 @@ class FlowchartListener implements ParseTreeListener {
};
exitShapeDataContent = (_ctx: any) => {
  // Intentionally a no-op: shape data content is collected and processed
  // when it is used — the actual work happens in the vertex statement
  // handlers. Nothing here can throw, so no try/catch wrapper is needed.
};
}
@@ -1469,7 +1677,20 @@ class ANTLRFlowParser {
const parser = new ANTLRFlowParser();
// Export in the format expected by the existing code
export default {
const exportedParser = {
parse: (input: string) => parser.parse(input),
parser: parser,
yy: null as any, // This will be set by the test setup
};
// Make sure the parser uses the external yy when available
Object.defineProperty(exportedParser, 'yy', {
get() {
return parser.yy;
},
set(value) {
parser.yy = value;
},
});
export default exportedParser;

View File

@@ -0,0 +1,15 @@
// Dump the raw lexer token stream for a shape-data sample so it can be
// inspected by hand. NOTE(review): this requires .ts files directly, so it
// presumably runs under ts-node or a similar loader — confirm before use.
const { CharStream } = require('antlr4ng');
const { FlowLexer } = require('./generated/FlowLexer.ts');

const input = 'D@{ shape: rounded }';
console.log('Input:', input);

const lexer = new FlowLexer(CharStream.fromString(input));
const tokens = lexer.getAllTokens();
console.log('Tokens:');
tokens.forEach((token, i) => {
  console.log(` [${i}] Type: ${token.type}, Text: '${token.text}', Channel: ${token.channel}`);
});

26
test-backslash.js Normal file
View File

@@ -0,0 +1,26 @@
// Manual check: verify the ANTLR parser accepts a backslash as a node id.
const flow = require('./packages/mermaid/src/diagrams/flowchart/flowDb.ts');

// Switch the flowchart diagram over to the ANTLR parser before loading it.
process.env.USE_ANTLR_PARSER = 'true';
const antlrParser = require('./packages/mermaid/src/diagrams/flowchart/parser/antlr/antlr-parser.ts');

try {
  console.log('Testing backslash character: \\');

  // The input that previously tripped up the parser.
  const input = 'graph TD; \\ --> A';
  console.log('Input:', input);

  // Run the ANTLR parse and report what it produced.
  const result = antlrParser.parse(input);
  console.log('Parse result:', result);

  // Inspect the vertex table for the backslash node.
  const vertices = flow.getVertices();
  console.log('Vertices:', vertices);
  console.log('Backslash vertex:', vertices.get('\\'));
} catch (error) {
  console.error('Error:', error);
}