diff --git a/antlr-4.13.1-complete.jar b/antlr-4.13.1-complete.jar new file mode 100644 index 000000000..f539ab040 Binary files /dev/null and b/antlr-4.13.1-complete.jar differ diff --git a/antlr-4.13.2-complete.jar b/antlr-4.13.2-complete.jar new file mode 100644 index 000000000..75bfcc397 Binary files /dev/null and b/antlr-4.13.2-complete.jar differ diff --git a/packages/mermaid/src/diagrams/flowchart/parser/antlr/FlowLexer.g4 b/packages/mermaid/src/diagrams/flowchart/parser/antlr/FlowLexer.g4 index 4575a846a..563b8feba 100644 --- a/packages/mermaid/src/diagrams/flowchart/parser/antlr/FlowLexer.g4 +++ b/packages/mermaid/src/diagrams/flowchart/parser/antlr/FlowLexer.g4 @@ -9,14 +9,15 @@ tokens { // Lexer modes to match Jison's state-based lexing // Based on Jison: %x string, md_string, acc_title, acc_descr, acc_descr_multiline, dir, vertex, text, etc. +// Shape data tokens - MUST be defined FIRST for absolute precedence over LINK_ID +// Match exactly "@{" like Jison does (no whitespace allowed between @ and {) +SHAPE_DATA_START: '@{' -> pushMode(SHAPE_DATA_MODE); + // Accessibility tokens ACC_TITLE: 'accTitle' WS* ':' WS* -> pushMode(ACC_TITLE_MODE); ACC_DESCR: 'accDescr' WS* ':' WS* -> pushMode(ACC_DESCR_MODE); ACC_DESCR_MULTI: 'accDescr' WS* '{' WS* -> pushMode(ACC_DESCR_MULTILINE_MODE); -// Shape data tokens - moved after LINK_ID for proper precedence -// This will be defined later to ensure proper token precedence - // Interactivity tokens CALL: 'call' WS+ -> pushMode(CALLBACKNAME_MODE); HREF: 'href' WS; @@ -49,17 +50,14 @@ DIRECTION_BT: 'direction' WS+ 'BT' ~[\n]*; DIRECTION_RL: 'direction' WS+ 'RL' ~[\n]*; DIRECTION_LR: 'direction' WS+ 'LR' ~[\n]*; -// Shape data tokens - defined BEFORE LINK_ID to handle conflicts -// The longer match "@{" should take precedence over "@" in LINK_ID -SHAPE_DATA_START: '@' WS* '{' -> pushMode(SHAPE_DATA_MODE); - // ELLIPSE_START must come very early to avoid conflicts with PAREN_START ELLIPSE_START: '(-' -> pushMode(ELLIPSE_TEXT_MODE); -// Link ID token - matches edge IDs like "e1@" but not shape data "@{" -// Since SHAPE_DATA_START is defined earlier, it will take precedence over LINK_ID for "@{" -// This allows LINK_ID to match "e1@" without conflict (matches Jison pattern [^\s\"]+\@) -LINK_ID: ~[ \t\r\n"]+ '@'; +// Link ID token - matches edge IDs like "e1@" when followed by link patterns +// Uses a negative lookahead pattern to match the Jison lookahead (?=[^\{\"]) +// This prevents LINK_ID from matching "e1@{" and allows SHAPE_DATA_START to match "@{" correctly +// The pattern matches any non-whitespace followed by @ but only when NOT followed by { or " +LINK_ID: ~[ \t\r\n"]+ '@' {this.inputStream.LA(1) != '{'.charCodeAt(0) && this.inputStream.LA(1) != '"'.charCodeAt(0)}?; NUM: [0-9]+; BRKT: '#'; @@ -71,10 +69,12 @@ COMMA: ','; MULT: '*'; // Edge patterns - these are complex in Jison, need careful translation +// Normal edges without text: A-->B (matches Jison: \s*[xo<]?\-\-+[-xo>]\s*) - must come first to avoid conflicts +LINK_NORMAL: WS* [xo<]? '--' '-'* [-xo>] WS*; // Normal edges with text: A-- text ---B (matches Jison: \s*[xo<]?\-\-\s* -> START_LINK) START_LINK_NORMAL: WS* [xo<]? '--' WS+ -> pushMode(EDGE_TEXT_MODE); -// Normal edges without text: A-->B (matches Jison: \s*[xo<]?\-\-+[-xo>]\s*) -LINK_NORMAL: WS* [xo<]? '--' '-'* [-xo>] WS*; +// Normal edges with text (no space): A--text---B - match -- followed by any non-dash character +START_LINK_NORMAL_NOSPACE: WS* [xo<]? 
'--' -> pushMode(EDGE_TEXT_MODE); // Pipe-delimited edge text: A--x| (linkStatement for arrowText) - matches Jison linkStatement pattern LINK_STATEMENT_NORMAL: WS* [xo<]? '--' '-'* [xo<]?; @@ -229,12 +229,13 @@ ELLIPSE_TEXT: (~[-)])+; mode TRAP_TEXT_MODE; TRAP_END_BRACKET: '\\]' -> popMode, type(TRAPEND); INVTRAP_END_BRACKET: '/]' -> popMode, type(INVTRAPEND); -TRAP_TEXT: (~[\\\\/\]])+; +TRAP_TEXT: (~[\\/\]])+; mode EDGE_TEXT_MODE; // Handle space-delimited pattern: A-- text ----B or A-- text -->B (matches Jison: [^-]|\-(?!\-)+) // Must handle both cases: extra dashes without arrow (----) and dashes with arrow (-->) EDGE_TEXT_LINK_END: WS* '--' '-'* [-xo>]? WS* -> popMode, type(LINK_NORMAL); +// Match any character including spaces and single dashes, but not double dashes EDGE_TEXT: (~[-] | '-' ~[-])+; mode THICK_EDGE_TEXT_MODE; diff --git a/packages/mermaid/src/diagrams/flowchart/parser/antlr/FlowParser.g4 b/packages/mermaid/src/diagrams/flowchart/parser/antlr/FlowParser.g4 index 55c7623e2..7cdbb3336 100644 --- a/packages/mermaid/src/diagrams/flowchart/parser/antlr/FlowParser.g4 +++ b/packages/mermaid/src/diagrams/flowchart/parser/antlr/FlowParser.g4 @@ -103,9 +103,11 @@ link: linkStatement arrowText spaceList? | linkStatement | START_LINK_NORMAL edgeText LINK_NORMAL + | START_LINK_NORMAL_NOSPACE edgeText LINK_NORMAL | START_LINK_THICK edgeText LINK_THICK | START_LINK_DOTTED edgeText LINK_DOTTED | LINK_ID START_LINK_NORMAL edgeText LINK_NORMAL + | LINK_ID START_LINK_NORMAL_NOSPACE edgeText LINK_NORMAL | LINK_ID START_LINK_THICK edgeText LINK_THICK | LINK_ID START_LINK_DOTTED edgeText LINK_DOTTED ; diff --git a/packages/mermaid/src/diagrams/flowchart/parser/antlr/FlowParserVisitor.ts b/packages/mermaid/src/diagrams/flowchart/parser/antlr/FlowParserVisitor.ts index cc96d9506..a4ef90c58 100644 --- a/packages/mermaid/src/diagrams/flowchart/parser/antlr/FlowParserVisitor.ts +++ b/packages/mermaid/src/diagrams/flowchart/parser/antlr/FlowParserVisitor.ts @@ -35,11 +35,14 @@ export class FlowParserVisitor { const text = ctx.getText(); const contextName = ctx.constructor.name; + console.log('visit called with context:', contextName, 'text:', text); + // Only process specific contexts to avoid duplicates if (contextName === 'StartContext') { + console.log('Processing StartContext'); // Parse direction from graph declaration // Let FlowDB handle direction symbol mapping just like Jison does - const directionPattern = /graph\s+(TD|TB|BT|RL|LR|>|<|\^|v)/i; + const directionPattern = /graph\s+(td|tb|bt|rl|lr|>|<|\^|v)/i; const dirMatch = text.match(directionPattern); if (dirMatch) { // Pass the raw direction value to FlowDB - it will handle symbol mapping @@ -58,7 +61,9 @@ export class FlowParserVisitor { } private visitChildren(ctx: any): void { - if (!ctx || !ctx.children) return; + if (!ctx?.children) { + return; + } for (const child of ctx.children) { this.visit(child); @@ -81,6 +86,7 @@ export class FlowParserVisitor { // Parse different types of connections and nodes this.parseConnections(text); this.parseStandaloneNodes(text); + this.parseShapeData(text); this.parseClickStatements(text); this.parseLinkStyleStatements(text); this.parseEdgeCurveProperties(text); @@ -96,8 +102,10 @@ export class FlowParserVisitor { for (const line of lines) { const trimmedLine = line.trim(); - // Skip lines that don't contain connections or contain @{curve: ...} - if (!trimmedLine || !trimmedLine.includes('-->') || trimmedLine.includes('@{')) { + // Skip lines that don't contain connections or contain edge curve 
properties like @{curve: ...} + // But allow node shape data like A@{label: "text"} + const isEdgeCurveProperty = trimmedLine.includes('@{') && trimmedLine.includes('curve:'); + if (!trimmedLine || !trimmedLine.includes('-->') || isEdgeCurveProperty) { processedLines.push(line); continue; } @@ -154,7 +162,7 @@ export class FlowParserVisitor { const connectionPattern = /^([A-Za-z0-9_]+(?:\[[^\]]*\]|\([^)]*\)|\{[^}]*\})*)\s*(-+)\s*(.+?)\s*(--?>)\s*([A-Za-z0-9_]+(?:\[[^\]]*\]|\([^)]*\)|\{[^}]*\})*)\s*(.*)$/; - while (remaining && remaining.includes('-->')) { + while (remaining?.includes('-->')) { const match = remaining.match(connectionPattern); if (match) { const [, fromNode, startEdge, edgeText, endEdge, toNode, rest] = match; @@ -370,7 +378,11 @@ export class FlowParserVisitor { private parseStandaloneNodes(text: string): void { // Parse nodes that might not be in connections (for completeness) - // Include markdown string support + // Include markdown string support and shape data + + // First, handle shape data nodes separately + this.parseShapeDataNodes(text); + const nodePatterns = [ // IMPORTANT: Specific bracket patterns MUST come before general square bracket pattern // Trapezoid nodes: A[/Text\] @@ -1104,7 +1116,7 @@ export class FlowParserVisitor { } } } - } catch (error) { + } catch (_error) { // Fallback to simple length calculation if destructLink fails const dashMatch = edgeType.match(/-+/g); const equalsMatch = edgeType.match(/=+/g); @@ -1220,6 +1232,7 @@ export class FlowParserVisitor { // Then process the remaining content (nodes and connections) this.parseConnections(text); this.parseStandaloneNodes(text); + this.parseShapeData(text); this.parseClickStatements(text); this.parseLinkStyleStatements(text); this.parseEdgeCurveProperties(text); @@ -1714,6 +1727,483 @@ export class FlowParserVisitor { } } + private parseShapeDataNodes(text: string): void { + // Parse standalone nodes with shape data using @{} syntax + // Pattern: NodeId@{shape: shapeType, label: "text", other: "value"} + // Reference: flow.jison SHAPE_DATA handling + + // Clean the text to remove ANTLR artifacts such as the trailing <EOF> marker from getText() + const cleanText = text.replace(/<EOF>/g, '').trim(); + + // Use a more sophisticated approach to find shape data blocks + const nodeIdPattern = /([A-Za-z0-9_]+)@\{/g; + let match; + + while ((match = nodeIdPattern.exec(cleanText)) !== null) { + const nodeId = match[1]; + const startIndex = match.index + match[0].length; + + // Find the matching closing brace, handling nested braces and quoted strings + const shapeDataContent = this.extractShapeDataContent(cleanText, startIndex); + + if (shapeDataContent !== null) { + // Parse the shape data content (key: value pairs) + const shapeData = this.parseShapeDataContent(shapeDataContent); + + // Apply the shape data to the node + this.applyShapeDataToNode(nodeId, shapeData); + } + } + } + + private parseShapeData(text: string): void { + // Parse node shape data using @{} syntax + // Pattern: NodeId@{shape: shapeType, label: "text", other: "value"} + // Reference: flow.jison SHAPE_DATA handling + + // Clean the text to remove ANTLR artifacts such as the trailing <EOF> marker from getText() + const cleanText = text.replace(/<EOF>/g, '').trim(); + + // Use a more sophisticated approach to find shape data blocks + const nodeIdPattern = /([A-Za-z0-9_]+)@\{/g; + let match; + + while ((match = nodeIdPattern.exec(cleanText)) !== null) { + const nodeId = match[1]; + const startIndex = match.index + match[0].length; + + // Find the matching closing brace, handling nested braces and quoted strings + const shapeDataContent = 
this.extractShapeDataContent(cleanText, startIndex); + + if (shapeDataContent !== null) { + // Parse the shape data content (key: value pairs) + const shapeData = this.parseShapeDataContent(shapeDataContent); + + // Apply the shape data to the node + this.applyShapeDataToNode(nodeId, shapeData); + } + } + } + + private extractShapeDataContent(text: string, startIndex: number): string | null { + let braceCount = 1; + let inQuotes = false; + let quoteChar = ''; + let i = startIndex; + + while (i < text.length && braceCount > 0) { + const char = text[i]; + + if (!inQuotes && (char === '"' || char === "'")) { + inQuotes = true; + quoteChar = char; + } else if (inQuotes && char === quoteChar) { + // Check if it's escaped + if (i === 0 || text[i - 1] !== '\\') { + inQuotes = false; + quoteChar = ''; + } + } else if (!inQuotes) { + if (char === '{') { + braceCount++; + } else if (char === '}') { + braceCount--; + } + } + + i++; + } + + if (braceCount === 0) { + return text.substring(startIndex, i - 1); + } + + return null; + } + + private parseShapeDataContent(content: string): Record<string, string> { + const data: Record<string, string> = {}; + + // Split by commas, but handle quoted strings properly + const pairs = this.splitShapeDataPairs(content); + + for (const pair of pairs) { + const colonIndex = pair.indexOf(':'); + if (colonIndex > 0) { + const key = pair.substring(0, colonIndex).trim(); + let value = pair.substring(colonIndex + 1).trim(); + + // Remove quotes if present + if ( + (value.startsWith('"') && value.endsWith('"')) || + (value.startsWith("'") && value.endsWith("'")) + ) { + value = value.slice(1, -1); + } + + data[key] = value; + } + } + + return data; + } + + private splitShapeDataPairs(content: string): string[] { + const pairs: string[] = []; + let current = ''; + let inQuotes = false; + let quoteChar = ''; + + for (let i = 0; i < content.length; i++) { + const char = content[i]; + + if (!inQuotes && (char === '"' || char === "'")) { + inQuotes = true; + quoteChar = char; + current += char; + } else if (inQuotes && char === quoteChar) { + inQuotes = false; + quoteChar = ''; + current += char; + } else if (!inQuotes && char === ',') { + if (current.trim()) { + pairs.push(current.trim()); + } + current = ''; + } else { + current += char; + } + } + + if (current.trim()) { + pairs.push(current.trim()); + } + + return pairs; + } + + private applyShapeDataToNode(nodeId: string, shapeData: Record<string, string>): void { + // Ensure the node exists + if (!this.db.getVertices().has(nodeId)) { + this.db.addVertex(nodeId, nodeId, 'square', [], '', ''); + } + + // Apply shape if specified + if (shapeData.shape) { + const vertex = this.db.getVertices().get(nodeId); + if (vertex) { + vertex.type = this.mapShapeToType(shapeData.shape); + } + } + + // Apply label if specified + if (shapeData.label) { + const vertex = this.db.getVertices().get(nodeId); + if (vertex) { + vertex.text = shapeData.label; + } + } + + // Apply other properties as needed + // This can be extended to handle more shape data properties + } + + private mapShapeToType(shape: string): string { + // Map shape names to vertex types + const shapeMap: Record<string, string> = { + squareRect: 'square', + rect: 'square', + square: 'square', + circle: 'circle', + ellipse: 'ellipse', + diamond: 'diamond', + hexagon: 'hexagon', + stadium: 'stadium', + cylinder: 'cylinder', + doublecircle: 'doublecircle', + subroutine: 'subroutine', + trapezoid: 'trapezoid', + inv_trapezoid: 'inv_trapezoid', + lean_right: 'lean_right', + lean_left: 'lean_left', + odd: 'odd', + }; + + return shapeMap[shape] 
|| 'square'; + } + + // Vertex statement visitor - handles node definitions with optional shape data + visitVertexStatement(ctx: any): any { + console.log('visitVertexStatement called with:', ctx.getText()); + + // Handle different vertex statement patterns: + // - node shapeData + // - node spaceList + // - node + // - vertexStatement link node shapeData + // - vertexStatement link node + + if (ctx.node && ctx.shapeData) { + console.log('Found node with shape data'); + // Single node with shape data: node shapeData + const nodeCtx = Array.isArray(ctx.node()) ? ctx.node()[ctx.node().length - 1] : ctx.node(); + const shapeDataCtx = Array.isArray(ctx.shapeData()) + ? ctx.shapeData()[ctx.shapeData().length - 1] + : ctx.shapeData(); + + this.visitNode(nodeCtx); + this.visitShapeDataForNode(shapeDataCtx, nodeCtx); + } else if (ctx.node) { + console.log('Found node without shape data'); + // Single node or chained nodes without shape data + const nodes = Array.isArray(ctx.node()) ? ctx.node() : [ctx.node()]; + for (const nodeCtx of nodes) { + this.visitNode(nodeCtx); + } + } + + // Handle links if present + if (ctx.link) { + const links = Array.isArray(ctx.link()) ? ctx.link() : [ctx.link()]; + for (const linkCtx of links) { + this.visitLink(linkCtx); + } + } + + // Continue with default visitor behavior + return this.visitChildren(ctx); + } + + // Node visitor - handles individual node definitions + visitNode(ctx: any): any { + if (ctx.styledVertex) { + const vertices = Array.isArray(ctx.styledVertex()) + ? ctx.styledVertex() + : [ctx.styledVertex()]; + for (const vertexCtx of vertices) { + this.visitStyledVertex(vertexCtx); + } + } + + return this.visitChildren(ctx); + } + + // Styled vertex visitor - handles vertex with optional style + visitStyledVertex(ctx: any): any { + if (ctx.vertex) { + this.visitVertex(ctx.vertex()); + } + + // Handle style separator and class assignment + if (ctx.STYLE_SEPARATOR && ctx.idString) { + const vertexCtx = ctx.vertex(); + const classId = ctx.idString().getText(); + + // Extract node ID from vertex context + const nodeId = this.extractNodeIdFromVertexContext(vertexCtx); + if (nodeId) { + this.db.setClass(nodeId, classId); + } + } + + return this.visitChildren(ctx); + } + + // Vertex visitor - handles basic vertex definitions + visitVertex(ctx: any): any { + // Extract node information from vertex context + let nodeId = ''; + let nodeText = ''; + let nodeType = 'square'; // default + + // Handle different vertex types based on the grammar + if (ctx.NODE_STRING) { + nodeId = ctx.NODE_STRING().getText(); + nodeText = nodeId; // default text is the ID + } else if (ctx.getText) { + const fullText = ctx.getText(); + // Parse vertex text to extract ID and shape information + const match = fullText.match(/^([A-Za-z0-9_]+)/); + if (match) { + nodeId = match[1]; + nodeText = nodeId; + } + + // Determine node type from shape delimiters + if (fullText.includes('[') && fullText.includes(']')) { + nodeType = 'square'; + // Extract text between brackets + const textMatch = fullText.match(/\[([^\]]*)\]/); + if (textMatch) { + nodeText = textMatch[1]; + } + } else if (fullText.includes('(') && fullText.includes(')')) { + nodeType = 'round'; + // Extract text between parentheses + const textMatch = fullText.match(/\(([^\)]*)\)/); + if (textMatch) { + nodeText = textMatch[1]; + } + } + // Add more shape type detection as needed + } + + // Add the vertex to the database if we have a valid node ID + if (nodeId) { + this.db.addVertex(nodeId, nodeText, nodeType); + } + + return 
this.visitChildren(ctx); + } + + // Link visitor - handles edge/connection definitions + visitLink(ctx: any): any { + // Handle link parsing - this is a placeholder for now + // The actual link parsing is complex and handled by the existing regex-based approach + return this.visitChildren(ctx); + } + + // Shape data visitor methods + visitShapeData(ctx: any): string { + // Handle shape data parsing through ANTLR visitor pattern + const content = this.visitShapeDataContent(ctx.shapeDataContent()); + return content; + } + + visitShapeDataForNode(shapeDataCtx: any, nodeCtx: any): void { + console.log('visitShapeDataForNode called'); + // Handle shape data for a specific node + const content = this.visitShapeData(shapeDataCtx); + const nodeId = this.extractNodeIdFromVertexContext(nodeCtx); + + console.log('Shape data content:', content); + console.log('Node ID:', nodeId); + + if (nodeId && content) { + // Parse the shape data content (key: value pairs) + const shapeData = this.parseShapeDataContent(content); + + console.log('Parsed shape data:', shapeData); + + // Apply the shape data to the node using FlowDB + this.applyShapeDataToNodeViaDB(nodeId, shapeData); + } + } + + visitShapeDataContent(ctx: any): string { + // Collect all shape data content tokens + let content = ''; + + if (ctx.SHAPE_DATA_CONTENT) { + if (Array.isArray(ctx.SHAPE_DATA_CONTENT())) { + content += ctx + .SHAPE_DATA_CONTENT() + .map((token: any) => token.getText()) + .join(''); + } else { + content += ctx.SHAPE_DATA_CONTENT().getText(); + } + } + + // Handle string content + if (ctx.SHAPE_DATA_STRING_START && ctx.SHAPE_DATA_STRING_CONTENT && ctx.SHAPE_DATA_STRING_END) { + const stringContents = ctx.SHAPE_DATA_STRING_CONTENT(); + if (Array.isArray(stringContents)) { + content += stringContents.map((token: any) => `"${token.getText()}"`).join(''); + } else { + content += `"${stringContents.getText()}"`; + } + } + + // Handle nested shape data content + if (ctx.shapeDataContent && ctx.shapeDataContent().length > 0) { + for (const childCtx of ctx.shapeDataContent()) { + content += this.visitShapeDataContent(childCtx); + } + } + + return content; + } + + // Helper method to extract node ID from vertex context + extractNodeIdFromVertexContext(vertexCtx: any): string | null { + if (!vertexCtx) return null; + + // Try different ways to extract the node ID from vertex context + if (vertexCtx.NODE_STRING) { + return vertexCtx.NODE_STRING().getText(); + } + + if (vertexCtx.getText) { + const text = vertexCtx.getText(); + // Extract node ID from vertex text (before any shape delimiters) + const match = text.match(/^([A-Za-z0-9_]+)/); + return match ? 
match[1] : null; + } + + return null; + } + + // Helper method to apply shape data to node via FlowDB (like Jison does) + applyShapeDataToNodeViaDB(nodeId: string, shapeData: any): void { + // Convert shape data to YAML string format that FlowDB expects + let yamlContent = ''; + + if (typeof shapeData === 'object' && shapeData !== null) { + const pairs: string[] = []; + for (const [key, value] of Object.entries(shapeData)) { + if (typeof value === 'string') { + pairs.push(`${key}: "${value}"`); + } else { + pairs.push(`${key}: ${value}`); + } + } + yamlContent = pairs.join('\n'); + } else if (typeof shapeData === 'string') { + yamlContent = shapeData; + } + + // Call FlowDB addVertex with shape data (8th parameter) like Jison does + // addVertex(id, textObj, textType, style, classes, dir, props, shapeData) + this.db.addVertex( + nodeId, + undefined, + undefined, + undefined, + undefined, + undefined, + undefined, + yamlContent + ); + } + + private extractNodeIdFromShapeDataContext(ctx: any): string | null { + // Walk up the parse tree to find the node ID + let parent = ctx.parent; + + while (parent) { + // Check if this is a vertexStatement with a node + if (parent.node && parent.node().length > 0) { + const nodeCtx = parent.node(0); + if (nodeCtx.styledVertex && nodeCtx.styledVertex().vertex) { + const vertexCtx = nodeCtx.styledVertex().vertex(); + if (vertexCtx.NODE_STRING) { + return vertexCtx.NODE_STRING().getText(); + } + } + } + + // Check if this is a standaloneVertex + if (parent.NODE_STRING) { + return parent.NODE_STRING().getText(); + } + + parent = parent.parent; + } + + return null; + } + // Text handling methods for markdown support visitStringText(ctx: any): { text: string; type: string } { return { text: ctx.STR().getText(), type: 'string' }; diff --git a/packages/mermaid/src/diagrams/flowchart/parser/antlr/antlr-parser.ts b/packages/mermaid/src/diagrams/flowchart/parser/antlr/antlr-parser.ts index 112e443d5..bcf914da2 100644 --- a/packages/mermaid/src/diagrams/flowchart/parser/antlr/antlr-parser.ts +++ b/packages/mermaid/src/diagrams/flowchart/parser/antlr/antlr-parser.ts @@ -43,31 +43,27 @@ class FlowchartListener implements ParseTreeListener { // Handle vertex statements (nodes and edges) exitVertexStatement = (ctx: VertexStatementContext) => { - try { - // Handle the current node - const nodeCtx = ctx.node(); - const shapeDataCtx = ctx.shapeData(); + // Handle the current node + const nodeCtx = ctx.node(); + const shapeDataCtx = ctx.shapeData(); - if (nodeCtx) { - this.processNode(nodeCtx, shapeDataCtx); + if (nodeCtx) { + this.processNode(nodeCtx, shapeDataCtx); + } + + // Handle edges (links) - this is where A-->B gets processed + const linkCtx = ctx.link(); + const prevVertexCtx = ctx.vertexStatement(); + + if (linkCtx && prevVertexCtx && nodeCtx) { + // We have a link: prevVertex --link--> currentNode + // Extract arrays of node IDs to handle ampersand chaining + const startNodeIds = this.extractNodeIds(prevVertexCtx); + const endNodeIds = this.extractNodeIds(nodeCtx); + + if (startNodeIds.length > 0 && endNodeIds.length > 0) { + this.processEdgeArray(startNodeIds, endNodeIds, linkCtx); } - - // Handle edges (links) - this is where A-->B gets processed - const linkCtx = ctx.link(); - const prevVertexCtx = ctx.vertexStatement(); - - if (linkCtx && prevVertexCtx && nodeCtx) { - // We have a link: prevVertex --link--> currentNode - // Extract arrays of node IDs to handle ampersand chaining - const startNodeIds = this.extractNodeIds(prevVertexCtx); - const endNodeIds = 
this.extractNodeIds(nodeCtx); - - if (startNodeIds.length > 0 && endNodeIds.length > 0) { - this.processEdgeArray(startNodeIds, endNodeIds, linkCtx); - } - } - } catch (error) { - // Error handling - silently continue for now } }; @@ -280,11 +276,42 @@ class FlowchartListener implements ParseTreeListener { yamlContent ); } - } catch (error) { + } catch (_error) { // Error handling - silently continue for now } }; + // Reserved keywords that cannot be used as node IDs (matches Jison parser) + private static readonly RESERVED_KEYWORDS = [ + 'graph', + 'flowchart', + 'flowchart-elk', + 'style', + 'linkStyle', + 'interpolate', + 'classDef', + 'class', + '_self', + '_blank', + '_parent', + '_top', + 'end', + 'subgraph', + ]; + + // Validate that a node ID doesn't start with reserved keywords + private validateNodeId(nodeId: string) { + for (const keyword of FlowchartListener.RESERVED_KEYWORDS) { + if ( + nodeId.startsWith(keyword + '.') || + nodeId.startsWith(keyword + '-') || + nodeId.startsWith(keyword + '/') + ) { + throw new Error(`Node ID cannot start with reserved keyword: ${keyword}`); + } + } + } + private processNode(nodeCtx: any, shapeDataCtx?: any) { const styledVertexCtx = nodeCtx.styledVertex(); if (!styledVertexCtx) { @@ -300,6 +327,9 @@ class FlowchartListener implements ParseTreeListener { const idCtx = vertexCtx.idString(); const nodeId = idCtx ? idCtx.getText() : ''; + // Validate node ID against reserved keywords + this.validateNodeId(nodeId); + // Check for class application pattern: vertex STYLE_SEPARATOR idString const children = styledVertexCtx.children; if (children && children.length >= 3) { @@ -319,10 +349,13 @@ class FlowchartListener implements ParseTreeListener { // Get node text - if there's explicit text, use it, otherwise use the ID const textCtx = vertexCtx.text(); - const nodeText = textCtx ? 
textCtx.getText() : nodeId; - - // Create text object - const textObj = { text: nodeText, type: 'text' }; + let textObj; + if (textCtx) { + const textWithType = this.extractTextWithType(textCtx); + textObj = { text: textWithType.text, type: textWithType.type }; + } else { + textObj = { text: nodeId, type: 'text' }; + } // Determine node shape based on the vertex structure let nodeShape = 'square'; // default @@ -389,6 +422,7 @@ class FlowchartListener implements ParseTreeListener { let shapeDataYaml = ''; if (shapeDataCtx) { const shapeDataText = shapeDataCtx.getText(); + console.log('Processing shape data:', shapeDataText); // Extract the content between { and } for YAML parsing // e.g., "@{ shape: rounded }" -> "shape: rounded" @@ -407,111 +441,125 @@ class FlowchartListener implements ParseTreeListener { yamlContent = yamlContent.substring(1, yamlContent.length - 1).trim(); } - shapeDataYaml = yamlContent; + // Normalize YAML indentation to fix inconsistent whitespace + const lines = yamlContent.split('\n'); + const normalizedLines = lines + .map((line) => line.trim()) // Remove leading/trailing whitespace + .filter((line) => line.length > 0); // Remove empty lines + + shapeDataYaml = normalizedLines.join('\n'); } // Add vertex to database this.db.addVertex(nodeId, textObj, nodeShape, [], [], '', {}, shapeDataYaml); - // Note: Subgraph node tracking is handled in processEdge method - // to ensure correct order matching Jison parser behavior + // Track individual nodes in current subgraph if we're inside one + // Use unshift() to match the Jison behavior for node ordering + if (this.subgraphStack.length > 0) { + const currentSubgraph = this.subgraphStack[this.subgraphStack.length - 1]; + if (!currentSubgraph.nodes.includes(nodeId)) { + currentSubgraph.nodes.unshift(nodeId); + } + } } private processNodeWithShapeData(styledVertexCtx: any, shapeDataCtx: any) { - try { - // Extract node ID from styled vertex - const nodeId = this.extractNodeId(styledVertexCtx); - if (!nodeId) { - return; - } - - // Extract vertex context to get text and shape - const vertexCtx = styledVertexCtx.vertex(); - if (!vertexCtx) { - return; - } - - // Get node text - if there's explicit text, use it, otherwise use the ID - const textCtx = vertexCtx.text(); - const nodeText = textCtx ? 
textCtx.getText() : nodeId; - - // Create text object - const textObj = { text: nodeText, type: 'text' }; - - // Get node shape from vertex type - let nodeShape = 'square'; // default - - // Shape detection logic for trapezoid and other shapes - - if (vertexCtx.SQS()) { - nodeShape = 'square'; - } else if (vertexCtx.CIRCLE_START()) { - nodeShape = 'circle'; - } else if (vertexCtx.PS()) { - nodeShape = 'round'; - } else if (vertexCtx.DOUBLECIRCLE_START()) { - nodeShape = 'doublecircle'; - } else if (vertexCtx.ELLIPSE_START()) { - nodeShape = 'ellipse'; - } else if (vertexCtx.STADIUM_START()) { - nodeShape = 'stadium'; - } else if (vertexCtx.SUBROUTINE_START()) { - nodeShape = 'subroutine'; - } else if (vertexCtx.DIAMOND_START().length === 2) { - nodeShape = 'hexagon'; - } else if (vertexCtx.DIAMOND_START().length === 1) { - nodeShape = 'diamond'; - } else if (vertexCtx.TAGEND()) { - nodeShape = 'odd'; - } else if ( - vertexCtx.TRAP_START && - vertexCtx.TRAP_START() && - vertexCtx.TRAPEND && - vertexCtx.TRAPEND() - ) { - nodeShape = 'trapezoid'; - } else if ( - vertexCtx.INVTRAP_START && - vertexCtx.INVTRAP_START() && - vertexCtx.INVTRAPEND && - vertexCtx.INVTRAPEND() - ) { - nodeShape = 'inv_trapezoid'; - } else if ( - vertexCtx.TRAP_START && - vertexCtx.TRAP_START() && - vertexCtx.INVTRAPEND && - vertexCtx.INVTRAPEND() - ) { - nodeShape = 'lean_right'; - } else if ( - vertexCtx.INVTRAP_START && - vertexCtx.INVTRAP_START() && - vertexCtx.TRAPEND && - vertexCtx.TRAPEND() - ) { - nodeShape = 'lean_left'; - } - - // Shape detection complete - - // Extract shape data content - let shapeDataContent = ''; - if (shapeDataCtx) { - const contentCtx = shapeDataCtx.shapeDataContent(); - if (contentCtx) { - shapeDataContent = contentCtx.getText(); - } - } - - // Add vertex to database with shape data - this.db.addVertex(nodeId, textObj, nodeShape, [], [], '', {}, shapeDataContent); - - // Note: Subgraph node tracking is handled in edge processing methods - // to match Jison parser behavior which collects nodes from statements - } catch (_error) { - // Error handling for processNodeWithShapeData + // Extract node ID from styled vertex + const nodeId = this.extractNodeId(styledVertexCtx); + if (!nodeId) { + return; } + + // Validate node ID against reserved keywords + this.validateNodeId(nodeId); + + // Extract vertex context to get text and shape + const vertexCtx = styledVertexCtx.vertex(); + if (!vertexCtx) { + return; + } + + // Get node text - if there's explicit text, use it, otherwise use the ID + const textCtx = vertexCtx.text(); + let textObj; + if (textCtx) { + const textWithType = this.extractTextWithType(textCtx); + textObj = { text: textWithType.text, type: textWithType.type }; + } else { + textObj = { text: nodeId, type: 'text' }; + } + + // Get node shape from vertex type + let nodeShape = 'square'; // default + + // Shape detection logic for trapezoid and other shapes + + if (vertexCtx.SQS()) { + nodeShape = 'square'; + } else if (vertexCtx.CIRCLE_START()) { + nodeShape = 'circle'; + } else if (vertexCtx.PS()) { + nodeShape = 'round'; + } else if (vertexCtx.DOUBLECIRCLE_START()) { + nodeShape = 'doublecircle'; + } else if (vertexCtx.ELLIPSE_START()) { + nodeShape = 'ellipse'; + } else if (vertexCtx.STADIUM_START()) { + nodeShape = 'stadium'; + } else if (vertexCtx.SUBROUTINE_START()) { + nodeShape = 'subroutine'; + } else if (vertexCtx.DIAMOND_START().length === 2) { + nodeShape = 'hexagon'; + } else if (vertexCtx.DIAMOND_START().length === 1) { + nodeShape = 'diamond'; + } else if 
(vertexCtx.TAGEND()) { + nodeShape = 'odd'; + } else if ( + vertexCtx.TRAP_START && + vertexCtx.TRAP_START() && + vertexCtx.TRAPEND && + vertexCtx.TRAPEND() + ) { + nodeShape = 'trapezoid'; + } else if ( + vertexCtx.INVTRAP_START && + vertexCtx.INVTRAP_START() && + vertexCtx.INVTRAPEND && + vertexCtx.INVTRAPEND() + ) { + nodeShape = 'inv_trapezoid'; + } else if ( + vertexCtx.TRAP_START && + vertexCtx.TRAP_START() && + vertexCtx.INVTRAPEND && + vertexCtx.INVTRAPEND() + ) { + nodeShape = 'lean_right'; + } else if ( + vertexCtx.INVTRAP_START && + vertexCtx.INVTRAP_START() && + vertexCtx.TRAPEND && + vertexCtx.TRAPEND() + ) { + nodeShape = 'lean_left'; + } + + // Shape detection complete + + // Extract shape data content + let shapeDataContent = ''; + if (shapeDataCtx) { + const contentCtx = shapeDataCtx.shapeDataContent(); + if (contentCtx) { + shapeDataContent = contentCtx.getText(); + } + } + + // Add vertex to database with shape data - let validation errors bubble up + this.db.addVertex(nodeId, textObj, nodeShape, [], [], '', {}, shapeDataContent); + + // Note: Subgraph node tracking is handled in edge processing methods + // to match Jison parser behavior which collects nodes from statements } private findStyledVertexInNode(nodeCtx: any): any | null { @@ -764,15 +812,20 @@ class FlowchartListener implements ParseTreeListener { // Track nodes in current subgraph if we're inside one if (this.subgraphStack.length > 0) { const currentSubgraph = this.subgraphStack[this.subgraphStack.length - 1]; - // Add all end nodes first, then start nodes (to match Jison behavior) - for (const endNodeId of endNodeIds) { - if (!currentSubgraph.nodes.includes(endNodeId)) { - currentSubgraph.nodes.push(endNodeId); - } - } + + // To match Jison behavior for chained vertices, we need to add nodes in the order + // that matches how Jison processes chains: rightmost nodes first + // For a chain a1-->a2-->a3, Jison produces [a3, a2, a1] + // The key insight: Jison processes left-to-right but builds the list by prepending + // So we add start nodes first (they appear earlier), then end nodes for (const startNodeId of startNodeIds) { if (!currentSubgraph.nodes.includes(startNodeId)) { - currentSubgraph.nodes.push(startNodeId); + currentSubgraph.nodes.unshift(startNodeId); // Add to beginning to match Jison order + } + } + for (const endNodeId of endNodeIds) { + if (!currentSubgraph.nodes.includes(endNodeId)) { + currentSubgraph.nodes.unshift(endNodeId); // Add to beginning to match Jison order } } } @@ -794,9 +847,11 @@ class FlowchartListener implements ParseTreeListener { // Check for arrowText (pipe-delimited text: |text|) at top level const arrowTextCtx = linkCtx.arrowText(); if (arrowTextCtx) { + console.log('Processing arrowText context'); const textContent = arrowTextCtx.text(); if (textContent) { - linkType.text = { text: textContent.getText(), type: 'text' }; + const textWithType = this.extractTextWithType(textContent); + linkType.text = { text: textWithType.text, type: textWithType.type }; } } @@ -872,9 +927,46 @@ class FlowchartListener implements ParseTreeListener { // Check for edge text const edgeTextCtx = linkCtx.edgeText(); if (edgeTextCtx) { - const textContent = edgeTextCtx.getText(); - if (textContent) { - linkType.text = { text: textContent, type: 'text' }; + console.log('Processing edgeText context'); + // edgeText contains a text context, so we need to extract it properly + const textCtx = edgeTextCtx.text ? 
edgeTextCtx.text() : null; + if (textCtx) { + const textWithType = this.extractTextWithType(textCtx); + linkType.text = { text: textWithType.text, type: textWithType.type }; + } else { + // Fallback to direct text extraction with processing + const textContent = edgeTextCtx.getText(); + + if (textContent) { + // Apply the same text processing logic as extractTextWithType + // First, trim whitespace to handle ANTLR parser boundary issues + const trimmedContent = textContent.trim(); + let processedText = trimmedContent; + let textType = 'text'; + + // Detect different text types based on wrapping characters + if ( + trimmedContent.startsWith('"') && + trimmedContent.endsWith('"') && + trimmedContent.length > 4 && + trimmedContent.charAt(1) === '`' && + trimmedContent.charAt(trimmedContent.length - 2) === '`' + ) { + // Markdown strings: "`text`" (wrapped in quotes) + processedText = trimmedContent.slice(2, -2); + textType = 'markdown'; + } else if ( + trimmedContent.startsWith('"') && + trimmedContent.endsWith('"') && + trimmedContent.length > 2 + ) { + // Quoted strings: "text" + processedText = trimmedContent.slice(1, -1); + textType = 'string'; + } + + linkType.text = { text: processedText, type: textType }; + } } } @@ -967,6 +1059,7 @@ class FlowchartListener implements ParseTreeListener { } // Push new subgraph context onto stack + this.subgraphStack.push({ id, title, @@ -1159,17 +1252,135 @@ class FlowchartListener implements ParseTreeListener { } }; - // Extract text content from a text context - private extractTextContent(textCtx: any): string { - if (!textCtx || !textCtx.children) return ''; + // Extract text content from a text context and determine label type + private extractTextContent(textCtx: any): { text: string; type: string } { + if (!textCtx || !textCtx.children) return { text: '', type: 'text' }; let text = ''; + let hasMarkdown = false; + for (const child of textCtx.children) { if (child.getText) { - text += child.getText(); + const childText = child.getText(); + + // Check if this child is an MD_STR token + if (child.symbol && child.symbol.type) { + // Get the token type name from the lexer + const tokenTypeName = this.getTokenTypeName(child.symbol.type); + if (tokenTypeName === 'MD_STR') { + hasMarkdown = true; + text += childText; + } else { + text += childText; + } + } else { + text += childText; + } } } - return text; + + return { + text: text, + type: hasMarkdown ? 
'markdown' : 'text', + }; + } + + // Helper method to get token type name from token type number + private getTokenTypeName(tokenType: number): string { + // This is a simplified approach - in a full implementation, you'd use the lexer's vocabulary + // For now, we'll use a different approach to detect MD_STR tokens + return 'UNKNOWN'; + } + + // Extract text content and detect markdown strings by checking for MD_STR tokens + private extractTextWithType(textCtx: any): { text: string; type: string } { + if (!textCtx) return { text: '', type: 'text' }; + + const fullText = textCtx.getText(); + + // Check if the text came from specific context types to determine the label type + let detectedType = 'text'; // default + + if (textCtx.children && textCtx.children.length > 0) { + const firstChild = textCtx.children[0]; + const childConstructor = firstChild.constructor.name; + + if (childConstructor === 'StringLiteralContext') { + // This came from a quoted string in the grammar + detectedType = 'string'; + } + } + + // Detect different text types based on wrapping characters (for cases where quotes are preserved) + if (fullText.startsWith('`') && fullText.endsWith('`') && fullText.length > 2) { + // Markdown strings: "`text`" + const strippedText = fullText.slice(1, -1); + + return { + text: strippedText, + type: 'markdown', + }; + } else if (fullText.startsWith('"') && fullText.endsWith('"') && fullText.length > 2) { + // Quoted strings: "text" (fallback case) + const strippedText = fullText.slice(1, -1); + + return { + text: strippedText, + type: 'string', + }; + } + + // Use the detected type from context analysis + return { + text: fullText, + type: detectedType, + }; + } + + // Check if a text context contains markdown by examining the lexer tokens + private checkForMarkdownInContext(textCtx: any): boolean { + // Walk through the token stream to find MD_STR tokens + if (!textCtx.start || !textCtx.stop) return false; + + const startIndex = textCtx.start.tokenIndex; + const stopIndex = textCtx.stop.tokenIndex; + + // Access the token stream from the parser context + // This is a more direct approach to check for MD_STR tokens + try { + const parser = textCtx.parser; + if (parser && parser.getTokenStream) { + const tokenStream = parser.getTokenStream(); + for (let i = startIndex; i <= stopIndex; i++) { + const token = tokenStream.get(i); + if (token && token.type) { + // Check if this token type corresponds to MD_STR + // MD_STR should be token type that comes after MD_STRING_START + const tokenText = token.text; + if (tokenText && !tokenText.includes('`') && !tokenText.includes('"')) { + // This might be the content of an MD_STR token + // Check if there are backticks around this token in the original input + const prevToken = i > 0 ? 
tokenStream.get(i - 1) : null; + const nextToken = tokenStream.get(i + 1); + + if (prevToken && nextToken) { + const prevText = prevToken.text || ''; + const nextText = nextToken.text || ''; + + // Look for the pattern: "`content`" where content is this token + if (prevText.includes('`') || nextText.includes('`')) { + return true; + } + } + } + } + } + } + } catch (error) { + // Fallback - if we can't access the token stream, return false + } + + return false; } // Handle arrow text (pipe-delimited edge text) @@ -1184,12 +1395,13 @@ class FlowchartListener implements ParseTreeListener { const child = children[i]; if (child.constructor.name === 'TextContext') { // Store the arrow text for use by the parent link rule - this.currentArrowText = this.extractTextContent(child); + const textWithType = this.extractTextWithType(child); + this.currentArrowText = textWithType.text; break; } } } - } catch (error) { + } catch (_error) { // Error handling - silently continue for now } }; @@ -1407,12 +1619,8 @@ class FlowchartListener implements ParseTreeListener { }; exitShapeDataContent = (_ctx: any) => { - try { - // Shape data content is collected and processed when used - // The actual processing happens in vertex statement handlers - } catch (_error) { - // Error handling for shape data content processing - } + // Shape data content is collected and processed when used + // The actual processing happens in vertex statement handlers }; } @@ -1469,7 +1677,20 @@ class ANTLRFlowParser { const parser = new ANTLRFlowParser(); // Export in the format expected by the existing code -export default { +const exportedParser = { parse: (input: string) => parser.parse(input), parser: parser, + yy: null as any, // This will be set by the test setup }; + +// Make sure the parser uses the external yy when available +Object.defineProperty(exportedParser, 'yy', { + get() { + return parser.yy; + }, + set(value) { + parser.yy = value; + }, +}); + +export default exportedParser; diff --git a/packages/mermaid/src/diagrams/flowchart/parser/antlr/debug-tokenizer.cjs b/packages/mermaid/src/diagrams/flowchart/parser/antlr/debug-tokenizer.cjs new file mode 100644 index 000000000..49412abba --- /dev/null +++ b/packages/mermaid/src/diagrams/flowchart/parser/antlr/debug-tokenizer.cjs @@ -0,0 +1,15 @@ +const { CharStream } = require('antlr4ng'); +const { FlowLexer } = require('./generated/FlowLexer.ts'); + +const input = 'D@{ shape: rounded }'; +console.log('Input:', input); + +const chars = CharStream.fromString(input); +const lexer = new FlowLexer(chars); +const tokens = lexer.getAllTokens(); + +console.log('Tokens:'); +for (let i = 0; i < tokens.length; i++) { + const token = tokens[i]; + console.log(` [${i}] Type: ${token.type}, Text: '${token.text}', Channel: ${token.channel}`); +} diff --git a/test-backslash.js b/test-backslash.js new file mode 100644 index 000000000..3f3a9ab91 --- /dev/null +++ b/test-backslash.js @@ -0,0 +1,26 @@ +// Test backslash character parsing +const flow = require('./packages/mermaid/src/diagrams/flowchart/flowDb.ts'); + +// Set up ANTLR parser +process.env.USE_ANTLR_PARSER = 'true'; +const antlrParser = require('./packages/mermaid/src/diagrams/flowchart/parser/antlr/antlr-parser.ts'); + +try { + console.log('Testing backslash character: \\'); + + // Test the problematic input + const input = 'graph TD; \\ --> A'; + console.log('Input:', input); + + // Parse with ANTLR + const result = antlrParser.parse(input); + console.log('Parse result:', result); + + // Check vertices + const vertices = 
flow.getVertices(); + console.log('Vertices:', vertices); + console.log('Backslash vertex:', vertices.get('\\')); + +} catch (error) { + console.error('Error:', error); +}
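
For reference, here is a minimal standalone TypeScript sketch of the shape-data extraction strategy used in the visitor above: find a "NodeId@{ ... }" block, scan to the matching closing brace while skipping braces inside quoted strings, then split the body into key/value pairs. The helper name extractShapeData is hypothetical and the pair splitting is simplified (it assumes no commas inside quoted values); this illustrates the approach and is not part of the patch.

function extractShapeData(src: string): { id: string; data: Record<string, string> } | null {
  // Find "NodeId@{" and remember where the body starts
  const m = /([A-Za-z0-9_]+)@\{/.exec(src);
  if (!m) return null;
  const start = m.index + m[0].length;

  // Scan to the matching "}", ignoring braces inside quoted strings
  let depth = 1;
  let quote = '';
  let i = start;
  while (i < src.length && depth > 0) {
    const ch = src[i];
    if (quote) {
      if (ch === quote && src[i - 1] !== '\\') quote = '';
    } else if (ch === '"' || ch === "'") {
      quote = ch;
    } else if (ch === '{') {
      depth++;
    } else if (ch === '}') {
      depth--;
    }
    i++;
  }
  if (depth !== 0) return null;

  // Split "key: value" pairs on commas (simplified: assumes no commas inside quoted values)
  const data: Record<string, string> = {};
  for (const pair of src.slice(start, i - 1).split(',')) {
    const colon = pair.indexOf(':');
    if (colon > 0) {
      const key = pair.slice(0, colon).trim();
      let value = pair.slice(colon + 1).trim();
      if ((value.startsWith('"') && value.endsWith('"')) || (value.startsWith("'") && value.endsWith("'"))) {
        value = value.slice(1, -1);
      }
      data[key] = value;
    }
  }
  return { id: m[1], data };
}

// Example: logs { id: 'A', data: { shape: 'rounded', label: 'Hello' } }
console.log(extractShapeData('A@{ shape: rounded, label: "Hello" }'));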