From d2ce80be1089820e69a3acdcb7fed6d489306b26 Mon Sep 17 00:00:00 2001 From: Knut Sveidqvist Date: Sat, 19 Apr 2025 07:14:05 -0400 Subject: [PATCH] WIP - fixing grammar added CircleNode, string handling --- .../src/diagrams/mindmap/parser/mindmap.jison | 189 ++++++++++-------- .../src/language/mindmap/mindmap.langium | 11 +- .../parser/src/language/mindmap/module.ts | 6 +- .../src/language/mindmap/valueConverter.ts | 21 ++ packages/parser/tests/mindmap.test.ts | 43 ++-- 5 files changed, 158 insertions(+), 112 deletions(-) create mode 100644 packages/parser/src/language/mindmap/valueConverter.ts diff --git a/packages/mermaid/src/diagrams/mindmap/parser/mindmap.jison b/packages/mermaid/src/diagrams/mindmap/parser/mindmap.jison index 86dacdb06..afd5e2300 100644 --- a/packages/mermaid/src/diagrams/mindmap/parser/mindmap.jison +++ b/packages/mermaid/src/diagrams/mindmap/parser/mindmap.jison @@ -1,94 +1,127 @@ -/** - * Mindmap grammar for Langium - * Converted from mermaid's jison grammar +/** mermaid + * https://knsv.github.io/mermaid + * (c) 2015 Knut Sveidqvist + * MIT license. */ -grammar Mindmap +%lex -// Entry rule - equivalent to the 'start' rule in jison -entry MindmapDocument: - // The document starts with the 'mindmap' keyword - (spaceLines+=SPACELINE)* - 'mindmap' (NL)? - (documentContent=DocumentContent)?; +%options case-insensitive -// Document contains multiple statements separated by newlines -DocumentContent: - statements+=Statement (stop+=Stop statements+=Statement)* (stop+=Stop)?; +%{ + // Pre-lexer code can go here +%} +%x NODE +%x NSTR +%x NSTR2 +%x ICON +%x CLASS -// A stop is a newline, EOF, or a spaceline - used to separate statements -Stop: - NL | EOF | SPACELINE; +%% -// Statements can be nodes, icons, classes, or empty lines -Statement: - // The whitespace prefix determines nesting level in the mindmap - (indent=INDENT)? ( - node=Node | // A node in the mindmap - icon=IconDecoration | // Icon decoration for a node - cssClass=ClassDecoration // CSS class for a node - ) | - SPACELINE; // Empty or comment lines +\s*\%\%.* {yy.getLogger().trace('Found comment',yytext); return 'SPACELINE';} +// \%\%[^\n]*\n /* skip comments */ +"mindmap" return 'MINDMAP'; +":::" { this.begin('CLASS'); } +.+ { this.popState();return 'CLASS'; } +\n { this.popState();} +// [\s]*"::icon(" { this.begin('ICON'); } +"::icon(" { yy.getLogger().trace('Begin icon');this.begin('ICON'); } +[\s]+[\n] {yy.getLogger().trace('SPACELINE');return 'SPACELINE' /* skip all whitespace */ ;} +[\n]+ return 'NL'; +[^\)]+ { return 'ICON'; } +\) {yy.getLogger().trace('end icon');this.popState();} +"-)" { yy.getLogger().trace('Exploding node'); this.begin('NODE');return 'NODE_DSTART'; } +"(-" { yy.getLogger().trace('Cloud'); this.begin('NODE');return 'NODE_DSTART'; } +"))" { yy.getLogger().trace('Explosion Bang'); this.begin('NODE');return 'NODE_DSTART'; } +")" { yy.getLogger().trace('Cloud Bang'); this.begin('NODE');return 'NODE_DSTART'; } +"((" { this.begin('NODE');return 'NODE_DSTART'; } +"{{" { this.begin('NODE');return 'NODE_DSTART'; } +"(" { this.begin('NODE');return 'NODE_DSTART'; } +"[" { this.begin('NODE');return 'NODE_DSTART'; } +[\s]+ return 'SPACELIST' /* skip all whitespace */ ; +// !(-\() return 'NODE_ID'; +[^\(\[\n\)\{\}]+ return 'NODE_ID'; +<> return 'EOF'; +["][`] { this.begin("NSTR2");} +[^`"]+ { return "NODE_DESCR";} +[`]["] { this.popState();} +["] { yy.getLogger().trace('Starting NSTR');this.begin("NSTR");} +[^"]+ { yy.getLogger().trace('description:', yytext); return "NODE_DESCR";} +["] {this.popState();} +[\)]\) {this.popState();yy.getLogger().trace('node end ))');return "NODE_DEND";} +[\)] {this.popState();yy.getLogger().trace('node end )');return "NODE_DEND";} +[\]] {this.popState();yy.getLogger().trace('node end ...',yytext);return "NODE_DEND";} +"}}" {this.popState();yy.getLogger().trace('node end ((');return "NODE_DEND";} +"(-" {this.popState();yy.getLogger().trace('node end (-');return "NODE_DEND";} +"-)" {this.popState();yy.getLogger().trace('node end (-');return "NODE_DEND";} +"((" {this.popState();yy.getLogger().trace('node end ((');return "NODE_DEND";} +"(" {this.popState();yy.getLogger().trace('node end ((');return "NODE_DEND";} +[^\)\]\(\}]+ { yy.getLogger().trace('Long description:', yytext); return 'NODE_DESCR';} +.+(?!\(\() { yy.getLogger().trace('Long description:', yytext); return 'NODE_DESCR';} +// [\[] return 'NODE_START'; +// .+ return 'TXT' ; -// A node can be either simple (just ID) or complex (with description) -Node: - SimpleNode | ComplexNode; +/lex -// Simple node is just an identifier -SimpleNode: - id=NODE_ID; +%start start -// Complex node has a description enclosed in brackets, parentheses, etc. -ComplexNode: - // Optional ID followed by a description with delimiters - (id=NODE_ID)? start=NODE_DSTART description=NODE_DESCR end=NODE_DEND; +%% /* language grammar */ -// Icon decoration for nodes -IconDecoration: - '::icon(' name=ICON ')'; +start +// %{ : info document 'EOF' { return yy; } } + : mindMap + | spaceLines mindMap + ; -// CSS class decoration for nodes -ClassDecoration: - ':::' name=CLASS; +spaceLines + : SPACELINE + | spaceLines SPACELINE + | spaceLines NL + ; -// Hidden terminal rules (comments, whitespace that should be ignored during parsing) -hidden terminal WS: /[ \t]+/; +mindMap + : MINDMAP document { return yy; } + | MINDMAP NL document { return yy; } + ; -// Terminal rules (lexer rules) -terminal INDENT: /[ \t]+/; -terminal SPACELINE: /\s*\%\%.*|[ \t]+\n/; -terminal NL: /\n+/; -terminal EOF: /$/; +stop + : NL {yy.getLogger().trace('Stop NL ');} + | EOF {yy.getLogger().trace('Stop EOF ');} + | SPACELINE + | stop NL {yy.getLogger().trace('Stop NL2 ');} + | stop EOF {yy.getLogger().trace('Stop EOF2 ');} + ; +document + : document statement stop + | statement stop + ; -// Node related terminals with refined regex patterns to match the jison lexer -terminal NODE_ID: /[^\(\[\n\)\{\}]+/; -terminal NODE_DSTART: /\(\(|\{\{|\(|\[|\-\)|\(\-|\)\)|\)/; -terminal NODE_DEND: /\)\)|\}\}|\)|\]|\(\-|\-\)|\(\(/; -terminal NODE_DESCR: /[^"\)`\]]+/; -terminal ICON: /[^\)]+/; -terminal CLASS: /[^\n]+/; +statement + : SPACELIST node { yy.getLogger().info('Node: ',$2.id);yy.addNode($1.length, $2.id, $2.descr, $2.type); } + | SPACELIST ICON { yy.getLogger().trace('Icon: ',$2);yy.decorateNode({icon: $2}); } + | SPACELIST CLASS { yy.decorateNode({class: $2}); } + | SPACELINE { yy.getLogger().trace('SPACELIST');} + | node { yy.getLogger().trace('Node: ',$1.id);yy.addNode(0, $1.id, $1.descr, $1.type); } + | ICON { yy.decorateNode({icon: $1}); } + | CLASS { yy.decorateNode({class: $1}); } + | SPACELIST + ; -// We also need to implement these semantic actions from the jison grammar: -// - addNode(level, id, description, type) -// - decorateNode({icon: iconName}) -// - decorateNode({class: className}) -// - getType(startDelimiter, endDelimiter) -/** - * Interface for a MindmapNode. - * This represents the AST node for a mindmap node. - */ -interface MindmapNode { - id: string; - description?: string; - type: NodeType; - level: number; // Indentation level (derived from the INDENT token) - icon?: string; - cssClass?: string; - children?: MindmapNode[]; -} -/** - * The different node types in mindmap based on delimiters. - * This corresponds to the yy.getType() function in the jison grammar. - */ -type NodeType = 'DEFAULT' | 'CIRCLE' | 'CLOUD' | 'BANG' | 'HEXAGON' | 'ROUND'; +node + :nodeWithId + |nodeWithoutId + ; + +nodeWithoutId + : NODE_DSTART NODE_DESCR NODE_DEND + { yy.getLogger().trace("node found ..", $1); $$ = { id: $2, descr: $2, type: yy.getType($1, $3) }; } + ; + +nodeWithId + : NODE_ID { $$ = { id: $1, descr: $1, type: yy.nodeType.DEFAULT }; } + | NODE_ID NODE_DSTART NODE_DESCR NODE_DEND + { yy.getLogger().trace("node found ..", $1); $$ = { id: $1, descr: $3, type: yy.getType($2, $4) }; } + ; +%% diff --git a/packages/parser/src/language/mindmap/mindmap.langium b/packages/parser/src/language/mindmap/mindmap.langium index e70d91632..d154ecefd 100644 --- a/packages/parser/src/language/mindmap/mindmap.langium +++ b/packages/parser/src/language/mindmap/mindmap.langium @@ -9,7 +9,9 @@ entry MindmapDoc: (MindmapRows+=MindmapRow)*; MindmapRow: - (indent=INDENTATION)? item=Item (terminator=NL)?; + // indent=(INDENTATION | '0') item=Item (terminator=NL)?; + (indent=INDENTATION)? item=Item (terminator=NL)?; + Item: Node | IconDecoration | ClassDecoration; @@ -20,7 +22,9 @@ Node: // Specifically handle double parentheses case - highest priority CircleNode: - id=ID '((' desc=(ID | STRING) '))'; + id=ID desc=(CIRCLE_STR); + // id=ID '((' desc=(CIRCLE_STR) '))'; + // id=ID '((' desc=(ID|STRING) '))'; // Handle other complex node variants OtherComplex: @@ -49,6 +53,9 @@ terminal CLASS_KEYWORD: ':::'; // Basic token types terminal ID: /[a-zA-Z0-9_\-\.\/]+/; +// terminal CIRCLE_STR: /[\s\S]*?\)\)/; +terminal CIRCLE_STR: /\(\(([\s\S]*?)\)\)/; +// terminal CIRCLE_STR: /(?!\(\()[\s\S]+?(?!\(\()/; terminal STRING: /"[^"]*"|'[^']*'/; terminal INDENTATION: /[ \t]{2,}/; // Two or more spaces/tabs for indentation terminal NL: /\r?\n/; diff --git a/packages/parser/src/language/mindmap/module.ts b/packages/parser/src/language/mindmap/module.ts index 8c56d31f4..48ca72e6d 100644 --- a/packages/parser/src/language/mindmap/module.ts +++ b/packages/parser/src/language/mindmap/module.ts @@ -14,7 +14,7 @@ import { import { MermaidGeneratedSharedModule, MindmapGeneratedModule } from '../generated/module.js'; import { MindmapTokenBuilder } from './tokenBuilder.js'; -import { CommonValueConverter } from '../common/valueConverter.js'; +import { MindmapValueConverter } from './valueConverter.js'; /** * Declaration of `Mindmap` services. @@ -22,7 +22,7 @@ import { CommonValueConverter } from '../common/valueConverter.js'; interface MindmapAddedServices { parser: { TokenBuilder: MindmapTokenBuilder; - ValueConverter: CommonValueConverter; + ValueConverter: MindmapValueConverter; }; } @@ -41,7 +41,7 @@ export const MindmapModule: Module< > = { parser: { TokenBuilder: () => new MindmapTokenBuilder(), - ValueConverter: () => new CommonValueConverter(), + ValueConverter: () => new MindmapValueConverter(), }, }; diff --git a/packages/parser/src/language/mindmap/valueConverter.ts b/packages/parser/src/language/mindmap/valueConverter.ts new file mode 100644 index 000000000..46fa2c025 --- /dev/null +++ b/packages/parser/src/language/mindmap/valueConverter.ts @@ -0,0 +1,21 @@ +import type { CstNode, GrammarAST, ValueType } from 'langium'; + +import { AbstractMermaidValueConverter } from '../common/index.js'; + +export class MindmapValueConverter extends AbstractMermaidValueConverter { + protected runCustomConverter( + rule: GrammarAST.AbstractRule, + input: string, + _cstNode: CstNode + ): ValueType | undefined { + console.debug('MermaidValueConverter', rule.name, input); + if (rule.name === 'CIRCLE_STR') { + return input.replace('((', '').replace('))', '').trim(); + } else if (rule.name === 'ARCH_TEXT_ICON') { + return input.replace(/["()]/g, ''); + } else if (rule.name === 'ARCH_TITLE') { + return input.replace(/[[\]]/g, '').trim(); + } + return undefined; + } +} diff --git a/packages/parser/tests/mindmap.test.ts b/packages/parser/tests/mindmap.test.ts index 678c00019..364717447 100644 --- a/packages/parser/tests/mindmap.test.ts +++ b/packages/parser/tests/mindmap.test.ts @@ -50,37 +50,22 @@ describe('MindMap Parser Tests', () => { expect(r1.$type).toBe('MindmapRow'); const node1 = r1.item as CircleNode; - console.debug('NODE1:', node1); expect(node1.$type).toBe('CircleNode'); - expect(result.value.rows[1].element.ID).toBe('Root'); - expect(result.value.rows[1].element.desc).toBe('Root'); - expect(Object.keys(result.value.rows[1].element)).toBe('root'); - expect(result.value.rows[1].indent).toBe('indent'); - expect(Object.keys(result.value.rows[1].element)).toBe(true); - expect(result.value.rows[1].element.id).toBe('SimpleNode'); + expect(node1.id).toBe('child1'); + expect(node1.desc).toBe('Child 1'); + // expect(Object.keys(r1)).toBe(2); - // Temporarily commenting out failing assertions - // expect(result.successful).toBe(true); - // Check that there are 4 rows: mindmap, root, child1, child2, grandchild - expect(result.value.rows.length).toBe(5); - // Check that the first statement is the mindmap - expect(result.value.rows[0].type).toBe('mindmap'); - // Check that the second statement is the root - expect(result.value.rows[1].type.type).toBe('circle'); - expect(result.value.rows[1].text).toBe('Root'); - expect(result.value.rows[1].depth).toBe(0); - // Check that the third statement is the first child - expect(result.value.rows[2].type.type).toBe('circle'); - expect(result.value.rows[2].text).toBe('Child 1'); - expect(result.value.rows[2].depth).toBe(1); - // Check that the fourth statement is the second child - expect(result.value.rows[3].type.type).toBe('circle'); - expect(result.value.rows[3].text).toBe('Child 2'); - expect(result.value.rows[3].depth).toBe(1); - // Check that the fifth statement is the grandchild - expect(result.value.rows[4].type.type).toBe('circle'); - expect(result.value.rows[4].text).toBe('Grand Child'); - expect(result.value.rows[4].depth).toBe(2); + const child2 = rows[2].item as CircleNode; + // expect(result.value.rows[1].indent).toBe('indent'); + // expect(Object.keys(node1)).toBe(true); + expect(child2.id).toBe('child2'); + expect(child2.desc).toBe('Child 2'); + + const grandChild = rows[3].item as CircleNode; + // expect(result.value.rows[1].indent).toBe('indent'); + // expect(Object.keys(node1)).toBe(true); + expect(grandChild.id).toBe('grandchild'); + expect(grandChild.desc).toBe('Grand Child'); }); });