WIP - fixing grammar, added CircleNode and string handling

This commit is contained in:
Knut Sveidqvist
2025-04-19 07:14:05 -04:00
parent 6bcfb4df3a
commit d2ce80be10
5 changed files with 158 additions and 112 deletions

View File

@@ -1,94 +1,127 @@
/**
* Mindmap grammar for Langium
* Converted from mermaid's jison grammar
/** mermaid
* https://knsv.github.io/mermaid
* (c) 2015 Knut Sveidqvist
* MIT license.
*/
grammar Mindmap
%lex
// Entry rule - equivalent to the 'start' rule in jison
entry MindmapDocument:
// The document starts with the 'mindmap' keyword
(spaceLines+=SPACELINE)*
'mindmap' (NL)?
(documentContent=DocumentContent)?;
%options case-insensitive
// Document contains multiple statements separated by newlines
DocumentContent:
statements+=Statement (stop+=Stop statements+=Statement)* (stop+=Stop)?;
%{
// Pre-lexer code can go here
%}
%x NODE
%x NSTR
%x NSTR2
%x ICON
%x CLASS
// A stop is a newline, EOF, or a spaceline - used to separate statements
Stop:
NL | EOF | SPACELINE;
%%
// Statements can be nodes, icons, classes, or empty lines
Statement:
// The whitespace prefix determines nesting level in the mindmap
(indent=INDENT)? (
node=Node | // A node in the mindmap
icon=IconDecoration | // Icon decoration for a node
cssClass=ClassDecoration // CSS class for a node
) |
SPACELINE; // Empty or comment lines
\s*\%\%.* {yy.getLogger().trace('Found comment',yytext); return 'SPACELINE';}
// \%\%[^\n]*\n /* skip comments */
"mindmap" return 'MINDMAP';
":::" { this.begin('CLASS'); }
<CLASS>.+ { this.popState();return 'CLASS'; }
<CLASS>\n { this.popState();}
// [\s]*"::icon(" { this.begin('ICON'); }
"::icon(" { yy.getLogger().trace('Begin icon');this.begin('ICON'); }
[\s]+[\n] {yy.getLogger().trace('SPACELINE');return 'SPACELINE' /* skip all whitespace */ ;}
[\n]+ return 'NL';
<ICON>[^\)]+ { return 'ICON'; }
<ICON>\) {yy.getLogger().trace('end icon');this.popState();}
"-)" { yy.getLogger().trace('Exploding node'); this.begin('NODE');return 'NODE_DSTART'; }
"(-" { yy.getLogger().trace('Cloud'); this.begin('NODE');return 'NODE_DSTART'; }
"))" { yy.getLogger().trace('Explosion Bang'); this.begin('NODE');return 'NODE_DSTART'; }
")" { yy.getLogger().trace('Cloud Bang'); this.begin('NODE');return 'NODE_DSTART'; }
"((" { this.begin('NODE');return 'NODE_DSTART'; }
"{{" { this.begin('NODE');return 'NODE_DSTART'; }
"(" { this.begin('NODE');return 'NODE_DSTART'; }
"[" { this.begin('NODE');return 'NODE_DSTART'; }
[\s]+ return 'SPACELIST' /* skip all whitespace */ ;
// !(-\() return 'NODE_ID';
[^\(\[\n\)\{\}]+ return 'NODE_ID';
<<EOF>> return 'EOF';
<NODE>["][`] { this.begin("NSTR2");}
<NSTR2>[^`"]+ { return "NODE_DESCR";}
<NSTR2>[`]["] { this.popState();}
<NODE>["] { yy.getLogger().trace('Starting NSTR');this.begin("NSTR");}
<NSTR>[^"]+ { yy.getLogger().trace('description:', yytext); return "NODE_DESCR";}
<NSTR>["] {this.popState();}
<NODE>[\)]\) {this.popState();yy.getLogger().trace('node end ))');return "NODE_DEND";}
<NODE>[\)] {this.popState();yy.getLogger().trace('node end )');return "NODE_DEND";}
<NODE>[\]] {this.popState();yy.getLogger().trace('node end ...',yytext);return "NODE_DEND";}
<NODE>"}}" {this.popState();yy.getLogger().trace('node end ((');return "NODE_DEND";}
<NODE>"(-" {this.popState();yy.getLogger().trace('node end (-');return "NODE_DEND";}
<NODE>"-)" {this.popState();yy.getLogger().trace('node end (-');return "NODE_DEND";}
<NODE>"((" {this.popState();yy.getLogger().trace('node end ((');return "NODE_DEND";}
<NODE>"(" {this.popState();yy.getLogger().trace('node end ((');return "NODE_DEND";}
<NODE>[^\)\]\(\}]+ { yy.getLogger().trace('Long description:', yytext); return 'NODE_DESCR';}
<NODE>.+(?!\(\() { yy.getLogger().trace('Long description:', yytext); return 'NODE_DESCR';}
// [\[] return 'NODE_START';
// .+ return 'TXT' ;
// A node can be either simple (just ID) or complex (with description)
Node:
SimpleNode | ComplexNode;
/lex
// Simple node is just an identifier
SimpleNode:
id=NODE_ID;
%start start
// Complex node has a description enclosed in brackets, parentheses, etc.
ComplexNode:
// Optional ID followed by a description with delimiters
(id=NODE_ID)? start=NODE_DSTART description=NODE_DESCR end=NODE_DEND;
%% /* language grammar */
// Icon decoration for nodes
IconDecoration:
'::icon(' name=ICON ')';
start
// %{ : info document 'EOF' { return yy; } }
: mindMap
| spaceLines mindMap
;
// CSS class decoration for nodes
ClassDecoration:
':::' name=CLASS;
spaceLines
: SPACELINE
| spaceLines SPACELINE
| spaceLines NL
;
// Hidden terminal rules (comments, whitespace that should be ignored during parsing)
hidden terminal WS: /[ \t]+/;
mindMap
: MINDMAP document { return yy; }
| MINDMAP NL document { return yy; }
;
// Terminal rules (lexer rules)
terminal INDENT: /[ \t]+/;
terminal SPACELINE: /\s*\%\%.*|[ \t]+\n/;
terminal NL: /\n+/;
terminal EOF: /$/;
stop
: NL {yy.getLogger().trace('Stop NL ');}
| EOF {yy.getLogger().trace('Stop EOF ');}
| SPACELINE
| stop NL {yy.getLogger().trace('Stop NL2 ');}
| stop EOF {yy.getLogger().trace('Stop EOF2 ');}
;
document
: document statement stop
| statement stop
;
// Node related terminals with refined regex patterns to match the jison lexer
terminal NODE_ID: /[^\(\[\n\)\{\}]+/;
terminal NODE_DSTART: /\(\(|\{\{|\(|\[|\-\)|\(\-|\)\)|\)/;
terminal NODE_DEND: /\)\)|\}\}|\)|\]|\(\-|\-\)|\(\(/;
terminal NODE_DESCR: /[^"\)`\]]+/;
terminal ICON: /[^\)]+/;
terminal CLASS: /[^\n]+/;
statement
: SPACELIST node { yy.getLogger().info('Node: ',$2.id);yy.addNode($1.length, $2.id, $2.descr, $2.type); }
| SPACELIST ICON { yy.getLogger().trace('Icon: ',$2);yy.decorateNode({icon: $2}); }
| SPACELIST CLASS { yy.decorateNode({class: $2}); }
| SPACELINE { yy.getLogger().trace('SPACELIST');}
| node { yy.getLogger().trace('Node: ',$1.id);yy.addNode(0, $1.id, $1.descr, $1.type); }
| ICON { yy.decorateNode({icon: $1}); }
| CLASS { yy.decorateNode({class: $1}); }
| SPACELIST
;
// We also need to implement these semantic actions from the jison grammar:
// - addNode(level, id, description, type)
// - decorateNode({icon: iconName})
// - decorateNode({class: className})
// - getType(startDelimiter, endDelimiter)
/**
* Interface for a MindmapNode.
* This represents the AST node for a mindmap node.
*/
interface MindmapNode {
id: string;
description?: string;
type: NodeType;
level: number; // Indentation level (derived from the INDENT token)
icon?: string;
cssClass?: string;
children?: MindmapNode[];
}
/**
* The different node types in mindmap based on delimiters.
* This corresponds to the yy.getType() function in the jison grammar.
*/
type NodeType = 'DEFAULT' | 'CIRCLE' | 'CLOUD' | 'BANG' | 'HEXAGON' | 'ROUND';
node
:nodeWithId
|nodeWithoutId
;
nodeWithoutId
: NODE_DSTART NODE_DESCR NODE_DEND
{ yy.getLogger().trace("node found ..", $1); $$ = { id: $2, descr: $2, type: yy.getType($1, $3) }; }
;
nodeWithId
: NODE_ID { $$ = { id: $1, descr: $1, type: yy.nodeType.DEFAULT }; }
| NODE_ID NODE_DSTART NODE_DESCR NODE_DEND
{ yy.getLogger().trace("node found ..", $1); $$ = { id: $1, descr: $3, type: yy.getType($2, $4) }; }
;
%%

View File

@@ -9,8 +9,10 @@ entry MindmapDoc:
(MindmapRows+=MindmapRow)*;
MindmapRow:
// indent=(INDENTATION | '0') item=Item (terminator=NL)?;
(indent=INDENTATION)? item=Item (terminator=NL)?;
Item:
Node | IconDecoration | ClassDecoration;
@@ -20,7 +22,9 @@ Node:
// Specifically handle double parentheses case - highest priority
CircleNode:
id=ID '((' desc=(ID | STRING) '))';
id=ID desc=(CIRCLE_STR);
// id=ID '((' desc=(CIRCLE_STR) '))';
// id=ID '((' desc=(ID|STRING) '))';
// Handle other complex node variants
OtherComplex:
@@ -49,6 +53,9 @@ terminal CLASS_KEYWORD: ':::';
// Basic token types
terminal ID: /[a-zA-Z0-9_\-\.\/]+/;
// terminal CIRCLE_STR: /[\s\S]*?\)\)/;
terminal CIRCLE_STR: /\(\(([\s\S]*?)\)\)/;
// terminal CIRCLE_STR: /(?!\(\()[\s\S]+?(?!\(\()/;
terminal STRING: /"[^"]*"|'[^']*'/;
terminal INDENTATION: /[ \t]{2,}/; // Two or more spaces/tabs for indentation
terminal NL: /\r?\n/;

View File

@@ -14,7 +14,7 @@ import {
import { MermaidGeneratedSharedModule, MindmapGeneratedModule } from '../generated/module.js';
import { MindmapTokenBuilder } from './tokenBuilder.js';
import { CommonValueConverter } from '../common/valueConverter.js';
import { MindmapValueConverter } from './valueConverter.js';
/**
* Declaration of `Mindmap` services.
@@ -22,7 +22,7 @@ import { CommonValueConverter } from '../common/valueConverter.js';
interface MindmapAddedServices {
parser: {
TokenBuilder: MindmapTokenBuilder;
ValueConverter: CommonValueConverter;
ValueConverter: MindmapValueConverter;
};
}
@@ -41,7 +41,7 @@ export const MindmapModule: Module<
> = {
parser: {
TokenBuilder: () => new MindmapTokenBuilder(),
ValueConverter: () => new CommonValueConverter(),
ValueConverter: () => new MindmapValueConverter(),
},
};

View File

@@ -0,0 +1,21 @@
import type { CstNode, GrammarAST, ValueType } from 'langium';
import { AbstractMermaidValueConverter } from '../common/index.js';
/**
 * Value converter for the Mindmap grammar.
 *
 * Post-processes the raw text matched by a handful of terminal rules before
 * it is stored on the AST. Rules not listed in `runCustomConverter` are not
 * handled here and yield `undefined`.
 */
export class MindmapValueConverter extends AbstractMermaidValueConverter {
  /**
   * Converts the matched text of a rule into its AST value.
   *
   * @param rule - The grammar rule that produced `input`; only `rule.name` is inspected.
   * @param input - The raw matched text.
   * @param _cstNode - The CST node for the match (unused).
   * @returns The cleaned-up string for the rules handled below, otherwise `undefined`.
   */
  protected runCustomConverter(
    rule: GrammarAST.AbstractRule,
    input: string,
    _cstNode: CstNode
  ): ValueType | undefined {
    switch (rule.name) {
      case 'CIRCLE_STR': {
        // Strip exactly one leading "((" and one trailing "))" so that any
        // parentheses inside the description itself are left untouched.
        let text = input;
        if (text.startsWith('((')) {
          text = text.slice(2);
        }
        if (text.endsWith('))')) {
          text = text.slice(0, -2);
        }
        return text.trim();
      }
      // NOTE(review): the ARCH_* rule names look carried over from another
      // diagram's converter — confirm the mindmap grammar actually defines them.
      case 'ARCH_TEXT_ICON':
        // Drop every double quote and parenthesis.
        return input.replace(/["()]/g, '');
      case 'ARCH_TITLE':
        // Drop every square bracket, then surrounding whitespace.
        return input.replace(/[[\]]/g, '').trim();
      default:
        return undefined;
    }
  }
}

View File

@@ -50,37 +50,22 @@ describe('MindMap Parser Tests', () => {
expect(r1.$type).toBe('MindmapRow');
const node1 = r1.item as CircleNode;
console.debug('NODE1:', node1);
expect(node1.$type).toBe('CircleNode');
expect(result.value.rows[1].element.ID).toBe('Root');
expect(result.value.rows[1].element.desc).toBe('Root');
expect(Object.keys(result.value.rows[1].element)).toBe('root');
expect(result.value.rows[1].indent).toBe('indent');
expect(Object.keys(result.value.rows[1].element)).toBe(true);
expect(result.value.rows[1].element.id).toBe('SimpleNode');
expect(node1.id).toBe('child1');
expect(node1.desc).toBe('Child 1');
// expect(Object.keys(r1)).toBe(2);
// Temporarily commenting out failing assertions
// expect(result.successful).toBe(true);
// Check that there are 5 rows: mindmap, root, child1, child2, grandchild
expect(result.value.rows.length).toBe(5);
// Check that the first statement is the mindmap
expect(result.value.rows[0].type).toBe('mindmap');
// Check that the second statement is the root
expect(result.value.rows[1].type.type).toBe('circle');
expect(result.value.rows[1].text).toBe('Root');
expect(result.value.rows[1].depth).toBe(0);
// Check that the third statement is the first child
expect(result.value.rows[2].type.type).toBe('circle');
expect(result.value.rows[2].text).toBe('Child 1');
expect(result.value.rows[2].depth).toBe(1);
// Check that the fourth statement is the second child
expect(result.value.rows[3].type.type).toBe('circle');
expect(result.value.rows[3].text).toBe('Child 2');
expect(result.value.rows[3].depth).toBe(1);
// Check that the fifth statement is the grandchild
expect(result.value.rows[4].type.type).toBe('circle');
expect(result.value.rows[4].text).toBe('Grand Child');
expect(result.value.rows[4].depth).toBe(2);
const child2 = rows[2].item as CircleNode;
// expect(result.value.rows[1].indent).toBe('indent');
// expect(Object.keys(node1)).toBe(true);
expect(child2.id).toBe('child2');
expect(child2.desc).toBe('Child 2');
const grandChild = rows[3].item as CircleNode;
// expect(result.value.rows[1].indent).toBe('indent');
// expect(Object.keys(node1)).toBe(true);
expect(grandChild.id).toBe('grandchild');
expect(grandChild.desc).toBe('Grand Child');
});
});