Share ANTLR header handling (directives, YAML front matter, comments) across diagram lexers.

* packages/mermaid/package.json, scripts/antlr-generate.mts: pass the common grammar
  directory as the ANTLR lib path so that `import HeaderCommon;` can be resolved.
* regexes.ts: allow up to two leading spaces before the closing `---` of front matter.
* HeaderCommon.g4: new shared lexer grammar. The comment rules are declared before
  `mode YAML_MODE;` so that they stay in the default mode.
* SequenceLexer.g4: import HeaderCommon instead of defining the header rules inline.
* antlr-generate.mts: normalize path separators before testing the working directory,
  so the check also matches backslash-separated (Windows) paths.

Note: indentation and blank context lines were reconstructed; the `index` hashes are
those of the original patch.

diff --git a/packages/mermaid/package.json b/packages/mermaid/package.json
index 1d9e12c83..fd10eb0d6 100644
--- a/packages/mermaid/package.json
+++ b/packages/mermaid/package.json
@@ -52,7 +52,7 @@
     "postinstall": "pnpm antlr:generate",
     "checkCircle": "npx madge --circular ./src",
     "antlr:sequence:clean": "rimraf src/diagrams/sequence/parser/antlr/generated",
-    "antlr:sequence": "pnpm run antlr:sequence:clean && antlr4ng -Dlanguage=TypeScript -Xexact-output-dir -o src/diagrams/sequence/parser/antlr/generated src/diagrams/sequence/parser/antlr/SequenceLexer.g4 src/diagrams/sequence/parser/antlr/SequenceParser.g4",
+    "antlr:sequence": "pnpm run antlr:sequence:clean && antlr4ng -Dlanguage=TypeScript -Xexact-output-dir -lib src/diagrams/common/parser/antlr -o src/diagrams/sequence/parser/antlr/generated src/diagrams/sequence/parser/antlr/SequenceLexer.g4 src/diagrams/sequence/parser/antlr/SequenceParser.g4",
     "antlr:class:clean": "rimraf src/diagrams/class/parser/antlr/generated",
     "antlr:class": "pnpm run antlr:class:clean && antlr4ng -Dlanguage=TypeScript -Xexact-output-dir -o src/diagrams/class/parser/antlr/generated src/diagrams/class/parser/antlr/ClassLexer.g4 src/diagrams/class/parser/antlr/ClassParser.g4",
     "antlr:flowchart:clean": "rimraf src/diagrams/flowchart/parser/antlr/generated",
diff --git a/packages/mermaid/src/diagram-api/regexes.ts b/packages/mermaid/src/diagram-api/regexes.ts
index 66873be31..e63b9ee2e 100644
--- a/packages/mermaid/src/diagram-api/regexes.ts
+++ b/packages/mermaid/src/diagram-api/regexes.ts
@@ -3,7 +3,8 @@
 // Note that JS doesn't support the "\A" anchor, which means we can't use
 // multiline mode.
 // Relevant YAML spec: https://yaml.org/spec/1.2.2/#914-explicit-documents
-export const frontMatterRegex = /^\uFEFF?[\t ]*-{3}[\t ]*\r?\n([\S\s]*?)\r?\n-{3}[\t ]*(?:\r?\n|$)/;
+export const frontMatterRegex =
+  /^\uFEFF?[\t ]*-{3}[\t ]*\r?\n([\S\s]*?)\r?\n {0,2}-{3}[\t ]*(?:\r?\n|$)/;
 
 export const directiveRegex =
   /%{2}{\s*(?:(\w+)\s*:|(\w+))\s*(?:(\w+)|((?:(?!}%{2}).|\r?\n)*))?\s*(?:}%{2})?/gi;
diff --git a/packages/mermaid/src/diagrams/common/parser/antlr/HeaderCommon.g4 b/packages/mermaid/src/diagrams/common/parser/antlr/HeaderCommon.g4
new file mode 100644
index 000000000..91d20bc59
--- /dev/null
+++ b/packages/mermaid/src/diagrams/common/parser/antlr/HeaderCommon.g4
@@ -0,0 +1,25 @@
+lexer grammar HeaderCommon;
+
+@members {
+  // headerMode is true until the diagram header keyword is seen
+  protected headerMode = true;
+  // Helper to disable header mode from delegator lexers on diagram start
+  protected disableHeaderMode(): void { this.headerMode = false; }
+}
+
+// Header directives: only before the diagram header keyword has been seen
+// Accept optional leading spaces/tabs on the line before the directive
+HEADER_DIRECTIVE: { this.headerMode }? [ \t]* '%%{' .*? '}%%';
+
+// Comments (skip) - simple, broad handling; rely on longest-match to keep HEADER_DIRECTIVE intact
+// Keep these above `mode YAML_MODE;`: rules declared after a mode line belong to that mode
+HASH_COMMENT: '#' ~[\r\n]* -> skip;
+PERCENT_COMMENT: '%%' ~[\r\n]* -> skip;
+
+// YAML front matter (allowed only before the diagram header)
+// Use a dedicated mode to consume until the closing '---' line
+FRONTMATTER: { this.headerMode }? [ \t]* '---' [ \t]* ('\r'? '\n') -> pushMode(YAML_MODE);
+
+mode YAML_MODE;
+YAML_END: [ \t]* '---' [ \t]* ('\r'? '\n') -> popMode, skip;
+YAML_CONTENT: . -> skip;
diff --git a/packages/mermaid/src/diagrams/sequence/parser/antlr/SequenceLexer.g4 b/packages/mermaid/src/diagrams/sequence/parser/antlr/SequenceLexer.g4
index a29f11507..e76fbf8ef 100644
--- a/packages/mermaid/src/diagrams/sequence/parser/antlr/SequenceLexer.g4
+++ b/packages/mermaid/src/diagrams/sequence/parser/antlr/SequenceLexer.g4
@@ -1,27 +1,17 @@
 lexer grammar SequenceLexer;
+import HeaderCommon;
 
 tokens { AS }
 
-@members {
-  // headerMode is true until the diagram header (sequenceDiagram) is seen
-  private headerMode = true;
-}
 
-// Header directives: handle %%{ ... }%% only before the diagram header
-// Accept optional leading spaces/tabs on the line before the directive
-HEADER_DIRECTIVE: { this.headerMode }? [ \t]* '%%{' .*? '}%%';
-
-// Comments (skip) - avoid consuming '%%{' which starts a directive
-HASH_COMMENT: '#' ~[\r\n]* -> skip;
-PERCENT_COMMENT1: '%%' ~['{'] ~[\r\n]* -> skip;
-PERCENT_COMMENT2: ~[}] '%%' ~[\r\n]* -> skip;
 
 
 // Whitespace and newline
-// YAML front matter (allowed before the diagram header)
-FRONTMATTER: { this.headerMode }? [ \t]* '---' [ \t]* ('\r'? '\n') .*? ('\r'? '\n') [ \t]* '---' [ \t]* ('\r'? '\n');
 NEWLINE: ('\r'? '\n')+;
 WS: [ \t]+ -> skip;
+// Top-level comments (also defined in HeaderCommon, duplicated here to ensure availability post-header)
+HASH_COMMENT_TOP: '#' ~[\r\n]* -> skip;
+PERCENT_COMMENT_TOP: '%%' ~[\r\n]* -> skip;
 
 // Punctuation and simple symbols
 COMMA: ',';
diff --git a/scripts/antlr-generate.mts b/scripts/antlr-generate.mts
index 298394959..cfdfdd4ba 100644
--- a/scripts/antlr-generate.mts
+++ b/scripts/antlr-generate.mts
@@ -141,12 +141,19 @@ function generateAntlrFiles(grammar: GrammarInfo): void {
   cleanGeneratedDir(outputDir);
   ensureGeneratedDir(outputDir);
 
+  // Determine common header lib path for imported grammars ('\' normalized so Windows cwd paths match)
+  const cwd = process.cwd();
+  const commonLibPath = cwd.replace(/\\/g, '/').endsWith('/packages/mermaid')
+    ? 'src/diagrams/common/parser/antlr'
+    : 'packages/mermaid/src/diagrams/common/parser/antlr';
+
   // Generate ANTLR files
   const command = [
     'antlr-ng',
     '-Dlanguage=TypeScript',
     '-l',
     '-v',
+    `--lib "${commonLibPath}"`,
     `-o "${outputDir}"`,
     `"${lexerFile}"`,
     `"${parserFile}"`,