More robust type detection

This commit is contained in:
Knut Sveidqvist
2025-09-18 15:24:04 +02:00
parent 67f673c8ed
commit 631edc06ef
6 changed files with 73 additions and 24 deletions

View File

@@ -34,17 +34,33 @@ export const detectors: Record<string, DetectorRecord> = {};
* @returns A graph definition key
*/
export const detectType = function (text: string, config?: MermaidConfig): string {
// Strip header prelude (front matter, directives, comments, blank lines) only at the top
// Then detect based on the first significant keyword to avoid false positives in labels/strings
const headerlessText = stripHeaderPrelude(text);
const cleanedText = text
.replace(frontMatterRegex, '')
.replace(directiveRegex, '')
.replace(frontMatterRegex, '') // no-op after stripHeaderPrelude, but safe
.replace(directiveRegex, '') // defensive if any directive remains at the top
.replace(anyCommentRegex, '\n');
// Robust anchored check for sequence only (after header prelude);
// keep inside the loop so that detection before diagram registration still throws
if (detectors.flowchart.detector(headerlessText, config)) {
return 'flowchart';
}
if (detectors.sequence.detector(headerlessText, config)) {
return 'sequence';
}
if (detectors.classDiagram.detector(headerlessText, config)) {
return 'classDiagram';
}
if (detectors.class.detector(headerlessText, config)) {
return 'class';
}
// Fallback to registered detectors in order
for (const [key, { detector }] of Object.entries(detectors)) {
const diagram = detector(cleanedText, config);
const isSequence = /sequenceDiagram/.exec(cleanedText);
if (isSequence) {
return 'sequence';
}
if (diagram) {
return key;
}
@@ -55,6 +71,36 @@ export const detectType = function (text: string, config?: MermaidConfig): strin
);
};
// Remove header prelude (front matter, directives, comments, blank lines) from the start only
/**
 * Drops everything that may legally precede the diagram keyword at the very top of the text.
 *
 * Removed, from the start of the input only: a leading BOM, a front matter block
 * (as matched by `frontMatterRegex`), `%%{ ... }%%` directives (possibly multiline),
 * lines beginning with `%%` or `#`, and blank lines. Text after the first significant
 * line is left untouched.
 *
 * @param input - Raw diagram text.
 * @returns The text with its leading header prelude removed.
 */
function stripHeaderPrelude(input: string): string {
  // Any run of directives, `%%` / `#` comment lines and blank lines anchored at the start.
  const leadingHeaderBlocks = /^(?:\s*%%{[\S\s]*?}%{2}\s*|\s*%%.*\r?\n|\s*#.*\r?\n|\s*\r?\n)*/;

  // BOM first, then the front matter block at the very top.
  const withoutFrontMatter = input.replace(/^\uFEFF/, '').replace(frontMatterRegex, '');
  const firstPass = withoutFrontMatter.replace(leadingHeaderBlocks, '');

  if (firstPass !== withoutFrontMatter) {
    // Something was stripped, so a front matter block (and further directives or comments)
    // may now sit at the top; run both removals one more time.
    return firstPass.replace(frontMatterRegex, '').replace(leadingHeaderBlocks, '');
  }
  return firstPass;
}
/**
* Registers lazy-loaded diagrams to Mermaid.
*

View File

@@ -114,5 +114,21 @@ describe('diagram-orchestration', () => {
)
).toBe('er');
});
it('should detect sequence/state even if config strings contain other diagram names', () => {
// The input opens with a YAML front matter block (a title plus theme config, including a
// quoted "#00ff00" colour value) and is followed by a sequenceDiagram header.
// detectType is expected to report 'sequence' for it.
// NOTE(review): despite the test title, this input only exercises the sequence case and the
// front matter shown here does not mention any other diagram name.
expect(
detectType(
`---
title: Hello Title
config:
theme: base
themeVariables:
primaryColor: "#00ff00"
---
sequenceDiagram\nA->B: hi`
)
).toBe('sequence');
});
});
});

View File

@@ -28,7 +28,6 @@ import architecture from '../diagrams/architecture/architectureDetector.js';
import { registerLazyLoadedDiagrams } from './detectType.js';
import { registerDiagram } from './diagramAPI.js';
import { treemap } from '../diagrams/treemap/detector.js';
import { frontMatterRegex } from './regexes.js';
import '../type.d.ts';
let hasLoadedDiagrams = false;
@@ -69,21 +68,7 @@ export const addDiagrams = () => {
init: () => null, // no op
},
(text) => {
const trimmed = text.trimStart();
if (!trimmed.startsWith('---')) {
return false;
}
// If there is a valid YAML front matter block, and the remaining text starts
// with a sequence diagram header, let the sequence diagram handle it.
const m = trimmed.match(frontMatterRegex);
if (m) {
const rest = trimmed.slice(m[0].length).trimStart();
if (/^sequencediagram\b/i.test(rest)) {
return false;
}
}
// Otherwise, treat this as an invalid diagram beginning with '---'
return true;
return text.toLowerCase().trimStart().startsWith('---');
}
);

View File

@@ -3,7 +3,8 @@
// Note that JS doesn't support the "\A" anchor, which means we can't use
// multiline mode.
// Relevant YAML spec: https://yaml.org/spec/1.2.2/#914-explicit-documents
export const frontMatterRegex = /^-{3}\s*[\n\r](.*?)[\n\r]-{3}\s*[\n\r]+/s;
export const frontMatterRegex =
/^\uFEFF?[\t ]*-{3}[\t ]*\r?\n([\S\s]*?)\r?\n[\t ]*-{3}[\t ]*(?:\r?\n|$)/;
// Matches a directive of the form `%%{ name: args }%%`; the pattern is not anchored, so it
// can match anywhere in the text.
// Capture groups: (1) the name when it is followed by ':', (2) the name when it is not,
// (3) a single-word argument, (4) free-form argument text (may span lines) up to `}%%`.
// The closing `}%%` is optional in the pattern. Flags: global (g) and case-insensitive (i).
export const directiveRegex =
/%{2}{\s*(?:(\w+)\s*:|(\w+))\s*(?:(\w+)|((?:(?!}%{2}).|\r?\n)*))?\s*(?:}%{2})?/gi;

View File

@@ -1821,7 +1821,7 @@ Alice->Bob: Hello Bob, how are you?`;
expect(bounds.stopy).toBe(models.lastMessage().stopy + 10);
expect(msgs.every((v) => v.wrap)).toBe(true);
});
it('should handle YAML front matter before sequenceDiagram', async () => {
it('should handle YAML front matter before sequenceDiagram XXX12', async () => {
const str = `---
title: Front matter title
config: