Markdown Code Block Validation and Error Detection: Complete Guide for Automated Quality Assurance and Syntax Verification
Advanced Markdown code block validation and error detection systems enable automated quality assurance for technical documentation, ensuring code examples remain accurate, executable, and properly formatted throughout content lifecycles. By implementing sophisticated validation frameworks, syntax verification systems, and automated error detection mechanisms, technical teams can maintain documentation quality at scale while preventing common code block issues that compromise user experience and documentation reliability.
Why Master Code Block Validation and Error Detection?
Professional code block validation provides essential benefits for documentation quality:
- Accuracy Assurance: Automatically detect syntax errors, typos, and formatting issues in code examples
- Documentation Reliability: Ensure code blocks remain executable and accurate across content updates
- Quality Consistency: Maintain consistent code styling and formatting standards across large documentation sets
- Error Prevention: Catch common mistakes before they reach production documentation
- Automated Testing: Integrate code validation into continuous integration workflows for sustainable quality
Foundation Validation Principles
Comprehensive Code Block Detection
Building robust systems for identifying and categorizing code blocks across Markdown documents:
// code-block-detector.js - Advanced code block detection and categorization
/**
 * Detects code blocks in a Markdown document (fenced, indented and, optionally,
 * inline), normalizes or guesses their language, runs lightweight validation on
 * each block and aggregates statistics about the document.
 *
 * Typical use: `new CodeBlockDetector(options).analyzeDocument(markdown)`.
 */
class CodeBlockDetector {
  /**
   * @param {object} [options] - Overrides merged over the defaults below.
   */
  constructor(options = {}) {
    this.options = {
      supportedLanguages: [
        'javascript', 'typescript', 'python', 'java', 'csharp', 'cpp', 'c',
        'go', 'rust', 'php', 'ruby', 'swift', 'kotlin', 'scala',
        'html', 'css', 'scss', 'sass', 'less',
        'json', 'xml', 'yaml', 'toml', 'ini',
        'sql', 'mongodb', 'graphql',
        'bash', 'shell', 'powershell', 'dockerfile',
        'markdown', 'tex', 'r', 'matlab', 'julia'
      ],
      customPatterns: {},
      includeInlineCode: false,
      preserveWhitespace: true,
      detectLanguageHints: true,
      ...options
    };
    this.reset();
  }

  /**
   * Scans a Markdown document line by line, collecting and validating blocks.
   *
   * @param {string} markdownContent - Raw Markdown text (null/undefined is treated as empty).
   * @returns {{blocks: object[], statistics: object, summary: object}} See getAnalysisResults().
   */
  analyzeDocument(markdownContent) {
    this.reset();
    const lines = String(markdownContent ?? '').split('\n');
    let currentBlock = null;

    for (let i = 0; i < lines.length; i++) {
      const line = lines[i];
      const lineNumber = i + 1;

      if (currentBlock) {
        // Inside a fenced block the closing fence must be tested first; an
        // opening-fence test would also match a bare closing fence.
        if (this.isClosingFenceFor(line, currentBlock)) {
          const text = currentBlock.content.join('\n');
          currentBlock.endLine = lineNumber;
          currentBlock.content = text;
          currentBlock.rawContent = text;
          this.processCodeBlock(currentBlock);
          currentBlock = null;
          continue;
        }
        // A new opening fence (with an info string) of the same kind usually
        // means the previous block was never closed.
        if (this.isCompetingFenceFor(line, currentBlock)) {
          this.addValidationError(
            'nested_code_block',
            'Unclosed code block detected',
            lineNumber,
            currentBlock
          );
          currentBlock = this.createCodeBlock(line, lineNumber);
          continue;
        }
        currentBlock.content.push(line);
        continue;
      }

      if (this.isFencedCodeStart(line)) {
        currentBlock = this.createCodeBlock(line, lineNumber);
        continue;
      }

      if (this.isIndentedCode(line, lines, i)) {
        const indentedBlock = this.extractIndentedBlock(lines, i);
        this.processCodeBlock(indentedBlock);
        i = indentedBlock.endLine - 1; // Loop increment moves to the line after the block
        continue;
      }

      if (this.options.includeInlineCode) {
        this.detectInlineCode(line, lineNumber);
      }
    }

    if (currentBlock) {
      this.addValidationError(
        'unclosed_code_block',
        'Code block not properly closed',
        currentBlock.startLine,
        currentBlock
      );
    }

    this.generateStatistics();
    return this.getAnalysisResults();
  }

  /**
   * @param {string} line
   * @returns {boolean} True when the line begins with a run of three or more
   *   backticks or tildes. Backtick fences may not contain further backticks
   *   (that would be inline code such as ```x```).
   */
  isFencedCodeStart(line) {
    const match = line.trim().match(/^(`{3,}|~{3,})(.*)$/);
    if (!match) {
      return false;
    }
    return match[1][0] === '~' || !match[2].includes('`');
  }

  /**
   * @param {string} line
   * @returns {boolean} True when the line consists only of a fence (no info string).
   */
  isFencedCodeEnd(line) {
    return /^(`{3,}|~{3,})\s*$/.test(line.trim());
  }

  /**
   * A fence closes a block only when it uses the same character as the opening
   * fence and is at least as long; shorter or different fences are content.
   */
  isClosingFenceFor(line, block) {
    const trimmed = line.trim();
    if (!this.isFencedCodeEnd(trimmed)) {
      return false;
    }
    return trimmed[0] === block.fence[0] && trimmed.length >= block.fence.length;
  }

  /**
   * True for an opening fence (with info string) that could legitimately start
   * a sibling block: same fence character and at least the same length.
   */
  isCompetingFenceFor(line, block) {
    if (!this.isFencedCodeStart(line)) {
      return false;
    }
    const run = line.trim().match(/^(`+|~+)/)[1];
    return run[0] === block.fence[0] && run.length >= block.fence.length;
  }

  /**
   * Builds the block record for an opening fence line.
   *
   * @param {string} fenceLine - The opening fence, e.g. "```js title=x".
   * @param {number} lineNumber - 1-based line number of the fence.
   * @returns {object} Block with `content` as an array of lines (joined on close).
   */
  createCodeBlock(fenceLine, lineNumber) {
    const match = fenceLine.trim().match(/^(`{3,}|~{3,})\s*([\w+#-]+)?\s*(.*)$/);
    if (!match) {
      this.addValidationError(
        'invalid_fence',
        'Invalid code fence syntax',
        lineNumber,
        { line: fenceLine }
      );
    }
    const [, fence, language, metadata] = match || ['', '```', '', ''];
    return {
      type: 'fenced',
      language: language || 'text',
      metadata: metadata,
      fence: fence,
      startLine: lineNumber,
      endLine: null,
      content: [],
      rawContent: '',
      validationResults: {
        syntaxValid: null,
        issues: [],
        warnings: []
      }
    };
  }

  /**
   * @param {string} line - Candidate line.
   * @param {string[]} lines - All document lines.
   * @param {number} index - 0-based index of `line`.
   * @returns {boolean} True when the line is indented by four spaces or a tab
   *   and follows a blank line (or starts the document).
   */
  isIndentedCode(line, lines, index) {
    if (!/^(?: {4}|\t)/.test(line) || line.trim() === '') {
      return false;
    }
    const prevLine = index > 0 ? lines[index - 1] : '';
    return prevLine.trim() === '';
  }

  /**
   * Collects consecutive indented lines (blank lines allowed in between)
   * starting at `startIndex`. Trailing blank lines are not part of the block.
   *
   * @param {string[]} lines
   * @param {number} startIndex - 0-based index of the first indented line.
   * @returns {object} Block record with 1-based `startLine`/`endLine`.
   */
  extractIndentedBlock(lines, startIndex) {
    const collected = [];
    let codeLineCount = 0; // number of collected entries up to the last non-blank one

    for (let i = startIndex; i < lines.length; i++) {
      const line = lines[i];
      if (line.trim() === '') {
        collected.push('');
        continue;
      }
      if (!/^(?: {4}|\t)/.test(line)) {
        break;
      }
      // Strip exactly one indentation unit: four spaces or a single tab.
      collected.push(line.replace(/^(?: {4}|\t)/, ''));
      codeLineCount = collected.length;
    }

    const text = collected.slice(0, codeLineCount).join('\n');
    return {
      type: 'indented',
      language: this.detectLanguageFromContent(text),
      startLine: startIndex + 1,
      // Always advance by at least one line so callers cannot loop forever.
      endLine: startIndex + Math.max(codeLineCount, 1),
      content: text,
      rawContent: text,
      validationResults: {
        syntaxValid: null,
        issues: [],
        warnings: []
      }
    };
  }

  /**
   * Records every `code span` on a line. Inline spans are stored in
   * `detectedBlocks` but are not counted in `statistics.totalBlocks`.
   */
  detectInlineCode(line, lineNumber) {
    for (const match of line.matchAll(/`([^`]+)`/g)) {
      const startCol = match.index + 1;
      this.detectedBlocks.push({
        type: 'inline',
        language: 'text',
        startLine: lineNumber,
        endLine: lineNumber,
        startCol,
        endCol: startCol + match[0].length,
        content: match[1],
        rawContent: match[0],
        validationResults: {
          syntaxValid: null,
          issues: [],
          warnings: []
        }
      });
    }
  }

  /**
   * Finalizes a block: resolves its language, validates it and updates counters.
   */
  processCodeBlock(block) {
    if (block.language === 'text' || !block.language) {
      block.language = this.detectLanguageFromContent(block.content);
    }
    block.language = this.normalizeLanguageName(block.language);

    this.validateCodeContent(block);

    this.detectedBlocks.push(block);
    this.statistics.totalBlocks++;

    const count = this.statistics.languageDistribution.get(block.language) || 0;
    this.statistics.languageDistribution.set(block.language, count + 1);
  }

  /**
   * Guesses a language from content. Each language is scored by how many of
   * its patterns match; the highest score wins and ties go to the language
   * listed first. (Returning the first single match misclassified e.g. JSON
   * as CSS and most C-like languages as JavaScript.)
   *
   * @param {string|string[]} content
   * @returns {string} A language key, or 'text' when nothing matches.
   */
  detectLanguageFromContent(content) {
    const text = Array.isArray(content) ? content.join('\n') : String(content ?? '');
    const patterns = {
      javascript: [
        /\b(function|const|let|var|=>\s*{|\.then\(|\.catch\(|require\(|import\s+.*from)\b/,
        /\bconsole\.log\(/,
        /\b(async|await)\b/
      ],
      python: [
        /\b(def|class|import|from|if\s+__name__\s*==|print\()\b/,
        /:\s*$/m, // Colon at end of line
        /\bself\./
      ],
      java: [
        /\b(public|private|protected|class|interface|extends|implements)\b/,
        /\bSystem\.out\.println\(/,
        /\bmain\(String\[\]\s+args\)/
      ],
      csharp: [
        /\b(using|namespace|public|private|class|interface|var)\b/,
        /\bConsole\.WriteLine\(/,
        /\b(string|int|double|bool|void)\s+\w+/
      ],
      go: [
        /\b(package|func|import|var|const|type|struct|interface)\b/,
        /\bfmt\.Print/,
        /\berr\s*:?=\s*/
      ],
      rust: [
        /\b(fn|let|mut|struct|enum|impl|trait|use|pub)\b/,
        /\bprintln!\(/,
        /\b(String|Vec|Option|Result)/
      ],
      php: [
        /^<\?php/,
        /\$\w+/,
        /\b(echo|print|var_dump|function)\b/
      ],
      ruby: [
        /\b(def|class|module|end|puts|require)\b/,
        /\b@\w+\b/, // Instance variables
        /\.\w+\s*do\s*\|/
      ],
      sql: [
        /\b(SELECT|FROM|WHERE|INSERT|UPDATE|DELETE|CREATE|ALTER|DROP)\b/i,
        /\b(JOIN|INNER|LEFT|RIGHT|OUTER)\b/i
      ],
      html: [
        /<\/?[a-zA-Z][^>]*>/,
        /<!DOCTYPE/i,
        /&\w+;/
      ],
      css: [
        /\{[^}]*\}/,
        /[\w-]+\s*:\s*[^;]+;/,
        /@media|@import|@keyframes/
      ],
      json: [
        /^\s*[\[{][\s\S]*[\]}]\s*$/,
        /"\w+":\s*["\d\[\{]/
      ],
      yaml: [
        /^\s*\w+:\s*$/m,
        /^\s*-\s+\w+/m,
        /^---\s*$/m
      ],
      dockerfile: [
        /^FROM\s+/m,
        /^RUN\s+/m,
        /^COPY\s+/m
      ],
      bash: [
        /^#!\/bin\/(bash|sh)/,
        /\$\{?\w+\}?/,
        /\b(echo|grep|sed|awk|if|then|fi|for|do|done)\b/
      ]
    };

    let bestLanguage = 'text';
    let bestScore = 0;
    for (const [language, regexes] of Object.entries(patterns)) {
      const score = regexes.filter((regex) => regex.test(text)).length;
      if (score > bestScore) {
        bestScore = score;
        bestLanguage = language;
      }
    }
    return bestLanguage;
  }

  /**
   * Maps common aliases to canonical language keys and lower-cases the rest.
   * A Map is used so names such as "constructor" cannot hit prototype members.
   */
  normalizeLanguageName(language) {
    const normalizations = new Map([
      ['js', 'javascript'],
      ['ts', 'typescript'],
      ['py', 'python'],
      ['rb', 'ruby'],
      ['sh', 'bash'],
      ['shell', 'bash'],
      ['yml', 'yaml'],
      ['htm', 'html'],
      ['jsx', 'javascript'],
      ['tsx', 'typescript'],
      ['c++', 'cpp'],
      ['c#', 'csharp']
    ]);
    const lowered = typeof language === 'string' ? language.toLowerCase() : '';
    return normalizations.get(lowered) || lowered || 'text';
  }

  /**
   * Runs the language-specific validator when this instance provides one and
   * falls back to validateGeneric() otherwise. Validators are looked up by
   * method name so that languages without an implementation (or implemented
   * only by a subclass) never cause a crash.
   */
  validateCodeContent(block) {
    const validatorMethods = new Map([
      ['javascript', 'validateJavaScript'],
      ['typescript', 'validateTypeScript'],
      ['python', 'validatePython'],
      ['json', 'validateJSON'],
      ['yaml', 'validateYAML'],
      ['html', 'validateHTML'],
      ['css', 'validateCSS'],
      ['sql', 'validateSQL']
    ]);
    const methodName = validatorMethods.get(block.language);
    const hasValidator = Boolean(methodName) && typeof this[methodName] === 'function';

    if (!hasValidator) {
      // Basic validation for languages without a dedicated validator
      block.validationResults = this.validateGeneric(block.content, block);
      return;
    }

    try {
      const result = this[methodName](block.content, block);
      block.validationResults = result;
      if (!result.syntaxValid) {
        this.recordBlockError(block, result.issues);
      }
    } catch (error) {
      block.validationResults = {
        syntaxValid: false,
        issues: [`Validation failed: ${error.message}`],
        warnings: []
      };
      this.recordBlockError(block, block.validationResults.issues);
    }
  }

  /**
   * Adds a block-level entry to `statistics.syntaxErrors`. `message` and
   * `lineNumber` are included so reports can render the entry like a
   * structural error.
   */
  recordBlockError(block, issues) {
    const list = Array.isArray(issues) ? issues : [];
    this.statistics.syntaxErrors.push({
      block,
      issues: list,
      message: list.join('; '),
      lineNumber: block.startLine
    });
  }

  /**
   * Heuristic JavaScript checks plus a compile-only syntax check.
   *
   * @returns {{syntaxValid: boolean, issues: string[], warnings: string[]}}
   */
  validateJavaScript(content, block) {
    const issues = [];
    const warnings = [];

    this.checkBracketBalance(content, issues);
    this.checkQuoteBalance(content, issues);
    this.checkCommonJSPatterns(content, warnings);

    if (this.looksLikeCompleteJS(content)) {
      try {
        // NOTE(security): the Function constructor only compiles the snippet here;
        // the resulting function is never invoked. Prefer a real parser if available.
        new Function(content);
      } catch (error) {
        if (!(error instanceof SyntaxError)) {
          throw error; // reported by validateCodeContent as "Validation failed"
        }
        issues.push(`JavaScript syntax error: ${error.message}`);
      }
    }

    return {
      syntaxValid: issues.length === 0,
      issues,
      warnings
    };
  }

  /**
   * Validates JSON with the native parser. Hints about trailing commas and
   * single quotes are only produced when parsing fails, because both can
   * legally appear inside string values of valid JSON.
   */
  validateJSON(content, block) {
    const issues = [];
    const warnings = [];

    try {
      JSON.parse(content);
    } catch (error) {
      issues.push(`JSON syntax error: ${error.message}`);
      if (/,\s*[}\]]/.test(content)) {
        issues.push("JSON contains trailing commas");
      }
      if (content.includes("'")) {
        warnings.push("JSON should use double quotes, not single quotes");
      }
    }

    return {
      syntaxValid: issues.length === 0,
      issues,
      warnings
    };
  }

  /**
   * Basic YAML checks: tabs in indentation (an error in YAML) and unquoted
   * boolean-like scalars (a warning).
   */
  validateYAML(content, block) {
    const issues = [];
    const warnings = [];

    content.split('\n').forEach((line, index) => {
      const lineNum = index + 1;
      if (line.trim() === '') {
        return;
      }

      // Only leading whitespace matters; tabs inside values are legal.
      if (/^ *\t/.test(line)) {
        issues.push(`Line ${lineNum}: YAML should use spaces, not tabs for indentation`);
      }

      if (line.match(/:\s*[^"'\s]/)) {
        const value = line.split(':')[1]?.trim();
        if (value && value.match(/^[yY]es$|^[nN]o$|^[tT]rue$|^[fF]alse$/)) {
          warnings.push(`Line ${lineNum}: Boolean value "${value}" should be quoted to avoid ambiguity`);
        }
      }
    });

    return {
      syntaxValid: issues.length === 0,
      issues,
      warnings
    };
  }

  /**
   * Checks that HTML tags are balanced. Void elements and self-closing tags
   * are ignored; tags inside HTML comments are not considered.
   */
  validateHTML(content, block) {
    const issues = [];
    const warnings = [];
    const selfClosingTags = new Set(['br', 'img', 'input', 'meta', 'link', 'area', 'base', 'col', 'embed', 'hr', 'source', 'track', 'wbr']);
    const openTags = [];
    const markup = content.replace(/<!--[\s\S]*?-->/g, '');

    for (const [fullMatch, rawName] of markup.matchAll(/<\/?([a-zA-Z][a-zA-Z0-9]*)\b[^>]*>/g)) {
      const tagName = rawName.toLowerCase();
      if (fullMatch.startsWith('</')) {
        const lastOpen = openTags.pop();
        if (!lastOpen || lastOpen !== tagName) {
          issues.push(`Mismatched closing tag: ${fullMatch}`);
        }
      } else if (!fullMatch.endsWith('/>') && !selfClosingTags.has(tagName)) {
        openTags.push(tagName);
      }
    }

    if (openTags.length > 0) {
      issues.push(`Unclosed HTML tags: ${openTags.join(', ')}`);
    }

    return {
      syntaxValid: issues.length === 0,
      issues,
      warnings
    };
  }

  /**
   * Language-independent checks: bracket/quote balance, replacement
   * characters and line-ending consistency.
   */
  validateGeneric(content, block) {
    const issues = [];
    const warnings = [];

    this.checkBracketBalance(content, issues);
    this.checkQuoteBalance(content, issues);

    if (content.includes('\uFFFD')) {
      issues.push('Content contains replacement characters (encoding issues)');
    }

    const lineEndings = content.match(/\r\n|\r|\n/g) || [];
    const lfCount = lineEndings.filter((ending) => ending === '\n').length;
    const crlfCount = lineEndings.filter((ending) => ending === '\r\n').length;
    const crCount = lineEndings.filter((ending) => ending === '\r').length;

    if (lfCount > 0 && crlfCount > 0) {
      warnings.push('Mixed line endings detected (LF and CRLF)');
    }
    if (crCount > 0) {
      warnings.push('Old Mac line endings (CR) detected');
    }

    return {
      syntaxValid: issues.length === 0,
      issues,
      warnings
    };
  }

  /**
   * Appends an issue for every mismatched or unclosed (), [] or {}.
   * Brackets inside strings and comments are not distinguished.
   */
  checkBracketBalance(content, issues) {
    const brackets = { '(': ')', '[': ']', '{': '}' };
    const closers = new Set(Object.values(brackets));
    const stack = [];

    for (let i = 0; i < content.length; i++) {
      const char = content[i];
      if (Object.hasOwnProperty.call(brackets, char)) {
        stack.push({ bracket: char, position: i });
      } else if (closers.has(char)) {
        const last = stack.pop();
        if (!last || brackets[last.bracket] !== char) {
          issues.push(`Mismatched bracket at position ${i}: expected ${last ? brackets[last.bracket] : 'none'}, found ${char}`);
        }
      }
    }

    if (stack.length > 0) {
      issues.push(`Unclosed brackets: ${stack.map((item) => item.bracket).join(', ')}`);
    }
  }

  /**
   * Appends an issue when single or double quotes are unbalanced. Escape
   * state is tracked explicitly so an escaped backslash before a closing
   * quote (e.g. "a\\") is handled correctly.
   */
  checkQuoteBalance(content, issues) {
    let singleQuotes = 0;
    let doubleQuotes = 0;
    let inSingleQuote = false;
    let inDoubleQuote = false;
    let escaped = false;

    for (const char of content) {
      if (escaped) {
        escaped = false;
        continue;
      }
      if (char === '\\') {
        escaped = true;
        continue;
      }
      if (char === '"' && !inSingleQuote) {
        doubleQuotes++;
        inDoubleQuote = !inDoubleQuote;
      } else if (char === "'" && !inDoubleQuote) {
        singleQuotes++;
        inSingleQuote = !inSingleQuote;
      }
    }

    if (singleQuotes % 2 !== 0) {
      issues.push('Unmatched single quotes');
    }
    if (doubleQuotes % 2 !== 0) {
      issues.push('Unmatched double quotes');
    }
  }

  /**
   * Appends style warnings for common JavaScript smells.
   */
  checkCommonJSPatterns(content, warnings) {
    if (content.includes('console.log') && content.includes('production')) {
      warnings.push('Console.log statements in production code');
    }
    // Only loose comparisons: "=== null" / "!== null" must not trigger this.
    if (/[^=!][=!]=(?!=)\s*null\b/.test(content)) {
      warnings.push('Use === or !== for null comparisons');
    }
    if (/\bvar\s/.test(content)) {
      warnings.push('Consider using let or const instead of var');
    }
  }

  /**
   * @returns {boolean} False for snippets that should not be compiled:
   *   comment-led fragments, elided code ("...") and ES module syntax, which
   *   the Function constructor always rejects.
   */
  looksLikeCompleteJS(content) {
    const trimmed = content.trim();
    if (trimmed.startsWith('//') || trimmed.startsWith('/*')) {
      return false;
    }
    if (trimmed.includes('...')) {
      return false;
    }
    if (/^\s*(?:import|export)\b/m.test(trimmed)) {
      return false;
    }
    return true;
  }

  /**
   * Records a structural (document-level) error.
   */
  addValidationError(type, message, lineNumber, context = null) {
    this.statistics.syntaxErrors.push({
      type,
      message,
      lineNumber,
      context
    });
  }

  /**
   * Derives per-document figures. `validBlocks` and `errorRate` are based on
   * the number of distinct blocks with recorded errors, so structural errors
   * (e.g. an unclosed fence) cannot push them out of range.
   */
  generateStatistics() {
    const { totalBlocks, syntaxErrors } = this.statistics;
    const failingBlocks = new Set();
    for (const entry of syntaxErrors) {
      if (entry.block) {
        failingBlocks.add(entry.block);
      }
    }

    this.statistics.errorRate = totalBlocks > 0 ? (failingBlocks.size / totalBlocks) * 100 : 0;
    this.statistics.validBlocks = totalBlocks - failingBlocks.size;

    this.statistics.languageInsights = {
      mostCommon: this.getMostCommonLanguage(),
      leastReliable: this.getLeastReliableLanguage(),
      coverage: this.getLanguageCoverage()
    };
  }

  /** @returns {{language: string, count: number}} */
  getMostCommonLanguage() {
    let maxCount = 0;
    let mostCommon = 'none';
    for (const [language, count] of this.statistics.languageDistribution) {
      if (count > maxCount) {
        maxCount = count;
        mostCommon = language;
      }
    }
    return { language: mostCommon, count: maxCount };
  }

  /** @returns {{language: string, errorCount: number}} */
  getLeastReliableLanguage() {
    const languageErrors = new Map();
    for (const error of this.statistics.syntaxErrors) {
      const lang = error.block?.language;
      if (lang) {
        languageErrors.set(lang, (languageErrors.get(lang) || 0) + 1);
      }
    }

    let maxErrors = 0;
    let leastReliable = 'none';
    for (const [language, errorCount] of languageErrors) {
      if (errorCount > maxErrors) {
        maxErrors = errorCount;
        leastReliable = language;
      }
    }
    return { language: leastReliable, errorCount: maxErrors };
  }

  /**
   * @returns {{supportedLanguages: number, totalLanguages: number, coveragePercentage: number}}
   *   Share of detected languages that appear in `options.supportedLanguages`.
   */
  getLanguageCoverage() {
    const detected = Array.from(this.statistics.languageDistribution.keys());
    const supportedCount = detected
      .filter((lang) => this.options.supportedLanguages.includes(lang)).length;
    return {
      supportedLanguages: supportedCount,
      totalLanguages: detected.length,
      coveragePercentage: (supportedCount / Math.max(detected.length, 1)) * 100
    };
  }

  /** @returns {{blocks: object[], statistics: object, summary: object}} */
  getAnalysisResults() {
    return {
      blocks: this.detectedBlocks,
      statistics: this.statistics,
      summary: {
        totalBlocks: this.statistics.totalBlocks,
        validBlocks: this.statistics.validBlocks,
        errorCount: this.statistics.syntaxErrors.length,
        errorRate: this.statistics.errorRate,
        languageDistribution: Object.fromEntries(this.statistics.languageDistribution)
      }
    };
  }

  /**
   * Renders the current results as a Markdown report. Safe to call before
   * analyzeDocument(); all figures are then zero.
   *
   * @returns {string}
   */
  generateReport() {
    const results = this.getAnalysisResults();
    const errorRate = results.summary.errorRate ?? 0;
    let report = '# Code Block Validation Report\n\n';

    report += `## Summary\n`;
    report += `- **Total Code Blocks**: ${results.summary.totalBlocks}\n`;
    report += `- **Valid Blocks**: ${results.summary.validBlocks ?? 0}\n`;
    report += `- **Error Count**: ${results.summary.errorCount}\n`;
    report += `- **Error Rate**: ${errorRate.toFixed(2)}%\n\n`;

    if (results.statistics.syntaxErrors.length > 0) {
      report += `## Errors Found\n\n`;
      results.statistics.syntaxErrors.forEach((error, index) => {
        // Block-level entries may carry only an `issues` array.
        const message = error.message ?? (error.issues || []).join('; ');
        report += `### Error ${index + 1}\n`;
        report += `- **Type**: ${error.type || 'Syntax Error'}\n`;
        report += `- **Message**: ${message}\n`;
        if (error.lineNumber) {
          report += `- **Line**: ${error.lineNumber}\n`;
        }
        if (error.block) {
          report += `- **Language**: ${error.block.language}\n`;
          report += `- **Block Type**: ${error.block.type}\n`;
        }
        report += '\n';
      });
    }

    report += `## Language Distribution\n\n`;
    for (const [language, count] of this.statistics.languageDistribution) {
      const percentage = ((count / results.summary.totalBlocks) * 100).toFixed(1);
      report += `- **${language}**: ${count} blocks (${percentage}%)\n`;
    }

    return report;
  }

  /**
   * Clears all results so the instance can analyze another document.
   */
  reset() {
    this.detectedBlocks = [];
    this.statistics = {
      totalBlocks: 0,
      validBlocks: 0,
      errorRate: 0,
      languageDistribution: new Map(),
      syntaxErrors: [],
      formatIssues: []
    };
  }
}
// Usage example: analyze a small document containing a fenced JavaScript block
// (with a deliberate syntax error), a fenced JSON block (with a trailing
// comma), an indented Python block and two inline code spans.
const detector = new CodeBlockDetector({
  supportedLanguages: ['javascript', 'python', 'java', 'html', 'css', 'json', 'yaml'],
  includeInlineCode: true,
  detectLanguageHints: true
});

// The Python sample is indented by four spaces and surrounded by blank lines;
// without that it is ordinary paragraph text rather than an indented code block.
const sampleMarkdown = `
# Code Examples
Here's some JavaScript:
\`\`\`javascript
function validateInput(data) {
if (!data || typeof data !== 'object') {
throw new Error('Invalid input');
}
return data.filter(item => item.isValid;
}
\`\`\`
And some JSON:
\`\`\`json
{
"name": "example",
"version": "1.0.0",
"scripts": {
"test": "npm test",
},
"dependencies": {}
}
\`\`\`
Here's an indented Python block:

    def process_data(items):
        """Process a list of items"""
        for item in items:
            if item.valid:
                yield item.transform()
            else:
                continue
        return

Some inline code: \`const x = 'hello';\` and \`invalid json {"test":}\`.
`;

const results = detector.analyzeDocument(sampleMarkdown);
console.log('Validation Results:', results);
console.log('Report:', detector.generateReport());
Advanced Syntax Validation Systems
Implementing language-specific validation engines for comprehensive error detection:
// syntax-validators.js - Language-specific validation engines
/**
 * Registry that dispatches code blocks to language-specific validator objects
 * and decorates their results with block metadata. Languages without a
 * registered validator receive a minimal fallback validation.
 */
class AdvancedSyntaxValidator {
  constructor() {
    this.validators = new Map();
    this.parserCache = new Map();
    this.setupValidators();
  }

  /**
   * Registers the built-in validators. The validator classes referenced here
   * must be defined before an instance is constructed.
   */
  setupValidators() {
    this.registerValidator('javascript', new JavaScriptValidator());
    this.registerValidator('typescript', new TypeScriptValidator());
    this.registerValidator('python', new PythonValidator());
    this.registerValidator('java', new JavaValidator());
    this.registerValidator('csharp', new CSharpValidator());
    this.registerValidator('go', new GoValidator());
    this.registerValidator('rust', new RustValidator());
    this.registerValidator('sql', new SQLValidator());
    this.registerValidator('html', new HTMLValidator());
    this.registerValidator('css', new CSSValidator());
    this.registerValidator('json', new JSONValidator());
    this.registerValidator('yaml', new YAMLValidator());
    this.registerValidator('xml', new XMLValidator());
    this.registerValidator('dockerfile', new DockerfileValidator());
    this.registerValidator('bash', new BashValidator());
  }

  /**
   * @param {string} language - Key matched against `block.language`.
   * @param {{validate: Function, version?: string}} validator
   */
  registerValidator(language, validator) {
    this.validators.set(language, validator);
  }

  /**
   * Validates a block with the validator registered for its language.
   *
   * @param {{language: string, type?: string, content: string}} block
   * @returns {object} Validation result; a validator that throws yields
   *   `syntaxValid: false` with a "Validation engine error" issue.
   */
  validateCodeBlock(block) {
    const validator = this.validators.get(block.language);
    if (!validator) {
      return this.createBasicValidation(block);
    }

    try {
      const result = validator.validate(block.content, block);
      return this.enhanceValidationResult(result, block);
    } catch (error) {
      return {
        syntaxValid: false,
        issues: [`Validation engine error: ${error.message}`],
        warnings: [],
        metadata: {
          validationMethod: 'error',
          validatorVersion: validator.version || '1.0.0'
        }
      };
    }
  }

  /**
   * Adds block context to a validator result. Metadata supplied by the
   * validator takes precedence over the generated fields.
   */
  enhanceValidationResult(result, block) {
    const text = String(block.content ?? '');
    return {
      ...result,
      metadata: {
        language: block.language,
        blockType: block.type,
        lineCount: text.split('\n').length,
        characterCount: text.length,
        validationTimestamp: new Date().toISOString(),
        ...result.metadata
      }
    };
  }

  /**
   * Fallback validation for languages without a registered validator. Warns
   * about empty blocks and about indentation that mixes tabs and spaces; only
   * leading whitespace is inspected, so tabs or spaces elsewhere in a line do
   * not trigger the warning.
   */
  createBasicValidation(block) {
    const issues = [];
    const warnings = [];
    const text = String(block.content ?? '');

    if (text.trim() === '') {
      warnings.push('Empty code block');
    }

    const indents = text
      .split('\n')
      .filter((line) => line.trim() !== '')
      .map((line) => line.match(/^[ \t]*/)[0]);
    const usesTabs = indents.some((indent) => indent.includes('\t'));
    const usesSpaces = indents.some((indent) => indent.includes(' '));
    if (usesTabs && usesSpaces) {
      warnings.push('Mixed indentation (tabs and spaces)');
    }

    return {
      syntaxValid: issues.length === 0,
      issues,
      warnings,
      metadata: {
        validationMethod: 'basic',
        validatorVersion: '1.0.0'
      }
    };
  }
}
/**
 * Pattern-based JavaScript validator for documentation snippets. Performs a
 * compile-only syntax check plus heuristic checks for undeclared variables,
 * unreachable code, infinite loops, style and security smells.
 */
class JavaScriptValidator {
  constructor() {
    this.version = '2.0.0';
    this.eslintRules = this.getDefaultESLintRules();
  }

  /**
   * @param {string} content - Snippet source.
   * @param {object} block - Block record (unused here, part of the validator interface).
   * @returns {{syntaxValid: boolean, issues: string[], warnings: string[], suggestions: string[], metadata: object}}
   */
  validate(content, block) {
    const issues = [];
    const warnings = [];
    const suggestions = [];

    try {
      this.validateSyntax(content, issues);
    } catch (error) {
      issues.push(`Syntax error: ${error.message}`);
    }

    this.checkCodeQuality(content, warnings, suggestions);
    this.checkSecurityIssues(content, warnings);

    return {
      syntaxValid: issues.length === 0,
      issues,
      warnings,
      suggestions,
      metadata: {
        validationMethod: 'ast-parser',
        validatorVersion: this.version,
        rulesApplied: Object.keys(this.eslintRules)
      }
    };
  }

  /**
   * Compiles the cleaned snippet and, when it compiles, runs the heuristic
   * checks. Snippets using ES module syntax skip the compile step because the
   * Function constructor always rejects import/export statements.
   *
   * @throws Any non-SyntaxError raised while compiling (handled by validate()).
   */
  validateSyntax(content, issues) {
    const cleanedContent = this.cleanForValidation(content);
    if (!cleanedContent.trim()) {
      return;
    }

    const usesModuleSyntax = /^\s*(?:import|export)\b/m.test(cleanedContent);
    if (!usesModuleSyntax) {
      try {
        // NOTE(security): compile-only; the created function is never called.
        // Replace with a real parser (e.g. Acorn) when one is available.
        new Function(cleanedContent);
      } catch (error) {
        if (!(error instanceof SyntaxError)) {
          throw error;
        }
        const match = error.message.match(/Unexpected token.*at position (\d+)/);
        if (match) {
          const position = Number.parseInt(match[1], 10);
          // Positions refer to the text that was actually compiled.
          const lineInfo = this.getLineFromPosition(cleanedContent, position);
          issues.push(`Syntax error at line ${lineInfo.line}, column ${lineInfo.column}: ${error.message}`);
        } else {
          issues.push(`Syntax error: ${error.message}`);
        }
        return;
      }
    }

    this.validateAST(cleanedContent, issues);
  }

  /**
   * Removes documentation-only constructs that are not valid JavaScript:
   * ellipsis placeholders and shell-style comment lines. Lines starting with
   * `#name` are kept because they may be private class members.
   */
  cleanForValidation(content) {
    return content
      .replace(/\/\/ \.{3,}/g, '') // Remove comment ellipses
      .replace(/\/\*\s*\.{3,}\s*\*\//g, '') // Remove block comment ellipses
      .replace(/^\s*\.{3,}\s*$/gm, '') // Remove standalone ellipses
      .replace(/^\s*#(?![\w$]).*$/gm, '') // Remove shell-style comments and shebangs
      .trim();
  }

  /**
   * Heuristic stand-in for AST-based validation; a proper parser such as
   * Babel or Acorn would replace these pattern checks.
   */
  validateAST(content, issues) {
    this.checkUndeclaredVariables(content, issues);
    this.checkUnreachableCode(content, issues);
    this.checkInfiniteLoops(content, issues);
  }

  /**
   * Flags plain assignments to identifiers that are never declared in the
   * snippet. Comparisons (`==`, `===`), arrow functions (`=>`) and property
   * assignments (`obj.prop = ...`) are not assignments to a variable and are
   * ignored; function and arrow parameters count as declared.
   */
  checkUndeclaredVariables(content, issues) {
    const identifier = '[a-zA-Z_$][a-zA-Z0-9_$]*';
    const declared = new Set();
    const builtins = new Set(['console', 'window', 'document', 'process', 'require', 'module', 'exports']);

    const declarationPattern = new RegExp(`\\b(?:var|let|const|function|class)\\s+(${identifier})`, 'g');
    for (const match of content.matchAll(declarationPattern)) {
      declared.add(match[1]);
    }

    const addParameters = (list) => {
      for (const part of list.split(',')) {
        const name = part.trim().replace(/^\.\.\./, '').match(new RegExp(`^${identifier}`));
        if (name) {
          declared.add(name[0]);
        }
      }
    };
    for (const match of content.matchAll(/\bfunction\b[^(]*\(([^)]*)\)/g)) {
      addParameters(match[1]);
    }
    for (const match of content.matchAll(/\(([^()]*)\)\s*=>/g)) {
      addParameters(match[1]);
    }
    for (const match of content.matchAll(new RegExp(`(${identifier})\\s*=>`, 'g'))) {
      declared.add(match[1]);
    }

    const assignmentPattern = new RegExp(`(?<![.\\w$])(${identifier})\\s*=(?![=>])`, 'g');
    const reported = new Set();
    for (const match of content.matchAll(assignmentPattern)) {
      const variable = match[1];
      if (!declared.has(variable) && !builtins.has(variable) && !reported.has(variable)) {
        reported.add(variable);
        issues.push(`Potentially undeclared variable: ${variable}`);
      }
    }
  }

  /**
   * Flags the first statement that follows a completed `return` in the same
   * block. A `return` whose expression continues on later lines, or that is
   * the body of a brace-less if/else/for/while, does not arm the check.
   */
  checkUnreachableCode(content, issues) {
    const lines = content.split('\n');
    let afterReturn = false;
    let previous = '';

    for (let i = 0; i < lines.length; i++) {
      const line = lines[i].trim();

      if (/^return\b/.test(line) && !line.includes('//')) {
        const isConditionalBody =
          /^(?:\}?\s*else\b|if\b|for\b|while\b)/.test(previous) && !/[{;]$/.test(previous);
        afterReturn = !isConditionalBody && this.isCompleteStatement(line);
        previous = line;
        continue;
      }

      if (/^(?:case\b|default\s*:)/.test(line)) {
        afterReturn = false; // a new switch label is reachable
      } else if (afterReturn && line && !line.startsWith('//') && !line.startsWith('}')) {
        issues.push(`Unreachable code detected at line ${i + 1}`);
        afterReturn = false; // Reset to avoid multiple warnings
      }

      if (line.includes('}')) {
        afterReturn = false;
      }
      if (line) {
        previous = line;
      }
    }
  }

  /**
   * @param {string} line - Trimmed source line.
   * @returns {boolean} True when the line looks like a finished statement:
   *   it ends with `;`, or has no unclosed bracket and no trailing operator.
   */
  isCompleteStatement(line) {
    if (line.endsWith(';')) {
      return true;
    }
    let depth = 0;
    for (const char of line) {
      if ('([{'.includes(char)) {
        depth++;
      } else if (')]}'.includes(char)) {
        depth--;
      }
    }
    return depth <= 0 && !/[,+\-*/%&|^?:=.<>!~]$/.test(line);
  }

  /**
   * Flags `while (true)`, `while (1)` and `for (;;)` loops in snippets that
   * contain neither `break` nor `return`. Reported at most once.
   */
  checkInfiniteLoops(content, issues) {
    const loopPatterns = [
      /while\s*\(\s*true\s*\)/,
      /for\s*\(\s*;;\s*\)/,
      /while\s*\(\s*1\s*\)/
    ];
    const hasEndlessLoop = loopPatterns.some((pattern) => pattern.test(content));
    if (hasEndlessLoop && !content.includes('break') && !content.includes('return')) {
      issues.push('Potential infinite loop detected');
    }
  }

  /**
   * Adds style warnings and matching suggestions.
   */
  checkCodeQuality(content, warnings, suggestions) {
    if (content.includes('console.log') && content.includes('production')) {
      warnings.push('Console.log statements detected in production code');
      suggestions.push('Consider using a proper logging library');
    }

    if (content.match(/[^=!]==(?!=)/)) {
      warnings.push('Use of == operator detected');
      suggestions.push('Consider using === for strict equality');
    }

    if (/\bvar\s/.test(content)) {
      warnings.push('Use of var keyword detected');
      suggestions.push('Consider using let or const instead of var');
    }

    content.split('\n').forEach((line, index) => {
      const trimmed = line.trim();
      const isCandidate =
        trimmed &&
        !trimmed.endsWith(';') &&
        !trimmed.endsWith('{') &&
        !trimmed.endsWith('}') &&
        !trimmed.startsWith('//') &&
        !trimmed.startsWith('*') &&
        /^[^\/\*].*[a-zA-Z0-9)\]"']$/.test(trimmed);
      if (isCandidate) {
        suggestions.push(`Consider adding semicolon at line ${index + 1}`);
      }
    });
  }

  /**
   * Adds a warning for each risky API usage found in the snippet.
   */
  checkSecurityIssues(content, warnings) {
    const securityPatterns = [
      { pattern: /\beval\s*\(/, message: 'Use of eval() detected - potential security risk' },
      { pattern: /innerHTML\s*=(?!=)/, message: 'Use of innerHTML - potential XSS risk' },
      { pattern: /document\.write\s*\(/, message: 'Use of document.write() - potential security risk' },
      { pattern: /setTimeout\s*\(\s*["'`]/, message: 'String passed to setTimeout - potential security risk' },
      { pattern: /setInterval\s*\(\s*["'`]/, message: 'String passed to setInterval - potential security risk' }
    ];

    for (const { pattern, message } of securityPatterns) {
      if (pattern.test(content)) {
        warnings.push(message);
      }
    }
  }

  /**
   * Converts a 0-based character offset into a 1-based line and column.
   */
  getLineFromPosition(content, position) {
    const beforePosition = content.substring(0, position);
    const line = beforePosition.split('\n').length;
    const lastNewline = beforePosition.lastIndexOf('\n');
    const column = position - lastNewline;
    return { line, column };
  }

  /** @returns {Object<string, string>} Rule names reported in result metadata. */
  getDefaultESLintRules() {
    return {
      'no-unused-vars': 'warn',
      'no-undef': 'error',
      'eqeqeq': 'warn',
      'no-console': 'warn',
      'no-eval': 'error',
      'no-unreachable': 'error'
    };
  }
}
/**
 * Pattern-based Python validator for documentation snippets: indentation
 * structure, missing colons on compound statements and a few PEP 8 hints.
 */
class PythonValidator {
  constructor() {
    this.version = '1.0.0';
  }

  /**
   * @param {string} content - Snippet source.
   * @param {object} block - Block record (unused here, part of the validator interface).
   * @returns {{syntaxValid: boolean, issues: string[], warnings: string[], suggestions: string[], metadata: object}}
   */
  validate(content, block) {
    const issues = [];
    const warnings = [];
    const suggestions = [];

    this.validateIndentation(content, issues);
    this.checkSyntaxPatterns(content, issues, warnings);
    this.checkPythonStyle(content, warnings, suggestions);

    return {
      syntaxValid: issues.length === 0,
      issues,
      warnings,
      suggestions,
      metadata: {
        validationMethod: 'pattern-based',
        validatorVersion: this.version
      }
    };
  }

  /**
   * Splits a line into its code part (trailing comment removed) and the net
   * bracket depth change. Characters inside quotes are ignored for both.
   *
   * @param {string} line
   * @returns {{code: string, bracketDelta: number}}
   */
  scanLine(line) {
    let quote = null;
    let bracketDelta = 0;
    let code = '';

    for (let i = 0; i < line.length; i++) {
      const char = line[i];
      if (quote) {
        if (char === '\\') {
          code += char + (line[i + 1] ?? '');
          i++;
          continue;
        }
        if (char === quote) {
          quote = null;
        }
      } else if (char === '"' || char === "'") {
        quote = char;
      } else if (char === '#') {
        break;
      } else if ('([{'.includes(char)) {
        bracketDelta++;
      } else if (')]}'.includes(char)) {
        bracketDelta--;
      }
      code += char;
    }

    return { code: code.trimEnd(), bracketDelta };
  }

  /**
   * @param {string} code - Code part of a line.
   * @returns {string|null} The triple-quote delimiter left open at the end of
   *   the line, or null when every triple-quoted string on it is closed.
   */
  unterminatedTripleQuote(code) {
    let open = null;
    let i = 0;
    while (i < code.length) {
      const three = code.slice(i, i + 3);
      if (open) {
        if (three === open) {
          open = null;
          i += 3;
        } else {
          i++;
        }
      } else if (three === '"""' || three === "'''") {
        open = three;
        i += 3;
      } else {
        i++;
      }
    }
    return open;
  }

  /**
   * Checks block indentation. Each code line is compared with the previous
   * *code* line, so blank lines and comment-only lines between a `:` header
   * and its body do not cause false reports. Lines inside an open bracket,
   * after a backslash continuation or inside a multi-line triple-quoted
   * string may use any indentation and are skipped.
   */
  validateIndentation(content, issues) {
    const lines = content.split('\n');
    const indentStack = [0];
    let previousCode = '';
    let bracketDepth = 0;
    let openTripleQuote = null;

    for (let i = 0; i < lines.length; i++) {
      const line = lines[i];
      const lineNum = i + 1;

      if (openTripleQuote) {
        if (line.includes(openTripleQuote)) {
          openTripleQuote = null;
        }
        continue;
      }
      if (line.trim() === '') {
        continue;
      }

      const { code, bracketDelta } = this.scanLine(line);
      if (code.trim() === '') {
        continue; // comment-only line
      }

      const isContinuation = bracketDepth > 0 || previousCode.endsWith('\\');
      if (!isContinuation) {
        const leadingSpaces = line.match(/^\s*/)[0];
        const indent = leadingSpaces.length;

        if (leadingSpaces.includes('\t') && leadingSpaces.includes(' ')) {
          issues.push(`Line ${lineNum}: Mixed tabs and spaces in indentation`);
        } else if (previousCode.endsWith(':')) {
          // A block header must be followed by a deeper-indented body.
          if (indent <= indentStack[indentStack.length - 1]) {
            issues.push(`Line ${lineNum}: Expected indentation after colon`);
          } else {
            indentStack.push(indent);
          }
        } else {
          // Maintain or decrease indentation
          while (indentStack.length > 1 && indent < indentStack[indentStack.length - 1]) {
            indentStack.pop();
          }
          if (indent > indentStack[indentStack.length - 1]) {
            issues.push(`Line ${lineNum}: Unexpected indentation increase`);
          }
        }
      }

      bracketDepth = Math.max(0, bracketDepth + bracketDelta);
      openTripleQuote = this.unterminatedTripleQuote(code);
      previousCode = code.trim();
    }
  }

  /**
   * Checks that compound statements carry their colon and flags Python 2
   * style print statements. Keywords are only recognized at the start of a
   * statement, so comprehensions and conditional expressions are not flagged.
   * Statements whose brackets are still open at the end of the line are
   * assumed to continue on the next line.
   */
  checkSyntaxPatterns(content, issues, warnings) {
    const lines = content.split('\n');
    // True when the header colon is present: at the end of the line, or
    // followed by a one-line body ("if x: return").
    const hasBlockColon = (code) => code.endsWith(':') || /:\s+\S/.test(code);

    for (let i = 0; i < lines.length; i++) {
      const { code: rawCode, bracketDelta } = this.scanLine(lines[i]);
      const code = rawCode.trim();
      const lineNum = i + 1;

      if (!code) {
        continue;
      }

      const mayContinue = bracketDelta > 0 || code.endsWith('\\');
      const missingColon = !mayContinue && !hasBlockColon(code);

      if (/^(?:async\s+)?def\s+\w+\s*\(/.test(code)) {
        if (missingColon) {
          issues.push(`Line ${lineNum}: Function definition missing colon`);
        }
      } else if (/^class\s+\w+/.test(code)) {
        if (missingColon) {
          issues.push(`Line ${lineNum}: Class definition missing colon`);
        }
      } else if (/^(?:if|for|while|elif|else|try|except|finally|with)\b/.test(code)) {
        if (missingColon) {
          warnings.push(`Line ${lineNum}: Control statement may be missing colon`);
        }
      }

      // Check for print statements without parentheses (Python 2 style)
      if (/^print\s+[^(]/.test(code)) {
        warnings.push(`Line ${lineNum}: Use print() function instead of print statement`);
      }
    }
  }

  /**
   * Adds PEP 8 style hints: line length, trailing whitespace, wildcard
   * imports and missing docstrings.
   */
  checkPythonStyle(content, warnings, suggestions) {
    content.split('\n').forEach((line, index) => {
      const lineNum = index + 1;

      if (line.length > 79) {
        suggestions.push(`Line ${lineNum}: Line exceeds 79 characters (PEP 8)`);
      }

      if (/\s+$/.test(line)) {
        suggestions.push(`Line ${lineNum}: Trailing whitespace detected`);
      }

      if (line.trim().startsWith('from') && line.includes('import *')) {
        warnings.push(`Line ${lineNum}: Avoid wildcard imports (import *)`);
      }
    });

    if (content.includes('def ') && !content.includes('"""') && !content.includes("'''")) {
      suggestions.push('Consider adding docstrings to functions');
    }
  }
}
/**
 * Validates JSON code blocks with the native `JSON.parse` and augments parser
 * errors with location details and hints about common authoring mistakes.
 */
class JSONValidator {
  constructor() {
    this.version = '1.0.0';
  }

  /**
   * Validates a JSON snippet.
   *
   * @param {string} content - Raw text of the code block.
   * @param {object} [block] - Block descriptor; accepted for interface parity
   *   with the other validators and not read here.
   * @returns {{syntaxValid: boolean, issues: string[], warnings: string[],
   *   suggestions: string[], metadata: object}} Validation outcome.
   */
  validate(content, block) {
    const issues = [];
    const warnings = [];
    const suggestions = [];
    // Non-string input would otherwise crash the string-based helpers below.
    const text = typeof content === 'string' ? content : String(content ?? '');

    let parsed;
    let parsedOk = false;
    try {
      parsed = JSON.parse(text);
      parsedOk = true;
    } catch (error) {
      this.parseJSONError(error, text, issues);
    }

    if (parsedOk) {
      // Kept outside the parse try-block so a failure in a quality heuristic
      // is never misreported as a JSON syntax error for a valid document.
      try {
        this.checkJSONQuality(text, parsed, warnings, suggestions);
      } catch (error) {
        warnings.push(`JSON quality checks could not be completed: ${error.message}`);
      }
    }

    return {
      syntaxValid: issues.length === 0,
      issues,
      warnings,
      suggestions,
      metadata: {
        validationMethod: 'native-parser',
        validatorVersion: this.version
      }
    };
  }

  /**
   * Converts a `JSON.parse` failure into human-readable issues.
   *
   * @param {Error} error - Error thrown by `JSON.parse`.
   * @param {string} content - Text that failed to parse.
   * @param {string[]} issues - Receives the generated messages (mutated).
   */
  parseJSONError(error, content, issues) {
    const message = error.message;

    // Only some engine messages carry a character offset; use it when present.
    const match = message.match(/position (\d+)/);
    if (match) {
      const position = Number.parseInt(match[1], 10);
      const lineInfo = this.getLineFromPosition(content, position);
      const snippet = this.getErrorSnippet(content, position);
      issues.push(`JSON syntax error at line ${lineInfo.line}, column ${lineInfo.column}: ${message}`);
      if (snippet) {
        issues.push(`Near: "${snippet}"`);
      }
    } else {
      issues.push(`JSON syntax error: ${message}`);
    }

    // Check for common JSON mistakes
    this.checkCommonJSONMistakes(content, issues);
  }

  /**
   * Adds hints for frequent JSON authoring mistakes.
   *
   * The heuristics run on a copy of the text with double-quoted string
   * literals blanked out, so punctuation or words inside valid strings
   * (an apostrophe, "undefined", a comma before a brace) are not misdiagnosed.
   *
   * @param {string} content - Text that failed to parse.
   * @param {string[]} issues - Receives the generated hints (mutated).
   */
  checkCommonJSONMistakes(content, issues) {
    const structural = content.replace(/"(?:[^"\\]|\\.)*"/g, '""');

    // Check for single quotes
    if (structural.includes("'")) {
      issues.push("JSON must use double quotes, not single quotes");
    }
    // Check for trailing commas
    if (/,\s*[}\]]/.test(structural)) {
      issues.push("JSON does not allow trailing commas");
    }
    // Check for unquoted keys
    if (/[{,]\s*[a-zA-Z_$][a-zA-Z0-9_$]*\s*:/.test(structural)) {
      issues.push("JSON object keys must be quoted");
    }
    // Check for functions or undefined
    if (/\bundefined\b/.test(structural) || /\bfunction\b\s*\w*\s*\(/.test(structural)) {
      issues.push("JSON cannot contain undefined values or functions");
    }
  }

  /**
   * Flags quality concerns in JSON that parsed successfully.
   *
   * @param {string} content - Original text.
   * @param {*} parsed - Value returned by `JSON.parse`.
   * @param {string[]} warnings - Receives nesting/size warnings (mutated).
   * @param {string[]} suggestions - Receives formatting suggestions (mutated).
   */
  checkJSONQuality(content, parsed, warnings, suggestions) {
    // Deep nesting
    const depth = this.calculateNestingDepth(parsed);
    if (depth > 10) {
      warnings.push(`Very deep nesting detected (${depth} levels)`);
    }

    // Large objects/arrays, measured on the compact re-serialization
    if (typeof parsed === 'object' && parsed !== null) {
      const size = JSON.stringify(parsed).length;
      if (size > 1000000) { // 1MB
        warnings.push('Very large JSON object detected');
      }
    }

    // Inconsistent formatting
    if (this.hasInconsistentFormatting(content)) {
      suggestions.push('Consider using consistent JSON formatting');
    }
  }

  /**
   * Returns the deepest level at which any value occurs, where members of the
   * root container are at level 1. Primitives and empty containers add no
   * level of their own.
   *
   * @param {*} obj - Parsed JSON value.
   * @param {number} [depth=0] - Level of `obj` itself (used by the recursion).
   * @returns {number} Maximum level found.
   */
  calculateNestingDepth(obj, depth = 0) {
    if (typeof obj !== 'object' || obj === null) {
      return depth;
    }
    let maxDepth = depth;
    for (const value of Object.values(obj)) {
      maxDepth = Math.max(maxDepth, this.calculateNestingDepth(value, depth + 1));
    }
    return maxDepth;
  }

  /**
   * Reports whether the text uses more than three distinct non-zero
   * leading-whitespace widths.
   *
   * @param {string} content - Original text.
   * @returns {boolean} True when the indentation looks inconsistent.
   */
  hasInconsistentFormatting(content) {
    const indentSizes = new Set();
    for (const line of content.split('\n')) {
      const indent = line.match(/^(\s*)/)[1];
      if (indent.length > 0) {
        indentSizes.add(indent.length);
      }
    }
    return indentSizes.size > 3; // More than 3 different indent sizes suggests inconsistency
  }

  /**
   * Maps a zero-based character offset to a one-based line and column.
   *
   * @param {string} content - Text being inspected.
   * @param {number} position - Zero-based character offset.
   * @returns {{line: number, column: number}} One-based location.
   */
  getLineFromPosition(content, position) {
    const beforePosition = content.substring(0, position);
    const line = beforePosition.split('\n').length;
    // lastIndexOf yields -1 on the first line, which makes the column one-based.
    const column = position - beforePosition.lastIndexOf('\n');
    return { line, column };
  }

  /**
   * Extracts the text surrounding an offset for display in an error message.
   *
   * @param {string} content - Text being inspected.
   * @param {number} position - Zero-based character offset.
   * @param {number} [contextLength=20] - Characters kept on each side.
   * @returns {string} The surrounding excerpt (may be empty).
   */
  getErrorSnippet(content, position, contextLength = 20) {
    const start = Math.max(0, position - contextLength);
    const end = Math.min(content.length, position + contextLength);
    return content.substring(start, end);
  }
}
// Export the validator classes through the CommonJS `module.exports` object.
// JSONValidator is declared directly above; the other three names are
// presumably declared earlier in this file (not visible in this section).
module.exports = {
AdvancedSyntaxValidator,
JavaScriptValidator,
PythonValidator,
JSONValidator
};
Automated Quality Assurance Integration
Advanced code block validation systems integrate seamlessly with continuous integration pipelines and automated quality assurance workflows. When combined with automated testing frameworks, validation systems ensure that code examples remain functional and accurate as documentation evolves through development cycles.
For comprehensive quality management, validation tools work effectively with content automation systems to create self-healing documentation that automatically detects, reports, and in some cases fixes common code block issues before they impact user experience or documentation credibility.
When building sophisticated documentation workflows, code validation complements syntax highlighting systems by providing both visual feedback and functional verification, ensuring that code presentation and code correctness work together to create professional, reliable technical documentation.
Error Detection and Reporting Systems
Comprehensive Error Analysis Framework
The following framework implements detailed error detection and reporting to produce actionable insights into documentation quality:
// error-analysis.js - Advanced error detection and reporting system
/**
 * Turns per-block validation results into a document-level analysis:
 * summary counts, categorized findings, recurring patterns, run-over-run
 * trends, improvement suggestions and an impact rating.
 */
class CodeBlockErrorAnalyzer {
  /**
   * @param {object} [options] - Overrides for the defaults below.
   * @param {boolean} [options.trackErrorTrends=true] - Compare each run with
   *   the previous one recorded in `errorHistory`.
   * @param {boolean} [options.generateSuggestions=true] - Ask the suggestion
   *   engine for recommendations.
   */
  constructor(options = {}) {
    this.options = {
      enableDiffAnalysis: true,
      trackErrorTrends: true,
      generateSuggestions: true,
      severityLevels: ['error', 'warning', 'suggestion', 'info'],
      reportFormats: ['json', 'markdown', 'html', 'csv'],
      ...options
    };
    this.errorHistory = new Map();
    this.errorPatterns = new Map();
    this.suggestionEngine = new ValidationSuggestionEngine();
    this.reportGenerator = new ValidationReportGenerator();
  }

  /**
   * Runs the full analysis and records the run in the history.
   *
   * @param {{summary?: object, blocks?: object[]}} validationResults -
   *   Detector output; each block carries `validationResults`.
   * @param {object} [documentContext] - Free-form context echoed in metadata.
   * @returns {object} Analysis with summary, categorization, patterns,
   *   trends, suggestions, impact and metadata.
   */
  analyzeErrors(validationResults, documentContext = {}) {
    const summary = this.generateErrorSummary(validationResults);

    // The suggestion and impact code read `summary.severityBreakdown`, which
    // exists only on the computed summary. Fill the gaps in the caller's
    // summary (caller-provided values still win) before fanning out.
    const results = {
      ...validationResults,
      blocks: this.getBlocks(validationResults),
      summary: { ...summary, ...validationResults?.summary }
    };

    const analysis = {
      summary,
      categorization: this.categorizeErrors(results),
      patterns: this.detectErrorPatterns(results),
      trends: this.analyzeErrorTrends(results),
      suggestions: this.generateImprovementSuggestions(results),
      impact: this.assessErrorImpact(results, documentContext),
      metadata: {
        analysisTimestamp: new Date().toISOString(),
        documentContext,
        analyzerVersion: '2.0.0'
      }
    };

    this.updateErrorHistory(analysis, this.extractErrorSignatures(results));
    return analysis;
  }

  /**
   * Returns the block list of a results object, or an empty list when absent.
   *
   * @param {object} results - Validation results or a history entry.
   * @returns {object[]} Blocks to iterate.
   */
  getBlocks(results) {
    return Array.isArray(results?.blocks) ? results.blocks : [];
  }

  /**
   * Returns the severity counters for a results object, computing them from
   * the blocks when the summary does not already carry them.
   *
   * @param {object} results - Validation results.
   * @returns {{error: number, warning: number, suggestion: number, info: number}}
   */
  getSeverityBreakdown(results) {
    return results?.summary?.severityBreakdown
      ?? this.generateErrorSummary(results).severityBreakdown;
  }

  /**
   * Builds the summary: totals (taken from `results.summary` when provided,
   * otherwise derived from the blocks) plus per-severity, per-language and
   * per-block-type counters.
   *
   * @param {object} results - Validation results.
   * @returns {object} Summary object.
   */
  generateErrorSummary(results) {
    const provided = results?.summary ?? {};
    const severityBreakdown = { error: 0, warning: 0, suggestion: 0, info: 0 };
    const languageBreakdown = new Map();
    const blockTypeBreakdown = new Map();

    const bump = (map, key, invalid) => {
      const stats = map.get(key) ?? { total: 0, errors: 0 };
      stats.total++;
      if (invalid) stats.errors++;
      map.set(key, stats);
    };

    const blocks = this.getBlocks(results);
    for (const block of blocks) {
      const validation = block.validationResults;
      // A block that was never validated contributes to totals only.
      const invalid = validation ? !validation.syntaxValid : false;

      if (invalid) severityBreakdown.error++;
      severityBreakdown.warning += validation?.warnings?.length ?? 0;
      severityBreakdown.suggestion += validation?.suggestions?.length ?? 0;
      severityBreakdown.info += validation?.info?.length ?? 0;

      bump(languageBreakdown, block.language, invalid);
      bump(blockTypeBreakdown, block.type, invalid);
    }

    const totalBlocks = provided.totalBlocks ?? blocks.length;
    const errorBlocks = provided.errorCount ?? severityBreakdown.error;
    return {
      totalBlocks,
      validBlocks: provided.validBlocks ?? totalBlocks - errorBlocks,
      errorBlocks,
      errorRate: provided.errorRate
        ?? (totalBlocks > 0 ? (errorBlocks / totalBlocks) * 100 : 0),
      severityBreakdown,
      languageBreakdown,
      blockTypeBreakdown
    };
  }

  /**
   * Groups every issue and warning by category.
   *
   * @param {object} results - Validation results.
   * @returns {Object<string, object[]>} Category name to list of findings,
   *   each with the block, message, severity and location.
   */
  categorizeErrors(results) {
    const categories = {
      syntax: [],
      style: [],
      security: [],
      performance: [],
      compatibility: [],
      documentation: [],
      unknown: []
    };

    const record = (block, finding, severityFor) => {
      const category = this.classifyError(finding, block);
      categories[category].push({
        block,
        issue: finding,
        severity: severityFor(category),
        location: {
          startLine: block.startLine,
          endLine: block.endLine,
          language: block.language
        }
      });
    };

    for (const block of this.getBlocks(results)) {
      const validation = block.validationResults ?? {};
      (validation.issues ?? []).forEach(issue =>
        record(block, issue, category => this.determineSeverity(issue, category)));
      // Warnings keep their own severity regardless of category.
      (validation.warnings ?? []).forEach(warning =>
        record(block, warning, () => 'warning'));
    }
    return categories;
  }

  /**
   * Picks a category from keywords in the message; the first matching rule
   * wins, and messages matching none are 'unknown'.
   *
   * @param {string} errorMessage - Issue or warning text.
   * @param {object} block - Originating block (not read here).
   * @returns {string} Category key used by `categorizeErrors`.
   */
  classifyError(errorMessage, block) {
    const message = String(errorMessage).toLowerCase();
    const rules = [
      ['syntax', ['syntax', 'parse', 'unexpected token']],
      ['style', ['style', 'format', 'indent']],
      ['security', ['security', 'eval', 'injection']],
      ['performance', ['performance', 'optimization']],
      ['compatibility', ['deprecated', 'compatibility']],
      ['documentation', ['comment', 'docstring', 'documentation']]
    ];
    const hit = rules.find(([, keywords]) => keywords.some(word => message.includes(word)));
    return hit ? hit[0] : 'unknown';
  }

  /**
   * Derives a severity level from the message text and its category.
   *
   * @param {string} issue - Issue text.
   * @param {string} category - Category from `classifyError`.
   * @returns {'error'|'warning'|'suggestion'|'info'} Severity level.
   */
  determineSeverity(issue, category) {
    const message = String(issue).toLowerCase();

    // High severity patterns
    if (message.includes('syntax error') || message.includes('parse error') || category === 'security') {
      return 'error';
    }
    // Medium severity patterns
    if (message.includes('deprecated') || message.includes('potential') || category === 'compatibility') {
      return 'warning';
    }
    // Low severity patterns
    if (message.includes('consider') || message.includes('suggestion')
      || category === 'style' || category === 'documentation') {
      return 'suggestion';
    }
    return 'info';
  }

  /**
   * Finds issue messages that recur across the document and per language.
   * Messages are compared after `normalizeErrorMessage`.
   *
   * @param {object} results - Validation results.
   * @returns {{recurring: Map, languageSpecific: Map, structural: Array, temporal: Array}}
   */
  detectErrorPatterns(results) {
    const patterns = {
      recurring: new Map(),
      languageSpecific: new Map(),
      structural: [],
      temporal: []
    };

    // Single pass: group examples by normalized message and raw messages by
    // language, instead of re-filtering the full list for every message.
    const examplesByMessage = new Map();
    const messagesByLanguage = new Map();
    for (const block of this.getBlocks(results)) {
      for (const issue of block.validationResults?.issues ?? []) {
        const normalized = this.normalizeErrorMessage(issue);
        if (!examplesByMessage.has(normalized)) examplesByMessage.set(normalized, []);
        examplesByMessage.get(normalized).push({
          message: issue,
          language: block.language,
          type: block.type,
          location: block.startLine
        });

        if (!messagesByLanguage.has(block.language)) messagesByLanguage.set(block.language, []);
        messagesByLanguage.get(block.language).push(issue);
      }
    }

    examplesByMessage.forEach((examples, message) => {
      if (examples.length > 1) {
        patterns.recurring.set(message, { count: examples.length, examples });
      }
    });

    messagesByLanguage.forEach((messages, language) => {
      const commonErrors = this.findCommonPatterns(messages);
      if (commonErrors.length > 0) {
        patterns.languageSpecific.set(language, commonErrors);
      }
    });

    return patterns;
  }

  /**
   * Strips location numbers and quoted identifiers from a message so that
   * occurrences of the same problem compare equal.
   *
   * @param {string} message - Raw issue text.
   * @returns {string} Normalized, lower-cased text.
   */
  normalizeErrorMessage(message) {
    return String(message)
      .toLowerCase()
      .replace(/line \d+/g, 'line X')
      .replace(/position \d+/g, 'position X')
      .replace(/column \d+/g, 'column X')
      .replace(/['"]\w+['"]/g, '"IDENTIFIER"')
      .replace(/\d+/g, 'N');
  }

  /**
   * Returns up to five normalized messages that occur more than once,
   * most frequent first.
   *
   * @param {string[]} messages - Raw issue texts.
   * @returns {{pattern: string, count: number}[]} Recurring patterns.
   */
  findCommonPatterns(messages) {
    const counts = new Map();
    for (const message of messages) {
      const normalized = this.normalizeErrorMessage(message);
      counts.set(normalized, (counts.get(normalized) ?? 0) + 1);
    }
    return Array.from(counts.entries())
      .filter(([, count]) => count > 1)
      .sort((a, b) => b[1] - a[1])
      .slice(0, 5) // Top 5 patterns
      .map(([pattern, count]) => ({ pattern, count }));
  }

  /**
   * Compares the current results with the most recently recorded run.
   *
   * @param {object} results - Current validation results.
   * @returns {object} `{enabled: false}` when tracking is off; otherwise the
   *   trend lists, plus `status: 'no-historical-data'` and `currentSnapshot`
   *   when no earlier run exists.
   */
  analyzeErrorTrends(results) {
    if (!this.options.trackErrorTrends) {
      return { enabled: false };
    }

    const trends = {
      improving: [],
      worsening: [],
      stable: [],
      newIssues: [],
      resolvedIssues: []
    };

    // The current run is recorded only after the analysis completes, so the
    // newest history entry is the run to compare against.
    const previous = this.getLatestHistoryEntry();
    if (!previous) {
      // The (empty) lists are included so consumers can read them unconditionally.
      return { ...trends, status: 'no-historical-data', currentSnapshot: results?.summary };
    }

    const currentErrors = new Set(this.extractErrorSignatures(results));
    // History entries store signatures rather than the blocks themselves.
    const previousErrors = new Set(previous.errorSignatures ?? this.extractErrorSignatures(previous));

    currentErrors.forEach(signature => {
      if (!previousErrors.has(signature)) trends.newIssues.push(signature);
    });
    previousErrors.forEach(signature => {
      if (!currentErrors.has(signature)) trends.resolvedIssues.push(signature);
    });

    // Compare error rates; a move of more than 5 points either way counts as a change.
    const currentRate = results?.summary?.errorRate ?? 0;
    const previousRate = previous.summary?.errorRate ?? 0;
    const rateDiff = currentRate - previousRate;
    const rateTrend = {
      type: 'overall-error-rate',
      change: rateDiff,
      current: currentRate,
      previous: previousRate
    };
    if (rateDiff > 5) {
      trends.worsening.push(rateTrend);
    } else if (rateDiff < -5) {
      trends.improving.push(rateTrend);
    } else {
      trends.stable.push(rateTrend);
    }

    return trends;
  }

  /**
   * Produces one `language:normalized-message` signature per issue.
   *
   * @param {object} results - Validation results.
   * @returns {string[]} Signatures (duplicates possible).
   */
  extractErrorSignatures(results) {
    return this.getBlocks(results).flatMap(block =>
      (block.validationResults?.issues ?? []).map(
        issue => `${block.language}:${this.normalizeErrorMessage(issue)}`
      )
    );
  }

  /**
   * Delegates to the suggestion engine, or returns empty suggestion lists
   * when the `generateSuggestions` option is disabled.
   *
   * @param {object} results - Validation results.
   * @returns {{immediate: object[], strategic: object[], preventive: object[]}}
   */
  generateImprovementSuggestions(results) {
    if (!this.options.generateSuggestions) {
      return { immediate: [], strategic: [], preventive: [] };
    }
    return this.suggestionEngine.generateSuggestions(results);
  }

  /**
   * Scores the impact on three axes (0-10 each) and maps their average to an
   * overall label.
   *
   * @param {object} results - Validation results.
   * @param {object} documentContext - Accepted for callers; not read here.
   * @returns {{userExperience: number, maintenance: number, reliability: number, overall: string}}
   */
  assessErrorImpact(results, documentContext) {
    const impact = {
      userExperience: this.assessUserExperienceImpact(results),
      maintenance: this.assessMaintenanceImpact(results),
      reliability: this.assessReliabilityImpact(results),
      overall: 'unknown'
    };

    // Only the numeric axis scores take part in the average.
    const scores = Object.values(impact).filter(score => typeof score === 'number');
    const avgScore = scores.reduce((sum, score) => sum + score, 0) / scores.length;

    if (avgScore >= 8) impact.overall = 'critical';
    else if (avgScore >= 6) impact.overall = 'high';
    else if (avgScore >= 4) impact.overall = 'medium';
    else if (avgScore >= 2) impact.overall = 'low';
    else impact.overall = 'minimal';

    return impact;
  }

  /**
   * Scores reader-facing impact from error/warning counts and the error rate.
   *
   * @param {object} results - Validation results.
   * @returns {number} Score capped at 10.
   */
  assessUserExperienceImpact(results) {
    const severity = this.getSeverityBreakdown(results);
    const errorRate = results?.summary?.errorRate ?? 0;

    // Syntax errors weigh three times as much as warnings.
    let score = severity.error * 3 + severity.warning * 1;
    // High error rate increases impact
    if (errorRate > 20) score += 2;
    else if (errorRate > 10) score += 1;

    return Math.min(score, 10); // Cap at 10
  }

  /**
   * Scores maintenance burden from the number of distinct categories in use
   * and the number of recurring messages.
   *
   * @param {object} results - Validation results.
   * @returns {number} Score capped at 10.
   */
  assessMaintenanceImpact(results) {
    const usedCategories = Object.values(this.categorizeErrors(results))
      .filter(findings => findings.length > 0).length;
    const recurringCount = this.detectErrorPatterns(results).recurring.size;

    const score = Math.min(usedCategories, 5) + Math.min(recurringCount, 3);
    return Math.min(score, 10); // Cap at 10
  }

  /**
   * Scores reliability impact from security findings and syntax errors.
   *
   * @param {object} results - Validation results.
   * @returns {number} Score capped at 10.
   */
  assessReliabilityImpact(results) {
    const securityFindings = this.categorizeErrors(results).security.length;
    const syntaxErrors = this.getSeverityBreakdown(results).error;

    const score = securityFindings * 4 + Math.min(syntaxErrors, 5);
    return Math.min(score, 10); // Cap at 10
  }

  /**
   * Records a finished analysis in the history, keeping the last 30 entries.
   *
   * @param {object} analysis - Result of `analyzeErrors`.
   * @param {string[]} [errorSignatures=[]] - Signatures of the run's issues,
   *   stored so a later run can diff against them.
   */
  updateErrorHistory(analysis, errorSignatures = []) {
    // Two analyses in the same millisecond must not overwrite each other.
    let timestamp = Date.now();
    while (this.errorHistory.has(timestamp)) {
      timestamp += 1;
    }

    this.errorHistory.set(timestamp, {
      timestamp,
      summary: analysis.summary,
      errorRate: analysis.summary.errorRate,
      totalBlocks: analysis.summary.totalBlocks,
      errorSignatures
    });

    // Keep only last 30 entries
    if (this.errorHistory.size > 30) {
      const oldestTimestamp = Math.min(...this.errorHistory.keys());
      this.errorHistory.delete(oldestTimestamp);
    }
  }

  /**
   * Returns history entries sorted newest first.
   *
   * @returns {object[]} History entries.
   */
  getHistoryNewestFirst() {
    return Array.from(this.errorHistory.entries())
      .sort((a, b) => b[0] - a[0])
      .map(([, entry]) => entry);
  }

  /**
   * Returns the most recently recorded run, or null when the history is empty.
   *
   * @returns {object|null} Newest history entry.
   */
  getLatestHistoryEntry() {
    return this.getHistoryNewestFirst()[0] ?? null;
  }

  /**
   * Returns the second most recent history entry, i.e. the run before the
   * latest recorded one, or null when fewer than two runs are recorded.
   *
   * @returns {object|null} History entry.
   */
  getPreviousResults() {
    const entries = this.getHistoryNewestFirst();
    return entries.length > 1 ? entries[1] : null;
  }

  /**
   * Renders an analysis through the report generator.
   *
   * @param {object} analysis - Result of `analyzeErrors`.
   * @param {string} [format='markdown'] - Report format name.
   * @returns {string} Rendered report.
   */
  generateReport(analysis, format = 'markdown') {
    return this.reportGenerator.generate(analysis, format);
  }
}
/**
 * Derives improvement suggestions from validation results, grouped into
 * immediate fixes, strategic improvements and preventive measures.
 */
class ValidationSuggestionEngine {
  /**
   * @param {{summary?: object, blocks?: object[]}} results - Validation results.
   * @returns {{immediate: object[], strategic: object[], preventive: object[]}}
   *   Suggestions, each with priority, action, description, effort and impact.
   */
  generateSuggestions(results) {
    const suggestions = {
      immediate: [],
      strategic: [],
      preventive: []
    };

    this.generateImmediateSuggestions(results, suggestions.immediate);
    this.generateStrategicSuggestions(results, suggestions.strategic);
    this.generatePreventiveSuggestions(results, suggestions.preventive);

    return suggestions;
  }

  /**
   * Adds suggestions for problems that should be fixed right away.
   *
   * @param {object} results - Validation results.
   * @param {object[]} suggestions - Receives the suggestions (mutated).
   */
  generateImmediateSuggestions(results, suggestions) {
    // Count invalid blocks per language; unvalidated blocks are skipped.
    const invalidByLanguage = new Map();
    let invalidBlocks = 0;
    const blocks = Array.isArray(results?.blocks) ? results.blocks : [];
    for (const block of blocks) {
      if (block.validationResults && !block.validationResults.syntaxValid) {
        invalidBlocks++;
        invalidByLanguage.set(block.language, (invalidByLanguage.get(block.language) ?? 0) + 1);
      }
    }

    // Summaries without a severity breakdown fall back to the summary's
    // errorCount, then to the count derived from the blocks.
    const summary = results?.summary ?? {};
    const errorCount = summary.severityBreakdown?.error ?? summary.errorCount ?? invalidBlocks;

    if (errorCount > 0) {
      suggestions.push({
        priority: 'high',
        action: 'Fix syntax errors',
        description: `${errorCount} code blocks have syntax errors that prevent proper rendering`,
        effort: 'low',
        impact: 'high'
      });
    }

    // Language-specific suggestions
    invalidByLanguage.forEach((count, language) => {
      if (count > 2) {
        suggestions.push({
          priority: 'medium',
          action: `Review ${language} code blocks`,
          description: `${count} ${language} blocks have issues - consider reviewing language-specific patterns`,
          effort: 'medium',
          impact: 'medium'
        });
      }
    });
  }

  /**
   * Adds longer-term suggestions based on error rate, document size and the
   * number of languages in use.
   *
   * @param {object} results - Validation results.
   * @param {object[]} suggestions - Receives the suggestions (mutated).
   */
  generateStrategicSuggestions(results, suggestions) {
    const { totalBlocks = 0, errorRate = 0, languageBreakdown } = results?.summary ?? {};

    if (errorRate > 15) {
      suggestions.push({
        priority: 'high',
        action: 'Implement automated validation',
        description: 'High error rate suggests need for automated code block validation in CI/CD pipeline',
        effort: 'high',
        impact: 'high'
      });
    }

    if (totalBlocks > 50) {
      suggestions.push({
        priority: 'medium',
        action: 'Consider documentation tooling',
        description: 'Large codebase would benefit from advanced documentation tooling and linting',
        effort: 'high',
        impact: 'medium'
      });
    }

    // Language diversity suggestions; the breakdown may be a Map, a plain
    // object, or absent.
    let languageCount = 0;
    if (languageBreakdown instanceof Map) {
      languageCount = languageBreakdown.size;
    } else if (languageBreakdown !== null && typeof languageBreakdown === 'object') {
      languageCount = Object.keys(languageBreakdown).length;
    }
    if (languageCount > 8) {
      suggestions.push({
        priority: 'low',
        action: 'Standardize code examples',
        description: 'Many different languages detected - consider standardizing examples or providing language-specific guides',
        effort: 'high',
        impact: 'low'
      });
    }
  }

  /**
   * Adds the standing preventive recommendations; they do not depend on the
   * results.
   *
   * @param {object} results - Validation results (not read here).
   * @param {object[]} suggestions - Receives the suggestions (mutated).
   */
  generatePreventiveSuggestions(results, suggestions) {
    suggestions.push(
      {
        priority: 'medium',
        action: 'Set up validation hooks',
        description: 'Implement pre-commit hooks to validate code blocks before committing',
        effort: 'medium',
        impact: 'high'
      },
      {
        priority: 'low',
        action: 'Create style guide',
        description: 'Establish coding standards and style guide for documentation code blocks',
        effort: 'medium',
        impact: 'medium'
      },
      {
        priority: 'low',
        action: 'Regular audits',
        description: 'Schedule regular code block audits to catch issues early',
        effort: 'low',
        impact: 'medium'
      }
    );
  }
}
/**
 * Renders an analysis object as a Markdown, HTML, JSON or CSV report.
 */
class ValidationReportGenerator {
  /**
   * @param {object} analysis - Analysis with summary, categorization,
   *   trends, suggestions, impact and metadata.
   * @param {string} [format='markdown'] - 'markdown', 'html', 'json' or
   *   'csv' (case-insensitive).
   * @returns {string} Rendered report.
   * @throws {Error} When the format is not supported.
   */
  generate(analysis, format = 'markdown') {
    const generators = {
      markdown: this.generateMarkdownReport.bind(this),
      html: this.generateHTMLReport.bind(this),
      json: this.generateJSONReport.bind(this),
      csv: this.generateCSVReport.bind(this)
    };

    // Own-property lookup only: names such as 'constructor' or 'toString'
    // would otherwise resolve to functions inherited from Object.prototype.
    const key = String(format).toLowerCase();
    if (!Object.prototype.hasOwnProperty.call(generators, key)) {
      throw new Error(`Unsupported report format: ${format}`);
    }
    return generators[key](analysis);
  }

  /**
   * Builds the Markdown report: summary, severity table, language table,
   * categorized findings, recommendations and trend information.
   *
   * @param {object} analysis - Analysis object.
   * @returns {string} Markdown text.
   */
  generateMarkdownReport(analysis) {
    const { summary, categorization, trends, suggestions, impact } = analysis;
    let report = '# Code Block Validation Report\n\n';

    // Executive Summary
    report += '## Executive Summary\n\n';
    report += `- **Total Code Blocks**: ${summary.totalBlocks}\n`;
    report += `- **Valid Blocks**: ${summary.validBlocks}\n`;
    report += `- **Error Rate**: ${summary.errorRate.toFixed(1)}%\n`;
    report += `- **Overall Impact**: ${impact.overall}\n\n`;

    // Error Breakdown
    report += '## Error Breakdown\n\n';
    report += '| Severity | Count |\n';
    report += '|----------|-------|\n';
    for (const [severity, count] of Object.entries(summary.severityBreakdown)) {
      report += `| ${severity} | ${count} |\n`;
    }
    report += '\n';

    // Language Analysis
    if (summary.languageBreakdown.size > 0) {
      report += '## Language Analysis\n\n';
      report += '| Language | Total | Errors | Error Rate |\n';
      report += '|----------|-------|--------|------------|\n';
      const byTotalDesc = Array.from(summary.languageBreakdown.entries())
        .sort((a, b) => b[1].total - a[1].total);
      for (const [language, stats] of byTotalDesc) {
        const errorRate = stats.total > 0 ? (stats.errors / stats.total * 100).toFixed(1) : '0.0';
        report += `| ${language} | ${stats.total} | ${stats.errors} | ${errorRate}% |\n`;
      }
      report += '\n';
    }

    // Error Categories (first five findings per category)
    report += '## Error Categories\n\n';
    for (const [category, errors] of Object.entries(categorization ?? {})) {
      if (errors.length === 0) continue;
      report += `### ${category.charAt(0).toUpperCase() + category.slice(1)} Errors (${errors.length})\n\n`;
      errors.slice(0, 5).forEach((error, index) => {
        report += `${index + 1}. **Line ${error.location.startLine}** (${error.location.language}): ${error.issue}\n`;
      });
      if (errors.length > 5) {
        report += `\n*... and ${errors.length - 5} more ${category} errors*\n`;
      }
      report += '\n';
    }

    // Recommendations
    const immediate = suggestions?.immediate ?? [];
    if (immediate.length > 0) {
      report += '## Immediate Actions Required\n\n';
      immediate.forEach((suggestion, index) => {
        report += `${index + 1}. **${suggestion.action}** (${suggestion.priority} priority)\n`;
        report += ` - ${suggestion.description}\n`;
        report += ` - Effort: ${suggestion.effort}, Impact: ${suggestion.impact}\n\n`;
      });
    }
    const strategic = suggestions?.strategic ?? [];
    if (strategic.length > 0) {
      report += '## Strategic Improvements\n\n';
      strategic.forEach((suggestion, index) => {
        report += `${index + 1}. **${suggestion.action}**\n`;
        report += ` - ${suggestion.description}\n`;
        report += ` - Effort: ${suggestion.effort}, Impact: ${suggestion.impact}\n\n`;
      });
    }

    // Trending Information
    if (trends && trends.enabled !== false && Object.keys(trends).length > 1) {
      report += '## Trend Analysis\n\n';
      if (trends.status === 'no-historical-data') {
        report += '*No historical data available for comparison yet.*\n\n';
      }

      // A trends object for a first run may carry no lists at all, so every
      // list is read defensively instead of assuming it exists.
      const listOf = (name) => (Array.isArray(trends[name]) ? trends[name] : []);
      const rateLine = (trend) =>
        `- ${trend.type}: ${trend.change > 0 ? '+' : ''}${trend.change.toFixed(1)}%\n`;
      const appendSection = (heading, items, renderItem, moreLabel) => {
        if (items.length === 0) return;
        report += heading;
        items.slice(0, moreLabel ? 5 : items.length).forEach(item => {
          report += renderItem(item);
        });
        if (moreLabel && items.length > 5) {
          report += `- *... and ${items.length - 5} more ${moreLabel}*\n`;
        }
        report += '\n';
      };

      appendSection('### ✅ Improvements\n', listOf('improving'), rateLine);
      appendSection('### ⚠️ Concerns\n', listOf('worsening'), rateLine);
      appendSection('### 🆕 New Issues\n', listOf('newIssues'), issue => `- ${issue}\n`, 'new issues');
      appendSection('### ✅ Resolved Issues\n', listOf('resolvedIssues'), issue => `- ${issue}\n`, 'resolved issues');
    }

    // Footer
    report += '---\n\n';
    report += `*Report generated on ${new Date(analysis.metadata.analysisTimestamp).toLocaleString()}*\n`;
    report += `*Analyzer version: ${analysis.metadata.analyzerVersion}*\n`;
    return report;
  }

  /**
   * Serializes the analysis as pretty-printed JSON.
   *
   * `JSON.stringify` renders Map and Set instances as `{}`, which would drop
   * the language/block-type breakdowns and the pattern maps, so they are
   * converted to plain objects and arrays first.
   *
   * @param {object} analysis - Analysis object.
   * @returns {string} JSON text.
   */
  generateJSONReport(analysis) {
    const toPlain = (key, value) => {
      if (value instanceof Map) return Object.fromEntries(value);
      if (value instanceof Set) return Array.from(value);
      return value;
    };
    return JSON.stringify(analysis, toPlain, 2);
  }

  /**
   * Builds a minimal HTML report containing the summary figures.
   *
   * @param {object} analysis - Analysis object.
   * @returns {string} HTML document text.
   */
  generateHTMLReport(analysis) {
    // HTML report implementation would go here
    // For brevity, returning a simple HTML structure
    return `
<html>
<head>
<title>Code Block Validation Report</title>
<style>
body { font-family: Arial, sans-serif; margin: 2rem; }
.summary { background: #f5f5f5; padding: 1rem; border-radius: 4px; }
.error { color: #d32f2f; }
.warning { color: #f57c00; }
.success { color: #388e3c; }
table { border-collapse: collapse; width: 100%; }
th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
th { background-color: #f2f2f2; }
</style>
</head>
<body>
<h1>Code Block Validation Report</h1>
<div class="summary">
<h2>Summary</h2>
<p>Total Blocks: ${analysis.summary.totalBlocks}</p>
<p>Valid Blocks: ${analysis.summary.validBlocks}</p>
<p>Error Rate: ${analysis.summary.errorRate.toFixed(1)}%</p>
</div>
<!-- Additional HTML content would be generated here -->
<footer>
<small>Generated on ${new Date(analysis.metadata.analysisTimestamp).toLocaleString()}</small>
</footer>
</body>
</html>
`;
  }

  /**
   * Builds a CSV report with three sections: metrics, severity counts and
   * per-language statistics.
   *
   * @param {object} analysis - Analysis object.
   * @returns {string} CSV text.
   */
  generateCSVReport(analysis) {
    const { summary } = analysis;
    // Quote fields that contain a delimiter, quote or line break so a
    // language label such as "c,cpp" cannot shift the columns.
    const csvField = (value) => {
      const text = String(value);
      return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
    };

    let csv = 'Metric,Value\n';
    csv += `Total Blocks,${summary.totalBlocks}\n`;
    csv += `Valid Blocks,${summary.validBlocks}\n`;
    csv += `Error Blocks,${summary.errorBlocks}\n`;
    csv += `Error Rate,${summary.errorRate.toFixed(1)}%\n`;

    csv += '\nSeverity,Count\n';
    for (const [severity, count] of Object.entries(summary.severityBreakdown)) {
      csv += `${csvField(severity)},${count}\n`;
    }

    csv += '\nLanguage,Total Blocks,Error Blocks,Error Rate\n';
    for (const [language, stats] of summary.languageBreakdown.entries()) {
      const errorRate = stats.total > 0 ? (stats.errors / stats.total * 100).toFixed(1) : '0.0';
      csv += `${csvField(language)},${stats.total},${stats.errors},${errorRate}%\n`;
    }
    return csv;
  }
}
// Usage example: build an analyzer, feed it one set of validation results,
// then print the raw analysis and a Markdown rendering of it.
const analyzer = new CodeBlockErrorAnalyzer({
enableDiffAnalysis: true,
trackErrorTrends: true,
generateSuggestions: true
});
// Example validation results (would come from CodeBlockDetector)
// NOTE(review): this summary carries no `severityBreakdown`; code that reads
// `results.summary.severityBreakdown` directly needs it to be present, so
// confirm the shape against the detector's real output.
const validationResults = {
summary: {
totalBlocks: 15,
validBlocks: 12,
errorCount: 3,
errorRate: 20.0,
// Per-language counters: total blocks and blocks with errors.
languageBreakdown: new Map([
['javascript', { total: 8, errors: 2 }],
['python', { total: 4, errors: 1 }],
['json', { total: 3, errors: 0 }]
])
},
blocks: [
// Example blocks with validation results
]
};
// The second argument is free-form document context that the analyzer
// echoes back in `analysis.metadata.documentContext`.
const analysis = analyzer.analyzeErrors(validationResults, {
documentType: 'api-documentation',
lastUpdated: new Date().toISOString()
});
console.log('Error Analysis:', analysis);
console.log('Markdown Report:', analyzer.generateReport(analysis, 'markdown'));
Conclusion
Advanced Markdown code block validation and error detection systems provide essential infrastructure for maintaining high-quality technical documentation at scale, ensuring that code examples remain accurate, executable, and properly formatted throughout content lifecycles. By implementing comprehensive validation frameworks, intelligent error analysis systems, and automated quality assurance workflows, technical teams can create documentation that truly serves users while maintaining professional standards for accuracy and reliability.
The key to successful implementation lies in balancing automated validation with practical workflow integration, ensuring that validation systems enhance rather than hinder the documentation creation process. Whether you’re managing small project documentation or large-scale technical content libraries, the validation techniques covered in this guide provide the foundation for building robust, reliable, and maintainable code documentation systems.
Remember to integrate validation into your continuous integration pipelines, establish clear quality metrics and thresholds, and regularly review validation results to identify patterns and opportunities for improvement. With code block validation and error detection properly implemented, your Markdown documentation can achieve the same level of quality assurance and reliability that users expect from production code, while keeping the simplicity and efficiency that make Markdown such a powerful documentation format.