Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/fix-greedy-math.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
default: patch
---

Updated math detection so that messages mentioning dollar amounts (e.g. `$10 ... $20`) or containing runs of dollar signs are no longer accidentally parsed as math.
84 changes: 73 additions & 11 deletions src/app/plugins/markdown/extensions/matrix-math.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import type { TokenizerExtension, RendererExtension } from 'marked';

/** Private-use char so math extensions do not match `$` / `$$` inside code spans. Not U+E000–U+E002 (emoticon placeholders). */
/** Private-use char so math extensions do not match `$` / `$$` inside code spans. Not U+E000–U+E002 (emoticon placeholders). {@link shieldDollarRunsForMarked} uses U+E021–U+E022. */
export const MATH_CODE_DOLLAR_MASK = '\uE020';

function findSameLineFenceClose(md: string, from: number, tick: string, minLen: number): number {
Expand Down Expand Up @@ -155,6 +155,31 @@ export function unmaskMathCodeDollarPlaceholders(html: string): string {
return html.replaceAll(MATH_CODE_DOLLAR_MASK, '$');
}

/**
 * Backslash-escapes runs of two or more `$` so the markdown parser sees them as
 * literal dollar signs, while leaving well-formed `$$...$$` spans untouched.
 *
 * A span is anything matching `$$`, one or more non-`$` characters, `$$`, and an
 * optional trailing newline (the same shape the block-math tokenizer accepts).
 * Those spans are copied through verbatim; only the text between them is escaped.
 *
 * The work is done in a single pass with no placeholder characters, so input that
 * happens to contain private-use code points is never rewritten or dropped.
 *
 * @param markdown Raw markdown source.
 * @returns The markdown with every `$` of each stray `$$+` run replaced by `\$`.
 */
export function shieldDollarRunsForMarked(markdown: string): string {
  // Created per call: a /g regex carries `lastIndex` state between exec() calls.
  const blockRe = /\$\$([^$]+)\$\$\n?/g;

  // Turns each `$` of a 2+ run into `\$`; single `$` characters are left alone.
  const escapeDollarRuns = (text: string): string =>
    text.replace(/\${2,}/g, (run) => '\\$'.repeat(run.length));

  let result = '';
  let cursor = 0;
  for (let m = blockRe.exec(markdown); m !== null; m = blockRe.exec(markdown)) {
    // Escape the gap before this span, then emit the span itself unchanged.
    result += escapeDollarRuns(markdown.slice(cursor, m.index)) + m[0];
    cursor = m.index + m[0].length;
  }
  return result + escapeDollarRuns(markdown.slice(cursor));
}

function escapeHtml(text: string): string {
return text
.replace(/&/g, '&')
Expand All @@ -163,6 +188,49 @@ function escapeHtml(text: string): string {
.replace(/"/g, '"');
}

/**
 * Reports whether candidate math content has nothing worth rendering.
 *
 * Zero-width characters (U+200B–U+200D, U+FEFF) are removed and the remainder is
 * trimmed; the content is ignorable when what is left is empty or consists
 * solely of `$` characters.
 */
function isIgnorableMathContent(latex: string): boolean {
  const visible = latex.replace(/[\u200B-\u200D\uFEFF]/g, '').trim();
  return visible === '' || /^\$+$/.test(visible);
}

/**
 * Attempts to read an inline `$...$` math token from the start of `src`.
 *
 * Pairing is deliberately conservative so that prose such as "$10 ... $20" is
 * not swallowed as math. A token is produced only when all of these hold:
 * - `src` begins with `$`, is at least three characters long, and is not a
 *   `$$` opener followed by something other than a third `$`;
 * - the character right after the opening `$` is not whitespace;
 * - the closing `$` is not preceded by whitespace;
 * - the closing `$` is not directly followed by an ASCII digit;
 * - the enclosed text is not ignorable (see `isIgnorableMathContent`) and does
 *   not itself begin with `$$`.
 *
 * Candidate closers are tried left to right; the first one that qualifies wins.
 *
 * @param src Remaining inline source, expected to start at a `$`.
 * @returns The math token (`raw` includes both delimiters, `latex` excludes
 *   them), or `undefined` when no acceptable pair exists.
 */
function tryTokenizeInlineMath(
  src: string
): { type: 'math'; raw: string; latex: string } | undefined {
  const whitespace = /\s/;
  const asciiDigit = /[0-9]/;

  const opensWithDollar = src.startsWith('$');
  const isBareDoubleDollar = src.startsWith('$$') && (src.length < 3 || src.charAt(2) !== '$');
  if (!opensWithDollar || isBareDoubleDollar || src.length < 3 || whitespace.test(src.charAt(1))) {
    return undefined;
  }

  // Visit each later `$` in order as a potential closing delimiter.
  for (let close = src.indexOf('$', 1); close !== -1; close = src.indexOf('$', close + 1)) {
    const closesAfterSpace = whitespace.test(src.charAt(close - 1));
    // charAt yields '' past the end of the string, i.e. "nothing follows".
    const next = src.charAt(close + 1);
    const closesBeforeDigit = next !== '' && asciiDigit.test(next);
    if (closesAfterSpace || closesBeforeDigit) {
      continue;
    }

    const latex = src.slice(1, close);
    if (isIgnorableMathContent(latex) || latex.trimStart().startsWith('$$')) {
      continue;
    }

    return { type: 'math', raw: src.slice(0, close + 1), latex };
  }

  return undefined;
}

// Inline math: $...$
export const matrixMathExtension = {
name: 'math',
Expand All @@ -171,15 +239,7 @@ export const matrixMathExtension = {
return src.indexOf('$');
},
tokenizer(src: string) {
const match = /^\$([^$]+)\$/.exec(src);
if (match) {
return {
type: 'math',
raw: match[0],
latex: match[1],
};
}
return undefined;
return tryTokenizeInlineMath(src);
},
renderer(token) {
return `<span data-mx-maths="${escapeHtml(token.latex)}">${token.latex}</span>`;
Expand All @@ -196,10 +256,12 @@ export const matrixMathBlockExtension = {
tokenizer(src: string) {
const match = /^\$\$([^$]+)\$\$\n?/.exec(src);
if (match) {
const latex = match[1]?.trim() ?? '';
if (isIgnorableMathContent(latex)) return undefined;
return {
type: 'mathBlock',
raw: match[0],
latex: match[1]?.trim() ?? '',
latex,
};
}
return undefined;
Expand Down
38 changes: 36 additions & 2 deletions src/app/plugins/markdown/extensions/matrix.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import { describe, expect, it } from 'vitest';
import { marked } from 'marked';
import { matrixSpoilerExtension } from './matrix-spoiler';
import { matrixMathExtension, matrixMathBlockExtension } from './matrix-math';
import {
matrixMathBlockExtension,
matrixMathExtension,
shieldDollarRunsForMarked,
} from './matrix-math';
import { matrixSubscriptExtension } from './matrix-subscript';

function parse(input: string): string {
Expand All @@ -13,7 +17,7 @@ function parse(input: string): string {
matrixSubscriptExtension,
],
});
return processor.parse(input) as string;
return processor.parse(shieldDollarRunsForMarked(input)) as string;
}

describe('matrixSpoilerExtension', () => {
Expand Down Expand Up @@ -48,6 +52,36 @@ describe('matrixMathExtension (inline)', () => {
it('does not parse unmatched $', () => {
expect(parse('No $ math here')).not.toContain('data-mx-maths');
});

it('does not parse dollar amounts in a sentence as inline math', () => {
const input = 'I just bought something for $10 on sale, it was originally $20!';
const result = parse(input);
expect(result).not.toContain('data-mx-maths');
expect(result).toContain('$10');
expect(result).toContain('$20');
});

it('does not treat $ as math when the opening is followed by whitespace', () => {
expect(parse('$ E = mc^2$')).not.toContain('data-mx-maths');
});

it('still parses valid inline math', () => {
expect(parse('$E = mc^2$')).toContain('data-mx-maths');
expect(parse('$2+2$')).toContain('data-mx-maths');
});

it('does not parse inline math when inner trims to empty (e.g. zero-width only)', () => {
expect(parse(`empty $\u200B$ here`)).not.toContain('data-mx-maths');
});

it('does not parse long runs of dollar signs as inline math', () => {
expect(parse('hey $$$$$$$ there')).not.toContain('data-mx-maths');
});

it('does not parse block math when inner is only whitespace or dollars', () => {
expect(parse('$$ $$')).not.toContain('data-mx-maths');
expect(parse('$$ $ $$')).not.toContain('data-mx-maths');
});
});

describe('matrixMathBlockExtension (block)', () => {
Expand Down
20 changes: 20 additions & 0 deletions src/app/plugins/markdown/markdownToHtml.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,26 @@ describe('markdownToHtml', () => {
expect(result).toContain('E = mc^2');
});

it('does not mangle messages with dollar amounts', () => {
const result = markdownToHtml(
'I just bought something for $10 on sale, it was originally $20!'
);
expect(result).not.toContain('data-mx-maths');
expect(result).toContain('$10');
expect(result).toContain('$20');
});

it('does not treat empty or dollar-only block math as KaTeX', () => {
expect(markdownToHtml('$$ $$')).not.toContain('data-mx-maths');
expect(markdownToHtml('$$ $ $$')).not.toContain('data-mx-maths');
});

it('does not parse five consecutive dollar signs in a sentence as math', () => {
const result = markdownToHtml('hey $$$$$ there');
expect(result).not.toContain('data-mx-maths');
expect(result).toContain('$$$$$');
});

it('does not parse dollars inside fenced code as math', () => {
expect(markdownToHtml('```\n$$test$$\n```')).not.toContain('data-mx-maths');
expect(markdownToHtml('```\n$$test$$\n```')).toContain('$$test$$');
Expand Down
3 changes: 2 additions & 1 deletion src/app/plugins/markdown/markdownToHtml.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {
matrixMathExtension,
matrixMathBlockExtension,
maskDollarSignsInsideMarkdownCode,
shieldDollarRunsForMarked,
unmaskMathCodeDollarPlaceholders,
} from './extensions/matrix-math';
import { matrixSubscriptExtension } from './extensions/matrix-subscript';
Expand Down Expand Up @@ -65,7 +66,7 @@ export function markdownToHtml(markdown: string): string {

const preprocessed = preprocessEmoticon(blockquotePrefixed);

const mathInput = maskDollarSignsInsideMarkdownCode(preprocessed);
const mathInput = shieldDollarRunsForMarked(maskDollarSignsInsideMarkdownCode(preprocessed));

// Parse markdown to HTML using marked with our Matrix extensions
const html = processor.parse(mathInput) as string;
Expand Down
Loading