1
0
mirror of https://github.com/misskey-dev/misskey.git synced 2026-05-06 03:36:03 +02:00
Files
misskey/packages/backend/src/core/MfmService.ts
anatawa12 666f78e676 enable and fix no-unused-vars and no-async-promise-executor (#17070)
* dev: set --no-bail for lint task

* lint: enable no-async-promise-executor lint and fix them

* lint: enable no-unused-vars with allowing _ prefix

* lint: fix semi
2026-01-08 11:49:12 +09:00

439 lines
12 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/*
* SPDX-FileCopyrightText: syuilo and misskey-project
* SPDX-License-Identifier: AGPL-3.0-only
*/
import { URL } from 'node:url';
import { Inject, Injectable } from '@nestjs/common';
import * as htmlParser from 'node-html-parser';
import { DI } from '@/di-symbols.js';
import type { Config } from '@/config.js';
import { intersperse } from '@/misc/prelude/array.js';
import { normalizeForSearch } from '@/misc/normalize-for-search.js';
import type { IMentionedRemoteUsers } from '@/models/Note.js';
import { bindThis } from '@/decorators.js';
import { escapeHtml } from '@/misc/escape-html.js';
import type * as mfm from 'mfm-js';
const urlRegex = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+/;
const urlRegexFull = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+$/;
@Injectable()
export class MfmService {
constructor(
@Inject(DI.config)
private config: Config,
) {
}
@bindThis
public fromHtml(html: string, hashtagNames?: string[]): string {
// some AP servers like Pixelfed use br tags as well as newlines
html = html.replace(/<br\s?\/?>\r?\n/gi, '\n');
const normalizedHashtagNames = hashtagNames == null ? undefined : new Set<string>(hashtagNames.map(x => normalizeForSearch(x)));
const doc = htmlParser.parse(`<div>${html}</div>`);
let text = '';
for (const n of doc.childNodes) {
analyze(n);
}
return text.trim();
function getText(node: htmlParser.Node): string {
if (node instanceof htmlParser.TextNode) return node.textContent;
if (!(node instanceof htmlParser.HTMLElement)) return '';
if (node.tagName === 'BR') return '\n';
if (node.childNodes != null) {
return node.childNodes.map(n => getText(n)).join('');
}
return '';
}
function analyzeChildren(childNodes: htmlParser.Node[] | null): void {
if (childNodes != null) {
for (const n of childNodes) {
analyze(n);
}
}
}
function analyze(node: htmlParser.Node) {
if (node instanceof htmlParser.TextNode) {
text += node.textContent;
return;
}
// Skip comment or document type node
if (!(node instanceof htmlParser.HTMLElement)) {
return;
}
switch (node.tagName) {
case 'BR': {
text += '\n';
break;
}
case 'A': {
const txt = getText(node);
const rel = node.attributes.rel;
const href = node.attributes.href;
// ハッシュタグ
if (normalizedHashtagNames && href != null && normalizedHashtagNames.has(normalizeForSearch(txt))) {
text += txt;
// メンション
} else if (txt.startsWith('@') && !(rel != null && rel.startsWith('me '))) {
const part = txt.split('@');
if (part.length === 2 && href) {
//#region ホスト名部分が省略されているので復元する
const acct = `${txt}@${(new URL(href)).hostname}`;
text += acct;
//#endregion
} else if (part.length === 3) {
text += txt;
}
// その他
} else {
const generateLink = () => {
if (!href && !txt) {
return '';
}
if (!href) {
return txt;
}
if (!txt || txt === href) { // #6383: Missing text node
if (href.match(urlRegexFull)) {
return href;
} else {
return `<${href}>`;
}
}
if (href.match(urlRegex) && !href.match(urlRegexFull)) {
return `[${txt}](<${href}>)`; // #6846
} else {
return `[${txt}](${href})`;
}
};
text += generateLink();
}
break;
}
case 'H1': {
text += '【';
analyzeChildren(node.childNodes);
text += '】\n';
break;
}
case 'B':
case 'STRONG': {
text += '**';
analyzeChildren(node.childNodes);
text += '**';
break;
}
case 'SMALL': {
text += '<small>';
analyzeChildren(node.childNodes);
text += '</small>';
break;
}
case 'S':
case 'DEL': {
text += '~~';
analyzeChildren(node.childNodes);
text += '~~';
break;
}
case 'I':
case 'EM': {
text += '<i>';
analyzeChildren(node.childNodes);
text += '</i>';
break;
}
case 'RUBY': {
let ruby: [string, string][] = [];
for (const child of node.childNodes) {
if ((child instanceof htmlParser.TextNode) && !/\s|\[|\]/.test(child.textContent)) {
ruby.push([child.textContent, '']);
continue;
}
if (!(child instanceof htmlParser.HTMLElement)) continue;
if (child.tagName === 'RP') {
continue;
}
if (child.tagName === 'RT' && ruby.length > 0) {
const rt = getText(child);
if (/\s|\[|\]/.test(rt)) {
// If any space is included in rt, it is treated as a normal text
ruby = [];
analyzeChildren(node.childNodes);
break;
} else {
ruby.at(-1)![1] = rt;
continue;
}
}
// If any other element is included in ruby, it is treated as a normal text
ruby = [];
analyzeChildren(node.childNodes);
break;
}
for (const [base, rt] of ruby) {
text += `$[ruby ${base} ${rt}]`;
}
break;
}
// block code (<pre><code>)
case 'PRE': {
if (node.childNodes.length === 1 && (node.childNodes[0] instanceof htmlParser.HTMLElement) && node.childNodes[0].tagName === 'CODE') {
text += '\n```\n';
text += getText(node.childNodes[0]);
text += '\n```\n';
} else if (node.childNodes.length === 1 && (node.childNodes[0] instanceof htmlParser.TextNode) && node.childNodes[0].textContent.startsWith('<code>') && node.childNodes[0].textContent.endsWith('</code>')) {
text += '\n```\n';
text += node.childNodes[0].textContent.slice(6, -7);
text += '\n```\n';
} else {
analyzeChildren(node.childNodes);
}
break;
}
// inline code (<code>)
case 'CODE': {
text += '`';
analyzeChildren(node.childNodes);
text += '`';
break;
}
case 'BLOCKQUOTE': {
const t = getText(node);
if (t) {
text += '\n> ';
text += t.split('\n').join('\n> ');
}
break;
}
case 'P':
case 'H2':
case 'H3':
case 'H4':
case 'H5':
case 'H6': {
text += '\n\n';
analyzeChildren(node.childNodes);
break;
}
// other block elements
case 'DIV':
case 'HEADER':
case 'FOOTER':
case 'ARTICLE':
case 'LI':
case 'DT':
case 'DD': {
text += '\n';
analyzeChildren(node.childNodes);
break;
}
default: // includes inline elements
{
analyzeChildren(node.childNodes);
break;
}
}
}
}
@bindThis
public toHtml(nodes: mfm.MfmNode[] | null, mentionedRemoteUsers: IMentionedRemoteUsers = [], extraHtml: string | null = null) {
if (nodes == null) {
return null;
}
function toHtml(children?: mfm.MfmNode[]): string {
if (children == null) return '';
return children.map(x => handlers[x.type](x)).join('');
}
function fnDefault(node: mfm.MfmFn) {
return `<i>${toHtml(node.children)}</i>`;
}
const handlers = {
bold: (node) => {
return `<b>${toHtml(node.children)}</b>`;
},
small: (node) => {
return `<small>${toHtml(node.children)}</small>`;
},
strike: (node) => {
return `<del>${toHtml(node.children)}</del>`;
},
italic: (node) => {
return `<i>${toHtml(node.children)}</i>`;
},
fn: (node) => {
switch (node.props.name) {
case 'unixtime': {
const text = node.children[0].type === 'text' ? node.children[0].props.text : '';
try {
const date = new Date(parseInt(text, 10) * 1000);
return `<time datetime="${escapeHtml(date.toISOString())}">${escapeHtml(date.toISOString())}</time>`;
} catch (_) {
return fnDefault(node);
}
}
case 'ruby': {
if (node.children.length === 1) {
const child = node.children[0];
const text = child.type === 'text' ? child.props.text : '';
// ruby未対応のHTMLサニタイザーを通したときにルビが「対象テキストルビテキスト」にフォールバックするようにする
return `<ruby>${escapeHtml(text.split(' ')[0])}<rp>(</rp><rt>${escapeHtml(text.split(' ')[1])}</rt><rp>)</rp></ruby>`;
} else {
const rt = node.children.at(-1);
if (!rt) {
return fnDefault(node);
}
const text = rt.type === 'text' ? rt.props.text : '';
// ruby未対応のHTMLサニタイザーを通したときにルビが「対象テキストルビテキスト」にフォールバックするようにする
return `<ruby>${toHtml(node.children.slice(0, node.children.length - 1))}<rp>(</rp><rt>${escapeHtml(text.trim())}</rt><rp>)</rp></ruby>`;
}
}
default: {
return fnDefault(node);
}
}
},
blockCode: (node) => {
return `<pre><code>${escapeHtml(node.props.code)}</code></pre>`;
},
center: (node) => {
return `<div style="text-align: center;">${toHtml(node.children)}</div>`;
},
emojiCode: (node) => {
return `\u200B:${escapeHtml(node.props.name)}:\u200B`;
},
unicodeEmoji: (node) => {
return node.props.emoji;
},
hashtag: (node) => {
return `<a href="${escapeHtml(`${this.config.url}/tags/${encodeURIComponent(node.props.hashtag)}`)}" rel="tag">#${escapeHtml(node.props.hashtag)}</a>`;
},
inlineCode: (node) => {
return `<code>${escapeHtml(node.props.code)}</code>`;
},
mathInline: (node) => {
return `<code>${escapeHtml(node.props.formula)}</code>`;
},
mathBlock: (node) => {
return `<pre><code>${escapeHtml(node.props.formula)}</code></pre>`;
},
link: (node) => {
try {
const url = new URL(node.props.url);
return `<a href="${escapeHtml(url.href)}">${toHtml(node.children)}</a>`;
} catch (_) {
return `[${toHtml(node.children)}](${escapeHtml(node.props.url)})`;
}
},
mention: (node) => {
const { username, host, acct } = node.props;
const remoteUserInfo = mentionedRemoteUsers.find(remoteUser => remoteUser.username.toLowerCase() === username.toLowerCase() && remoteUser.host?.toLowerCase() === host?.toLowerCase());
const href = remoteUserInfo
? (remoteUserInfo.url ? remoteUserInfo.url : remoteUserInfo.uri)
: `${this.config.url}/${acct.endsWith(`@${this.config.url}`) ? acct.substring(0, acct.length - this.config.url.length - 1) : acct}`;
try {
const url = new URL(href);
return `<a href="${escapeHtml(url.href)}" class="u-url mention">${escapeHtml(acct)}</a>`;
} catch (_) {
return escapeHtml(acct);
}
},
quote: (node) => {
return `<blockquote>${toHtml(node.children)}</blockquote>`;
},
text: (node) => {
if (!node.props.text.match(/[\r\n]/)) {
return escapeHtml(node.props.text);
}
let html = '';
const lines = node.props.text.split(/\r\n|\r|\n/).map(x => escapeHtml(x));
for (const x of intersperse<FIXME | 'br'>('br', lines)) {
html += x === 'br' ? '<br />' : x;
}
return html;
},
url: (node) => {
try {
const url = new URL(node.props.url);
return `<a href="${escapeHtml(url.href)}">${escapeHtml(node.props.url)}</a>`;
} catch (_) {
return escapeHtml(node.props.url);
}
},
search: (node) => {
return `<a href="${escapeHtml(`https://www.google.com/search?q=${encodeURIComponent(node.props.query)}`)}">${escapeHtml(node.props.content)}</a>`;
},
plain: (node) => {
return `<span>${toHtml(node.children)}</span>`;
},
} satisfies { [K in mfm.MfmNode['type']]: (node: mfm.NodeType<K>) => string } as { [K in mfm.MfmNode['type']]: (node: mfm.MfmNode) => string };
return `${toHtml(nodes)}${extraHtml ?? ''}`;
}
}