Skip to content

Commit

Permalink
Merge pull request #2223 from cardstack/cs-8047-user-is-not-able-to-s…
Browse files Browse the repository at this point in the history
…end-html-code-to-the-assistant

AI message: Escape html code sent by user
  • Loading branch information
jurgenwerk authored Mar 1, 2025
2 parents 04afb6f + 4ffa080 commit f961e94
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 11 deletions.
4 changes: 3 additions & 1 deletion packages/host/app/services/matrix-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import {
getPatchTool,
} from '@cardstack/runtime-common/helpers/ai';

import { escapeHtmlOutsideCodeBlocks } from '@cardstack/runtime-common/helpers/html';
import { getMatrixUsername } from '@cardstack/runtime-common/matrix-client';

import {
Expand Down Expand Up @@ -716,7 +717,8 @@ export default class MatrixService extends Service {
clientGeneratedId = uuidv4(),
context?: OperatorModeContext,
): Promise<void> {
let html = markdownToHtml(body);
let html = markdownToHtml(escapeHtmlOutsideCodeBlocks(body));

let tools: Tool[] = [getSearchTool()];
let attachedOpenCards: CardDef[] = [];
let submode = context?.submode;
Expand Down
13 changes: 12 additions & 1 deletion packages/matrix/tests/messages.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,9 @@ test.describe('Room messages', () => {
);
});

test(`it can send a markdown message`, async ({ page }) => {
test(`it can send a markdown message, and escape html tags`, async ({
page,
}) => {
await login(page, 'user1', 'pass', { url: appURL });
let room1 = await getRoomId(page);
await sendMessage(page, room1, 'message with _style_');
Expand All @@ -143,6 +145,15 @@ test.describe('Room messages', () => {
await expect(
page.locator(`[data-test-message-idx="0"] .content em`),
).toContainText('style');

await sendMessage(page, room1, '<h1>Hello</h1> <template>Hello</template>');
// this is to assert that the html tags are escaped
let innerHTML = await page
.locator('[data-test-message-idx="1"] .content')
.evaluate((el) => el.innerHTML);
expect(innerHTML).toContain(
'&lt;h1&gt;Hello&lt;/h1&gt; &lt;template&gt;Hello&lt;/template&gt;',
);
});

test(`it can create a room specific pending message`, async ({ page }) => {
Expand Down
62 changes: 62 additions & 0 deletions packages/runtime-common/helpers/html.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/**
 * Turns every angle bracket in `html` into its HTML entity so the text is
 * displayed literally instead of being parsed as markup.
 *
 * Example: `<h1>Hello</h1>` becomes `&lt;h1&gt;Hello&lt;/h1&gt;`. Without this,
 * a tag placed inside `<pre><code>…</code></pre>` would render as a real
 * element (a heading, a <template>, a <style>, ...) rather than as code.
 *
 * Only `<` and `>` are rewritten; every other character, including `&`, is
 * returned unchanged.
 *
 * @param html - Raw text that may contain HTML tags.
 * @returns The text with `<` replaced by `&lt;` and `>` replaced by `&gt;`.
 */
export function escapeHtmlTags(html: string) {
  return html.replace(/[<>]/g, (bracket) =>
    bracket === '<' ? '&lt;' : '&gt;',
  );
}

/**
 * Escapes `<` and `>` in a markdown message everywhere except inside code, so
 * that HTML typed by the user is shown as text instead of being parsed as
 * markup once the message is converted to HTML.
 *
 * Two kinds of code are passed through verbatim:
 *  - fenced blocks delimited by triple backticks (may span several lines);
 *  - inline code spans: a run of backticks closed by a run of the same length
 *    on the same line.
 *
 * Code is left alone because it is escaped again when the markdown is
 * rendered. NOTE(review): for inline spans this relies on the markdown
 * renderer encoding code span content itself (marked does); pre-escaping there
 * would surface as a literal `&lt;` in the rendered message.
 *
 * Example input:
 *   Hey can you teach me how to use the <h1> tag? Is `<h1>` correct?
 *   ```html
 *   <h1>Hello</h1>
 *   ```
 *
 * Output:
 *   Hey can you teach me how to use the &lt;h1&gt; tag? Is `<h1>` correct?
 *   ```html
 *   <h1>Hello</h1>
 *   ```
 *
 * @param text - The raw markdown message.
 * @returns The message with angle brackets outside code escaped, or
 *   `undefined` when `text` is `undefined`.
 */
export function escapeHtmlOutsideCodeBlocks(
  text?: string,
): string | undefined {
  if (text === undefined) {
    return text;
  }

  // Alternative 1: a fenced block, matched lazily up to the next fence.
  // Alternative 2: a whole run of backticks, optionally followed by inline
  // content and a closing run of identical length on the same line. Matching
  // the run even when it has no closing partner keeps the scanner from
  // re-entering the middle of that run and pairing it with a shorter one.
  let codeRegex = /```[\s\S]*?```|(`+)(?:(?!`)[^\n]*?[^`\n]\1(?!`))?/g;

  let result = '';
  let lastIndex = 0;
  let match: RegExpExecArray | null;

  while ((match = codeRegex.exec(text)) !== null) {
    // Plain text between the previous code segment and this one gets escaped.
    result += escapeHtmlTags(text.substring(lastIndex, match.index));
    // The code segment itself is copied through untouched.
    result += match[0];
    lastIndex = match.index + match[0].length;
  }

  // Escape whatever follows the last code segment (or the entire text when no
  // code was found).
  result += escapeHtmlTags(text.substring(lastIndex));

  return result;
}
11 changes: 2 additions & 9 deletions packages/runtime-common/marked-sync.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { marked } from 'marked';
import { sanitizeHtml } from './dompurify-runtime';
import { simpleHash } from '.';
import { escapeHtmlTags } from './helpers/html';

const CODEBLOCK_KEY_PREFIX = 'codeblock_';

Expand All @@ -22,7 +23,7 @@ export function markedSync(markdown: string) {
// also note that since we are in common, we don't have ember-window-mock
// available to us.
globalThis.localStorage?.setItem(id, code);
return `<pre id="${id}" class="language-${language}" data-codeblock="${language}">${escapeHtmlInPreTags(code)}</pre></div>`;
return `<pre id="${id}" class="language-${language}" data-codeblock="${language}">${escapeHtmlTags(code)}</pre></div>`;
},
},
})
Expand All @@ -32,11 +33,3 @@ export function markedSync(markdown: string) {
/**
 * Converts markdown to sanitized HTML.
 *
 * @param markdown - Markdown source; may be `null` or `undefined`.
 * @returns The output of `markedSync` passed through `sanitizeHtml`, or an
 *   empty string when `markdown` is `null`, `undefined`, or empty.
 */
export function markdownToHtml(markdown: string | null | undefined): string {
  if (!markdown) {
    return '';
  }
  let rendered = markedSync(markdown);
  return sanitizeHtml(rendered);
}

/**
 * Replaces angle brackets with HTML entities so code placed inside a `<pre>`
 * element is displayed as text.
 *
 * Example: `<h1>Hello</h1>` becomes `&lt;h1&gt;Hello&lt;/h1&gt;`; otherwise the
 * h1 would render as a real header rather than as code (the same goes for
 * other tags such as <template> or <style>).
 *
 * @param html - Raw code text that may contain HTML tags.
 * @returns The text with `<` replaced by `&lt;` and `>` replaced by `&gt;`.
 */
function escapeHtmlInPreTags(html: string) {
  let withoutOpeningBrackets = html.split('<').join('&lt;');
  return withoutOpeningBrackets.split('>').join('&gt;');
}

0 comments on commit f961e94

Please sign in to comment.