Skip to content

Commit

Permalink
fix: citations breaking leading markdown (#721)
Browse files Browse the repository at this point in the history
  • Loading branch information
tomtobac authored Aug 21, 2024
1 parent 3db6a4d commit 031e240
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 14 deletions.
4 changes: 2 additions & 2 deletions src/interfaces/assistants_web/src/app/(main)/(chat)/Chat.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import { useConversation } from '@/hooks/conversation';
import { useListTools } from '@/hooks/tools';
import { useCitationsStore, useConversationStore, useParamsStore } from '@/stores';
import { OutputFiles } from '@/stores/slices/citationsSlice';
import { createStartEndKey, mapHistoryToMessages } from '@/utils';
import { createStartEndKey, fixCitationsLeadingMarkdown, mapHistoryToMessages } from '@/utils';
import { parsePythonInterpreterToolFields } from '@/utils/tools';

const Chat: React.FC<{ agentId?: string; conversationId?: string }> = ({
Expand Down Expand Up @@ -85,7 +85,7 @@ const Chat: React.FC<{ agentId?: string; conversationId?: string }> = ({
}
}
});
message.citations?.forEach((citation) => {
fixCitationsLeadingMarkdown(message.citations, message.text)?.forEach((citation) => {
const startEndKey = createStartEndKey(citation.start ?? 0, citation.end ?? 0);
const documents = citation.document_ids?.map((id) => documentsMap[id]) ?? [];
addCitation(message.generation_id ?? '', startEndKey, documents);
Expand Down
7 changes: 4 additions & 3 deletions src/interfaces/assistants_web/src/hooks/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import {
} from '@/types/message';
import {
createStartEndKey,
fixCitationsLeadingMarkdown,
fixMarkdownImagesInText,
isGroundingOn,
replaceTextWithCitations,
Expand Down Expand Up @@ -378,10 +379,10 @@ export const useChat = (config?: { onSend?: (msg: string) => void }) => {
case StreamEvent.CITATION_GENERATION: {
const data = eventData.data;
const newCitations = [...(data?.citations ?? [])];
newCitations.sort((a, b) => (a.start ?? 0) - (b.start ?? 0));
citations.push(...newCitations);
const fixedCitations = fixCitationsLeadingMarkdown(newCitations, botResponse);
citations.push(...fixedCitations);
citations.sort((a, b) => (a.start ?? 0) - (b.start ?? 0));
saveCitations(generationId, newCitations, documentsMap);
saveCitations(generationId, fixedCitations, documentsMap);

setStreamingMessage({
type: MessageType.BOT,
Expand Down
102 changes: 101 additions & 1 deletion src/interfaces/assistants_web/src/utils/citations.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { describe, expect, test } from 'vitest';

import { Citation } from '@/cohere-client';
import { replaceTextWithCitations } from '@/utils/citations';
import { fixCitationsLeadingMarkdown, replaceTextWithCitations } from '@/utils/citations';

describe('replaceTextWithCitations', () => {
test('should replace text with citations', () => {
Expand Down Expand Up @@ -83,4 +83,104 @@ describe('replaceTextWithCitations', () => {
'Abra :cite[![test](https://test.com)]{generationId="12345" start="5" end="31"} :cite[Kadabra]{generationId="12345" start="32" end="39"}'
);
});

test('should allow citations as markdown elements', () => {
  // The cited span covers the whole bold run, including both `**` markers.
  const boldCitation: Citation = {
    text: '**test**',
    start: 10,
    end: 18,
    document_ids: ['12345'],
  };

  const replaced = replaceTextWithCitations('This is a **test** text', [boldCitation], '12345');

  expect(replaced).toBe('This is a :cite[**test**]{generationId="12345" start="10" end="18"} text');
});
});

describe('fixCitationsLeadingMarkdown', () => {
  test('should fix leading markdown citations breaking markdown', () => {
    const sourceText =
      'The `ENVIRONMENT DIVISION` should be included after the `IDENTIFICATION DIVISION`. The `CONFIGURATION SECTION` and `SPECIAL-NAMES` should be included within the `ENVIRONMENT DIVISION`.';

    // Every citation in this scenario points at the same document.
    const cite = (text: string, start: number, end: number): Citation => ({
      text,
      start,
      end,
      document_ids: ['111111'],
    });

    // Citations that open with a backtick stop right before the closing backtick in the text.
    const fixed = fixCitationsLeadingMarkdown(
      [
        cite('`ENVIRONMENT DIVISION', 4, 25),
        cite('after the', 46, 55),
        cite('`IDENTIFICATION DIVISION', 56, 80),
        cite('`CONFIGURATION SECTION', 87, 109),
        cite('`SPECIAL-NAMES', 115, 129),
        cite('within the', 150, 160),
      ],
      sourceText
    );

    // Backtick citations grow by one character to include the closing backtick;
    // plain-text citations are left as they were.
    expect(fixed).toStrictEqual([
      cite('`ENVIRONMENT DIVISION`', 4, 26),
      cite('after the', 46, 55),
      cite('`IDENTIFICATION DIVISION`', 56, 81),
      cite('`CONFIGURATION SECTION`', 87, 110),
      cite('`SPECIAL-NAMES`', 115, 130),
      cite('within the', 150, 160),
    ]);
  });
});
22 changes: 21 additions & 1 deletion src/interfaces/assistants_web/src/utils/citations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,26 @@ export const fixMarkdownImagesInText = (text: string) => {

const formatter = new Intl.ListFormat('en', { style: 'long', type: 'conjunction' });

/**
 * Extends citations that open with a markdown delimiter so that they also cover the
 * matching closing delimiter sitting immediately after them in the original text.
 *
 * A citation such as "`CODE" (whose closing backtick falls just outside the cited range)
 * would otherwise split the markdown element in two once the citation markup is inserted.
 *
 * The input array and its citation objects are never mutated: citations that need fixing
 * are returned as new objects, all others are returned as-is. This keeps repeated calls on
 * the same message data from extending a citation more than once.
 *
 * @param citations - Citations to inspect; `null`/`undefined` is treated as an empty list.
 * @param originalText - The text the citation offsets refer to; `null`/`undefined` is
 *   treated as an empty string, in which case no citation is changed.
 * @returns A new array with the same citations in the same order, with `text` and `end`
 *   extended where a closing delimiter directly follows the citation.
 */
export const fixCitationsLeadingMarkdown = (
  citations: Citation[] | null | undefined,
  originalText: string | null | undefined
): Citation[] => {
  if (!citations?.length) return [];

  const sourceText = originalText ?? '';
  // Ordered longest-first so a leading `**` is recognised as bold instead of a single `*`.
  const markdownDelimiters = ['**', '`', '*'];

  return citations.map((citation) => {
    const { text, end } = citation;
    // Citations come from API payloads; leave anything without usable text/offset untouched.
    if (typeof text !== 'string' || typeof end !== 'number') return citation;

    const opening = markdownDelimiters.find((delimiter) => text.startsWith(delimiter));
    // Compare the whole delimiter (not a single character) against the text right after
    // the citation, so multi-character delimiters such as `**` can match.
    if (opening === undefined || !sourceText.startsWith(opening, end)) return citation;

    return { ...citation, text: text + opening, end: end + opening.length };
  });
};

/**
* Replace text string with citations following the format:
* :cite[<text>]{generationId="<generationId>" start="<startIndex>" end="<endIndex>"}
Expand All @@ -26,10 +46,10 @@ export const replaceTextWithCitations = (
) => {
if (!citations.length || !generationId) return text;
let replacedText = text;

let lengthDifference = 0; // Track the cumulative length difference
let notFoundReferences: string[] = [];
let carryOver = 0;

citations
.filter((citation) => citation.document_ids.length)
.forEach(({ start = 0, end = 0, text: citationText }, index) => {
Expand Down
10 changes: 3 additions & 7 deletions src/interfaces/assistants_web/src/utils/conversation.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { Message, MessageAgent } from '@/cohere-client';
import { BotState, ChatMessage, FulfilledMessage, MessageType, UserMessage } from '@/types/message';
import { replaceTextWithCitations } from '@/utils/citations';
import { fixCitationsLeadingMarkdown, replaceTextWithCitations } from '@/utils/citations';
import { replaceCodeBlockWithIframe } from '@/utils/preview';

/**
Expand Down Expand Up @@ -47,7 +47,7 @@ export const mapHistoryToMessages = (history?: Message[]): UserOrBotMessage[] =>
originalText: message.text ?? '',
text: replaceTextWithCitations(
replaceCodeBlockWithIframe(message.text) ?? '',
message.citations ?? [],
fixCitationsLeadingMarkdown(message.citations ?? [], message.text),
message.generation_id ?? ''
),
generationId: message.generation_id ?? '',
Expand All @@ -69,11 +69,7 @@ export const mapHistoryToMessages = (history?: Message[]): UserOrBotMessage[] =>
} else {
messages.push({
type: MessageType.USER,
text: replaceTextWithCitations(
message.text ?? '',
message.citations ?? [],
message.generation_id ?? ''
),
text: message.text,
files: message.files,
});
}
Expand Down

0 comments on commit 031e240

Please sign in to comment.