Skip to content

Commit

Permalink
fix: Filter xml markdown fences around changes
Browse files Browse the repository at this point in the history
Gemini likes to ignore our instructions to not put changes in fences;
rather than fight its primal urges, let's just strip them out.
  • Loading branch information
dividedmind committed Oct 29, 2024
1 parent c9e71e3 commit f41b7a1
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 6 deletions.
2 changes: 1 addition & 1 deletion packages/navie/src/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ export type AgentResponse = {
};

export interface Agent {
filter?: (stream: AsyncIterable<string>) => AsyncIterable<string>;
readonly filter?: (stream: AsyncIterable<string>) => AsyncIterable<string>;

perform(options: AgentOptions, tokensAvailable: () => number): Promise<AgentResponse | void>;

Expand Down
59 changes: 59 additions & 0 deletions packages/navie/src/agents/generate-agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -210,4 +210,63 @@ export default class GenerateAgent implements Agent {
)
);
}

filter = filterXmlFencesAroundChangesets;
}

/**
* Split haystack on the first occurence of needle (or partial needle at the suffix)
* @example splitOn("abc---def", "---") // ["abc", "---", "def"]
* @example splitOn("abc--", "---") // ["abc", "--", ""]
* @example splitOn("abc", "---") // ["abc", "", ""]
* @example splitOn("abc---def---ghi", "---") // ["abc", "---", "def---ghi"]
* @example splitOn("abc---def---ghi", "--") // ["abc", "--", "-def---ghi"]
* @example splitOn("abc-def-ghi", "---") // ["abc-def-ghi", "", ""]
* @example splitOn("abc-def-ghi", "def") // ["abc-", "def", "-ghi"]
* @param haystack the string to split
* @param needle the string to split on
* @returns an array of strings
*/
export function splitOn(haystack: string, needle: string): [string, string, string] {
let needleIdx = 0;
let haystackIdx = 0;
while (needleIdx < needle.length && haystackIdx < haystack.length) {
if (haystack[haystackIdx] === needle[needleIdx]) needleIdx++;
else needleIdx = 0;
haystackIdx++;
}
return [
haystack.slice(0, haystackIdx - needleIdx),
haystack.slice(haystackIdx - needleIdx, haystackIdx),
haystack.slice(haystackIdx),
];
}

// Some models (looking at you Gemini) REALLY like markdown fences.
// Let's make sure to filter them out around the changesets.
export async function* filterXmlFencesAroundChangesets(
stream: AsyncIterable<string>
): AsyncIterable<string> {
let buffer = '';
let outside = true;
for await (const chunk of stream) {
buffer += chunk;
while (buffer) {
const [before, fence, after] = splitOn(
buffer,
outside ? '```xml\n<change>' : '</change>\n```'
);
yield before;
if (fence) {
if (after) {
yield outside ? '<change>' : '</change>';
outside = !outside;
} else {
buffer = fence;
break;
}
}
buffer = after;
}
}
}
4 changes: 3 additions & 1 deletion packages/navie/src/agents/test-agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import ContextService from '../services/context-service';
import FileChangeExtractorService from '../services/file-change-extractor-service';
import FileContentFetcher from '../services/file-content-fetcher';

import { GENERATE_AGENT_FORMAT } from './generate-agent';
import { filterXmlFencesAroundChangesets, GENERATE_AGENT_FORMAT } from './generate-agent';

export const TEST_AGENT_PROMPT = `**Task: Generate a Test Case**
Expand Down Expand Up @@ -91,4 +91,6 @@ export default class TestAgent implements Agent {
)
);
}

filter = filterXmlFencesAroundChangesets;
}
93 changes: 89 additions & 4 deletions packages/navie/test/agents/generate-agent.spec.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
/* eslint-disable @typescript-eslint/unbound-method */
import InteractionHistory from '../../src/interaction-history';
import ApplyContextService from '../../src/services/apply-context-service';
import VectorTermsService from '../../src/services/vector-terms-service';
import LookupContextService from '../../src/services/lookup-context-service';
import { AgentOptions } from '../../src/agent';
import GenerateAgent, { splitOn } from '../../src/agents/generate-agent';
import { suggestsVectorTerms } from '../fixture';
import GenerateAgent from '../../src/agents/generate-agent';
import { UserOptions } from '../../src/lib/parse-options';
import ContextService from '../../src/services/context-service';
import FileChangeExtractorService from '../../src/services/file-change-extractor-service';
Expand All @@ -18,7 +19,7 @@ describe('@generate agent', () => {
let contextService: ContextService;
let tokensAvailable: number;

beforeEach(async () => {
beforeEach(() => {
tokensAvailable = 1000;
interactionHistory = new InteractionHistory();
lookupContextService = {
Expand Down Expand Up @@ -56,8 +57,8 @@ describe('@generate agent', () => {
[
{
directory: 'twitter',
appmapConfig: { language: 'ruby' } as unknown as any,
appmapStats: { numAppMaps: 1 } as unknown as any,
appmapConfig: { language: 'ruby' } as never,
appmapStats: { numAppMaps: 1 } as never,
},
]
);
Expand Down Expand Up @@ -98,4 +99,88 @@ describe('@generate agent', () => {
]);
});
});

describe('splitOn', () => {
it('splits on exact match', () => {
const result = splitOn('abc---def', '---');
expect(result).toEqual(['abc', '---', 'def']);
});

it('splits on partial match at suffix', () => {
const result = splitOn('abc--', '---');
expect(result).toEqual(['abc', '--', '']);
});

it('splits when needle is not found', () => {
const result = splitOn('abc', '---');
expect(result).toEqual(['abc', '', '']);
});

it('splits on first occurrence of needle', () => {
const result = splitOn('abc---def---ghi', '---');
expect(result).toEqual(['abc', '---', 'def---ghi']);
});

it('splits on match within string', () => {
const result = splitOn('abc---def---ghi', '--');
expect(result).toEqual(['abc', '--', '-def---ghi']);
});

it('returns entire string if needle is not found', () => {
const result = splitOn('abc-def-ghi', '---');
expect(result).toEqual(['abc-def-ghi', '', '']);
});

it('splits on exact match within string', () => {
const result = splitOn('abc-def-ghi', 'def');
expect(result).toEqual(['abc-', 'def', '-ghi']);
});
});

describe('#filter', () => {
it('filters out markdown fences around changesets', async () => {
const agent = buildAgent();
const input = [
'Some initial text\n',
'```xml\n<change>',
'<file change-number-for-this-file="1">/path/to/file</file>',
'</change>\n```',
'Some final text\n',
].join('\n');

const expectedOutput = [
'Some initial text\n',
'<change>',
'<file change-number-for-this-file="1">/path/to/file</file>',
'</change>',
'Some final text\n',
].join('\n');

const result = [];
for await (const chunk of agent.filter(streamStringByChunk(input))) result.push(chunk);

expect(result.join('')).toEqual(expectedOutput);
});

it('handles input without markdown fences correctly', async () => {
const agent = buildAgent();
const input = [
'Some initial text\n',
'<change>',
'<file change-number-for-this-file="1">/path/to/file</file>',
'</change>',
'Some final text\n',
].join('\n');

const result = [];
for await (const chunk of agent.filter(streamStringByChunk(input))) result.push(chunk);

expect(result.join('')).toEqual(input);
});
});
});

// eslint-disable-next-line @typescript-eslint/require-await
async function* streamStringByChunk(str: string): AsyncIterable<string> {
for (let i = 0; i < str.length; i += 3) yield str.slice(i, i + 3);
}

0 comments on commit f41b7a1

Please sign in to comment.