diff --git a/packages/cli/src/rpc/explain/collect-context.ts b/packages/cli/src/rpc/explain/collect-context.ts index 664a456ca5..83d2224857 100644 --- a/packages/cli/src/rpc/explain/collect-context.ts +++ b/packages/cli/src/rpc/explain/collect-context.ts @@ -86,9 +86,8 @@ export function buildContextRequest( request.includePatterns = filters.include.map((pattern) => new RegExp(pattern)); if (filters?.itemTypes) request.includeTypes = filters.itemTypes.map((type) => type); if (filters?.locations) { - request.locations = filters.locations - .map((location) => Location.parse(location)) - .filter(Boolean) as Location[]; + // eslint-disable-next-line @typescript-eslint/unbound-method + request.locations = filters.locations.map(Location.parse); warn(`Parsed locations: ${request.locations.map((loc) => loc.toString()).join(', ')}`); } @@ -109,13 +108,18 @@ export default async function collectContext( sourceDirectories: string[], charLimit: number, vectorTerms: string[], - request: ContextRequest + request: ContextRequest, + explicitFiles: string[] = [] ): Promise<{ searchResponse: SearchRpc.SearchResponse; context: ContextV2.ContextResponse }> { let searchResponse: SearchRpc.SearchResponse = { results: [], numResults: 0 }; const context: ContextV2.ContextResponse = []; if (request.locations && request.locations.length > 0) { - const locationResult = await collectLocationContext(sourceDirectories, request.locations); + const locationResult = await collectLocationContext( + sourceDirectories, + request.locations, + explicitFiles + ); context.push(...locationResult); } diff --git a/packages/cli/src/rpc/explain/collect-location-context.ts b/packages/cli/src/rpc/explain/collect-location-context.ts index 31ee7616ab..ad887edada 100644 --- a/packages/cli/src/rpc/explain/collect-location-context.ts +++ b/packages/cli/src/rpc/explain/collect-location-context.ts @@ -1,9 +1,12 @@ -import { readFile } from 'fs/promises'; import { warn } from 'console'; -import { isAbsolute, join } from 'path'; +import { readdir, readFile, stat } from 'node:fs/promises'; +import { basename, dirname, isAbsolute, join } from 'node:path'; + import { ContextV2 } from '@appland/navie'; +import { isBinaryFile } from '@appland/search'; + +import { verbose } from '../../utils'; import Location from './location'; -import { exists, isFile, verbose } from '../../utils'; export type LocationContextRequest = { sourceDirectories: string[]; @@ -24,16 +27,26 @@ export type LocationContextRequest = { */ export default async function collectLocationContext( sourceDirectories: string[], - locations: Location[] + locations: Location[], + explicitFiles: string[] = [] ): Promise { const result: ContextV2.ContextResponse = []; - const candidateLocations = new Array<{ location: Location; directory?: string }>(); + const candidateLocations = new Array<{ location: Location; directory: string }>(); for (const location of locations) { const { path } = location; if (isAbsolute(path)) { const directory = sourceDirectories.find((dir) => path.startsWith(dir)); - candidateLocations.push({ location, directory }); + if (directory) { + location.path = location.path.slice(directory.length + 1); + candidateLocations.push({ location, directory }); + } else if (explicitFiles.includes(path)) { + location.path = basename(path); + candidateLocations.push({ location, directory: dirname(path) }); + } else { + warn(`[location-context] Skipping location outside source directories: ${location.path}`); + continue; + } } else { for (const sourceDirectory of sourceDirectories) { candidateLocations.push({ location, directory: sourceDirectory }); @@ -55,12 +68,22 @@ export default async function collectLocationContext( else if (directory) pathTokens = [directory, location.path].filter(Boolean); const path = join(...pathTokens); - if (!(await exists(path))) { + const stats = await stat(path).catch(() => undefined); + if (!stats) { if (verbose()) warn(`[location-context] Skipping non-existent location: ${path}`); + // TODO: tell the client? continue; - } - if (!(await isFile(path))) { + } else if (stats.isDirectory()) { + result.push(await directoryContextItem(path, location, directory)); + continue; + } else if (!stats.isFile()) { if (verbose()) warn(`[location-context] Skipping non-file location: ${path}`); + // TODO: tell the client? + continue; + } + + if (isBinaryFile(path)) { + if (verbose()) warn(`[location-context] Skipping binary file: ${path}`); continue; } @@ -69,6 +92,7 @@ export default async function collectLocationContext( contents = await readFile(path, 'utf8'); } catch (e) { warn(`[location-context] Failed to read file: ${path}`); + // TODO: tell the client? continue; } @@ -90,3 +114,34 @@ export default async function collectLocationContext( return result; } + +async function directoryContextItem( + path: string, + location: Location, + directory: string +): Promise { + const depth = Number(location.lineRange) || 0; + const entries: string[] = []; + for await (const entry of listDirectory(path, depth)) entries.push(entry); + return { + type: ContextV2.ContextItemType.DirectoryListing, + content: entries.join('\n'), + location: location.toString(), + directory, + }; +} + +async function* listDirectory(path: string, depth: number): AsyncGenerator { + const entries = await readdir(path, { withFileTypes: true }); + for (const entry of entries) { + const entryPath = join(path, entry.name); + if (entry.isDirectory()) { + if (depth > 0) { + yield `${entry.name}/`; + for await (const subentry of listDirectory(entryPath, depth - 1)) yield `\t${subentry}`; + } else yield `${entry.name}/ (${(await readdir(entryPath)).length} entries)`; + } else if (entry.isFile()) { + yield entry.name; + } + } +} diff --git a/packages/cli/src/rpc/explain/collect-snippets.ts b/packages/cli/src/rpc/explain/collect-snippets.ts index d21060da80..c8f50e091b 100644 --- a/packages/cli/src/rpc/explain/collect-snippets.ts +++ b/packages/cli/src/rpc/explain/collect-snippets.ts @@ -22,7 +22,12 @@ export default function collectSnippets( const buildLocation = (result: SnippetSearchResult) => { const snippetId = parseFileChunkSnippetId(result.snippetId); const { filePath, startLine } = snippetId; - return [filePath, startLine].filter(Boolean).join(':'); + let location = filePath; + if (startLine) { + const endLine = startLine + result.content.split('\n').length - 1; + location += `:${startLine}-${endLine}`; + } + return location; }; return snippets.map((snippet) => ({ diff --git a/packages/cli/src/rpc/explain/explain.ts b/packages/cli/src/rpc/explain/explain.ts index 8aba883447..1b8a0c7a6b 100644 --- a/packages/cli/src/rpc/explain/explain.ts +++ b/packages/cli/src/rpc/explain/explain.ts @@ -1,3 +1,5 @@ +import makeDebug from 'debug'; + import { AI, ConversationThread, @@ -28,6 +30,8 @@ import handleReview from './review'; const searchStatusByUserMessageId = new Map(); +const debug = makeDebug('appmap:explain:rpc'); + export type SearchContextOptions = { tokenCount: number; vectorTerms: string[]; @@ -158,12 +162,17 @@ export class Explain extends EventEmitter { data ); + const explicitFiles = Array.isArray(this.codeSelection) + ? this.codeSelection.filter(UserContext.hasLocation).map((cs) => cs.location) + : []; + const searchResult = await collectContext( this.appmapDirectories.map((dir) => dir.directory), this.projectDirectories, charLimit, contextRequest.vectorTerms, - contextRequest.request + contextRequest.request, + explicitFiles ); this.status.searchResponse = searchResult.searchResponse; @@ -248,6 +257,8 @@ export async function explain( codeEditor: string | undefined, prompt: string | undefined ): Promise { + debug('Code selection: ', codeSelection); + const status: ExplainRpc.ExplainStatusResponse = { step: ExplainRpc.Step.NEW, threadId, diff --git a/packages/cli/src/rpc/explain/location.ts b/packages/cli/src/rpc/explain/location.ts index 6cc679ef2a..bb3e6e9719 100644 --- a/packages/cli/src/rpc/explain/location.ts +++ b/packages/cli/src/rpc/explain/location.ts @@ -1,13 +1,31 @@ +import { warn } from 'node:console'; + export default class Location { constructor(public path: string, public lineRange?: string) {} snippet(contents: string): string { - if (!this.lineRange) return contents; + if (!this.lineRange) { + if (contents.length > MAX_BYTES) { + // collect only as many COMPLETE lines as will fit + const lines = contents.split('\n'); + let bytes = 0; + let i = 0; + for (; i < lines.length; i++) { + bytes += lines[i].length + 1; + if (bytes > MAX_BYTES) break; + } + if (i === 0) i++; // at least one line + warn(`Snippet too large, showing only ${i} lines`); + // set the line range to reflect this + this.lineRange = `1-${i}`; + return lines.slice(0, i).join('\n'); + } else return contents; + } const [start, end] = this.lineRange.split('-').map(Number); const lines = contents.split('\n'); - const snippet = lines.slice(start - 1, end || lines.length); + const snippet = lines.slice(Math.max(start - 1, 0), end || lines.length); return snippet.join('\n'); } @@ -15,7 +33,7 @@ export default class Location { return this.lineRange ? `${this.path}:${this.lineRange}` : this.path; } - static parse(location: string): Location | undefined { + static parse(location: string): Location { const tokens = location.split(':'); if (tokens.length === 1) return new Location(tokens[0]); @@ -31,3 +49,7 @@ export default class Location { return new Location(path, lineRange); } } + +// Note this is somewhat of a tradeoff between speed and cost. +// The client can always request additional lines. +const MAX_BYTES = 20_000; diff --git a/packages/cli/src/rpc/explain/navie/navie-local.ts b/packages/cli/src/rpc/explain/navie/navie-local.ts index 4014a716cf..52ef2c6629 100644 --- a/packages/cli/src/rpc/explain/navie/navie-local.ts +++ b/packages/cli/src/rpc/explain/navie/navie-local.ts @@ -1,7 +1,8 @@ import { log, warn } from 'console'; import EventEmitter from 'events'; import { randomUUID } from 'crypto'; -import { ContextV2, Navie, Help, ProjectInfo, navie } from '@appland/navie'; + +import { ContextV2, Help, Navie, navie, ProjectInfo, UserContext } from '@appland/navie'; import INavie from './inavie'; import Telemetry from '../../../telemetry'; @@ -78,7 +79,7 @@ export default class LocalNavie extends EventEmitter implements INavie { async ask( threadId: string | undefined, question: string, - codeSelection?: string, + codeSelection?: UserContext.Context, prompt?: string ): Promise { if (!threadId) { diff --git a/packages/cli/tests/unit/rpc/explain/collect-context.spec.ts b/packages/cli/tests/unit/rpc/explain/collect-context.spec.ts index 79f3e3cfc0..a5dc0d4de8 100644 --- a/packages/cli/tests/unit/rpc/explain/collect-context.spec.ts +++ b/packages/cli/tests/unit/rpc/explain/collect-context.spec.ts @@ -106,7 +106,7 @@ describe('collect-context', () => { (collectLocationContext.default as jest.Mock).mockResolvedValue(['context1', 'context2']); const request: ContextRequest = { - locations: [Location.parse('location1')!, Location.parse('location2')!], + locations: [Location.parse('location1'), Location.parse('location2')], }; const result = await collectContext( ['dir1', 'dir2'], @@ -119,7 +119,8 @@ describe('collect-context', () => { expect(collectSearchContext.default).not.toHaveBeenCalled(); expect(collectLocationContext.default).toHaveBeenCalledWith( ['src1', 'src2'], - request.locations + request.locations, + [] ); expect(result.searchResponse.numResults).toBe(0); expect(result.context).toEqual(['context1', 'context2']); diff --git a/packages/cli/tests/unit/rpc/explain/collect-location-context.spec.ts b/packages/cli/tests/unit/rpc/explain/collect-location-context.spec.ts index 0466fc5f75..b98993a6f3 100644 --- a/packages/cli/tests/unit/rpc/explain/collect-location-context.spec.ts +++ b/packages/cli/tests/unit/rpc/explain/collect-location-context.spec.ts @@ -1,21 +1,18 @@ -import * as fs from 'fs/promises'; -import * as utils from '../../../../src/utils'; +import { type Dirent, type Stats } from 'node:fs'; +import * as fs from 'node:fs/promises'; + +import { isBinaryFile } from '@appland/search'; import Location from '../../../../src/rpc/explain/location'; import collectLocationContext from '../../../../src/rpc/explain/collect-location-context'; -jest.mock('fs/promises'); -// eslint-disable-next-line @typescript-eslint/no-unsafe-return -jest.mock('../../../../src/utils', () => ({ - ...jest.requireActual('../../../../src/utils'), - exists: jest.fn(), - isFile: jest.fn(), -})); +jest.mock('node:fs/promises'); +jest.mock('@appland/search'); describe('collectLocationContext', () => { const sourceDirectories = ['/src', '/lib']; - beforeEach(() => jest.resetAllMocks()); + afterEach(jest.resetAllMocks); describe('with empty locations', () => { it('handles empty locations', async () => { @@ -25,51 +22,131 @@ describe('collectLocationContext', () => { }); describe('with valid locations', () => { - const locations: Location[] = [ - { path: 'file1.js', snippet: (contents: string) => contents.slice(0, 10) }, - { path: '/src/file2.js', snippet: (contents: string) => contents.slice(0, 10) }, - { path: '/other/file3.js', snippet: (contents: string) => contents.slice(0, 10) }, - ]; + const locations = ['file1.js:1-1', '/src/file2.js', '/other/file3.js'].map(Location.parse); + const explicitFiles = ['/other/file3.js']; - const collect = async () => collectLocationContext(sourceDirectories, locations); + const collect = async () => collectLocationContext(sourceDirectories, locations, explicitFiles); - it('handles valid locations', async () => { - jest.spyOn(utils, 'exists').mockResolvedValue(true); - jest.spyOn(utils, 'isFile').mockResolvedValue(true); + const stat = jest.mocked(fs.stat); + beforeEach(() => { + stat.mockResolvedValue({ isDirectory: () => false, isFile: () => true } as Stats); jest.spyOn(fs, 'readFile').mockResolvedValue('file contents'); + }); + + it('includes explicitly named files even if outside source directories', async () => { + jest.mocked(isBinaryFile).mockReturnValue(false); + + expect(await collect()).toMatchInlineSnapshot(` + [ + { + "content": "file contents", + "directory": "/src", + "location": "file1.js:1-1", + "type": "code-snippet", + }, + { + "content": "file contents", + "directory": "/lib", + "location": "file1.js:1-1", + "type": "code-snippet", + }, + { + "content": "file contents", + "directory": "/src", + "location": "file2.js", + "type": "code-snippet", + }, + { + "content": "file contents", + "directory": "/other", + "location": "file3.js", + "type": "code-snippet", + }, + ] + `); + + expect(stat.mock.calls).toStrictEqual([ + ['/src/file1.js'], + ['/lib/file1.js'], + ['/src/file2.js'], + ['/other/file3.js'], + ]); + }); + + it('excludes non-explicitly named files outside source directories', async () => { + const nonExplicitLocations = ['file1.js:1-1', '/src/file2.js', '/other/file4.js'].map( + Location.parse + ); + const collectNonExplicit = async () => + collectLocationContext(sourceDirectories, nonExplicitLocations, explicitFiles); + + jest.mocked(isBinaryFile).mockReturnValue(false); + + expect(await collectNonExplicit()).toMatchInlineSnapshot(` + [ + { + "content": "file contents", + "directory": "/src", + "location": "file1.js:1-1", + "type": "code-snippet", + }, + { + "content": "file contents", + "directory": "/lib", + "location": "file1.js:1-1", + "type": "code-snippet", + }, + { + "content": "file contents", + "directory": "/src", + "location": "file2.js", + "type": "code-snippet", + }, + ] + `); + + expect(stat.mock.calls).toStrictEqual([ + ['/src/file1.js'], + ['/lib/file1.js'], + ['/src/file2.js'], + ]); + }); + + it('handles directory listings', async () => { + stat.mockResolvedValue({ isDirectory: () => true, isFile: () => false } as Stats); + jest.spyOn(fs, 'readdir').mockResolvedValue(['file1.js', 'file2.js'].map(mockDirent)); + + const result = await collectLocationContext(sourceDirectories, [Location.parse('/src:0')]); + expect(result).toEqual([ + { + type: 'directory-listing', + content: 'file1.js\nfile2.js', + location: ':0', + directory: '/src', + }, + ]); + }); + + it('skips binary files', async () => { + jest.mocked(isBinaryFile).mockReturnValue(true); const result = await collect(); - expect(result.length).toBe(4); - expect(result[0].content).toBe('file conte'); - expect(result[1].content).toBe('file conte'); - expect(result[2].content).toBe('file conte'); - expect(result[3].content).toBe('file conte'); - - expect(utils.exists).toHaveBeenCalledTimes(4); - expect(utils.exists).toHaveBeenCalledWith('/src/file1.js'); - expect(utils.exists).toHaveBeenCalledWith('/lib/file1.js'); - expect(utils.exists).toHaveBeenCalledWith('/src/file2.js'); - expect(utils.exists).toHaveBeenCalledWith('/other/file3.js'); + expect(result).toEqual([]); }); it('handles non-file locations', async () => { - jest.spyOn(utils, 'exists').mockResolvedValue(true); - jest.spyOn(utils, 'isFile').mockResolvedValue(false); - + stat.mockResolvedValue({ isDirectory: () => false, isFile: () => false } as Stats); const result = await collect(); expect(result).toEqual([]); }); it('handles non-existent files', async () => { - jest.spyOn(utils, 'exists').mockResolvedValue(false); - + stat.mockRejectedValue(new Error('Not found')); const result = await collect(); expect(result).toEqual([]); }); it('handles file reading errors', async () => { - jest.spyOn(utils, 'exists').mockResolvedValue(true); - jest.spyOn(utils, 'isFile').mockResolvedValue(true); jest.spyOn(fs, 'readFile').mockRejectedValue(new Error('Read error')); const result = await collect(); @@ -77,12 +154,32 @@ describe('collectLocationContext', () => { }); it('extracts snippets correctly', async () => { - jest.spyOn(utils, 'exists').mockResolvedValue(true); - jest.spyOn(utils, 'isFile').mockResolvedValue(true); - jest.spyOn(fs, 'readFile').mockResolvedValue('file contents'); + jest.spyOn(fs, 'readFile').mockResolvedValue('file conte\nnts'); const result = await collect(); expect(result[0].content).toBe('file conte'); }); + + it('handles large files by setting line range', async () => { + const largeContent = 'aaa\n'.repeat(6_000); + jest.spyOn(fs, 'readFile').mockResolvedValue(largeContent); + + // note the limit currently only applies to unbounded requests + const [, , result] = await collect(); + expect(result.content.length).toBeLessThanOrEqual(20_000); + }); }); }); + +function mockDirent(name: string): Dirent { + return { + name, + isFile: () => true, + isDirectory: () => false, + isBlockDevice: () => false, + isCharacterDevice: () => false, + isSymbolicLink: () => false, + isFIFO: () => false, + isSocket: () => false, + }; +} diff --git a/packages/cli/tests/unit/rpc/explain/collect-snippets.spec.ts b/packages/cli/tests/unit/rpc/explain/collect-snippets.spec.ts new file mode 100644 index 0000000000..439db78485 --- /dev/null +++ b/packages/cli/tests/unit/rpc/explain/collect-snippets.spec.ts @@ -0,0 +1,36 @@ +import { SnippetIndex } from '@appland/search'; +import { SnippetType } from '@appland/search/built/snippet-index'; + +import collectSnippets from '../../../../src/rpc/explain/collect-snippets'; + +describe('collectSnippets', () => { + it('should build location with start and end line numbers', () => { + const snippetIndex: SnippetIndex = { + searchSnippets: jest.fn().mockReturnValue([ + { + snippetId: { + id: 'file1.js:1', + type: SnippetType.FileChunk, + }, + content: 'line1\nline2\nline3', + directory: '/path/to/dir', + }, + ]), + } as never; + + const sessionId = 'session-id'; + const query = 'query'; + const charLimit = 100; + + const result = collectSnippets(snippetIndex, sessionId, query, charLimit); + + expect(result).toEqual([ + { + directory: '/path/to/dir', + type: 'code-snippet', + content: 'line1\nline2\nline3', + location: 'file1.js:1-3', + }, + ]); + }); +}); diff --git a/packages/cli/tests/unit/rpc/explain/location.spec.ts b/packages/cli/tests/unit/rpc/explain/location.spec.ts index 0e4314ab3a..4097271149 100644 --- a/packages/cli/tests/unit/rpc/explain/location.spec.ts +++ b/packages/cli/tests/unit/rpc/explain/location.spec.ts @@ -77,4 +77,10 @@ describe('Location', () => { const location = Location.parse('path/to/file.rb:1a'); expect(location).toEqual(new Location('path/to/file.rb:1a')); }); + + it('handles zero starting line correctly', () => { + const location = Location.parse('file1.js:0-2'); + const snippet = location.snippet('line 1\nline 2\nline 3'); + expect(snippet).toBe('line 1\nline 2'); + }); }); diff --git a/packages/navie/src/agents/explain-agent.ts b/packages/navie/src/agents/explain-agent.ts index f25e185d0d..67a46e0297 100644 --- a/packages/navie/src/agents/explain-agent.ts +++ b/packages/navie/src/agents/explain-agent.ts @@ -282,17 +282,8 @@ export default class ExplainAgent implements Agent { this.history.addEvent(new PromptInteractionEvent('agent', 'system', EXPLAIN_AGENT_PROMPT)); // Check for presence of "generate-diagram" classifier and its confidence level. - const classifier = options.contextLabels?.find( - (label) => - label.name === ContextV2.ContextLabelName.GenerateDiagram && - [ContextV2.ContextLabelWeight.Medium, ContextV2.ContextLabelWeight.High].includes( - label.weight - ) - ); - - if (classifier) { + if (hasLabel(options.contextLabels, ContextV2.ContextLabelName.GenerateDiagram)) this.history.addEvent(new PromptInteractionEvent('agent', 'system', DIAGRAM_FORMAT_PROMPT)); - } this.history.addEvent( new PromptInteractionEvent( @@ -303,10 +294,22 @@ export default class ExplainAgent implements Agent { ); await this.contextService.locationContextFromOptions(options); - await this.contextService.searchContext(options, tokensAvailable); + if ( + hasLabel(options.contextLabels, ContextV2.ContextLabelName.Overview)?.weight !== + ContextV2.ContextLabelWeight.High + ) + await this.contextService.searchContext(options, tokensAvailable); } applyQuestionPrompt(question: string): void { this.history.addEvent(new PromptInteractionEvent(PromptType.Question, 'user', question)); } } + +function hasLabel( + labels: ContextV2.ContextLabel[] | undefined, + name: ContextV2.ContextLabelName +): ContextV2.ContextLabel | undefined { + if (!labels) return; + return labels.find((label) => label.name === name); +} diff --git a/packages/navie/src/agents/gatherer.ts b/packages/navie/src/agents/gatherer.ts new file mode 100644 index 0000000000..77e501a052 --- /dev/null +++ b/packages/navie/src/agents/gatherer.ts @@ -0,0 +1,243 @@ +/* eslint-disable @typescript-eslint/no-unsafe-enum-comparison */ +import assert from 'node:assert'; +import { warn } from 'node:console'; +import { debug as makeDebug } from 'node:util'; + +import { + ContextItemEvent, + type InteractionEvent, + PromptInteractionEvent, +} from '../interaction-history'; +import InteractionState from '../interaction-state'; +import type Message from '../message'; +import { PromptType } from '../prompt'; +import type CompletionService from '../services/completion-service'; +import type ContextService from '../services/context-service'; + +export default class Gatherer { + constructor( + events: readonly InteractionEvent[], + public completion: CompletionService, + public context: ContextService + ) { + this.conversation = Gatherer.buildConversation(events); + } + + conversation: Message[]; + async step(): Promise { + assert(!this.done); + let result = ''; + for await (const token of this.completion.complete(this.conversation)) result += token; + debug(`Received completion:\n${result}`); + this.conversation.push({ role: 'assistant', content: result }); + const commands = extractCommands(result); + // finish if we have no more commands or if !!finish is the only command; + // some models want to finish immediately with a batch of commands + // but then change their mind + const onlyFinish = commands.length === 1 && commands[0] === '!!finish'; + if (commands.length > 0 && !onlyFinish) + this.conversation.push({ role: 'user', content: await this.executeCommands(commands) }); + return this.done; + } + + get done(): boolean { + // step() will always add a user message unless it's done + return this.conversation.at(-1)?.role === 'assistant'; + } + + async executeCommands(commands: string[]) { + const locations: string[] = [], + terms: string[] = []; + let badCommand = false; + for (const cmd of commands) { + if (cmd.startsWith('!!find') || cmd.startsWith('!!cat')) { + const location = locationOfCommand(cmd); + if (!location) badCommand = true; + else locations.push(location); + } else if (cmd.startsWith('!!search')) { + const searchTerms = searchTermsOfCommand(cmd); + if (!searchTerms) badCommand = true; + else terms.push(...searchTerms); + } else if (cmd === '!!finish') continue; + else badCommand = true; + } + + let response = ''; + if (locations.length > 0 || terms.length > 0) { + for (const event of await this.context.searchContextWithLocations(terms, locations)) { + const location = event.location?.startsWith(event.directory ?? '') + ? event.location + : [event.directory, event.location].filter(Boolean).join('/'); + if (event.promptType === PromptType.CodeSnippet && event.location) { + response += toCatOutput(location, event.content); + } else if (event.promptType === PromptType.DirectoryListing && event.location) { + const [path, depth] = splitDirDepth(location); + response += toFindOutput(path, depth, event.content); + } + } + response ||= 'No content found.'; + } + if (badCommand) response += '\n\n' + Gatherer.COMMANDS; + return response; + } + + static buildConversation(events: readonly InteractionEvent[]): Message[] { + let system = Gatherer.SYSTEM_PROMPT + '\n\n', + context = '', + commands = '', + response = ''; + const result: Message[] = []; + + // I'm on the fence about this. Sometimes including the code snippets as cat commands multi-shot style + // causes the LLM to assume it must have known what it was doing and is obviously all done. + // On the other hand if pinned files are passed as context snippets it causes the gatherer + // to re-request them. There might be a middle ground here (eg. only !!catting pinned files, + // representing search results with !!search command). -divide + const snippetsAsCat = true; + + // Seems to work fine without all the prompts. Leaving this here in case we want to switch. + // Maybe a more granular approach would work even better (ie. including only some prompts). + const includePrompts = false; + + for (const event of events) { + if (event instanceof PromptInteractionEvent && event.name !== PromptType.CodeSnippet) { + if (event.name === 'agent' && includePrompts) + context += `\n${event.content}\n\n\n`; + else if (event.role === 'system' && includePrompts) system += '\n\n' + event.content; + else if (event.role === 'user') { + if (event.name === PromptType.Question) + context += `\n${event.content}\n\n\n`; + else context += toContext(event) + '\n'; + } + } else if (event instanceof ContextItemEvent) { + // sometime the location is relative, sometimes absolute + const location = event.location?.startsWith(event.directory ?? '') + ? event.location + : [event.directory, event.location].filter(Boolean).join('/'); + if (event.promptType === PromptType.CodeSnippet && event.location && snippetsAsCat) { + // this case is currently disabled, see the comment above + commands += `!!cat ${location}\n`; + response += toCatOutput(location, event.content); + } else if (event.promptType === PromptType.DirectoryListing && event.location) { + const [path, depth] = splitDirDepth(location); + commands += `!!find ${path} -depth ${depth}\n`; + response += toFindOutput(path, depth, event.content); + } else if ( + event.promptType === PromptType.AppMapConfig || + event.promptType === PromptType.AppMapStats || + event.promptType === PromptType.CodeEditor + ) + // ignore things irrelevant for context gathering + continue; + else context += toContext(event) + '\n'; + } + } + if (system) result.push({ role: 'system', content: system }); + if (context) + result.push({ + role: 'user', + content: [Gatherer.USER_PROMPT, `\n${context}\n`].join('\n\n'), + }); + if (commands) + result.push({ role: 'assistant', content: commands }, { role: 'user', content: response }); + + return result; + } + + static readonly COMMANDS = `\ + Supported commands: + !!find [-depth ] + !!cat [:[-]] + !!search + !!finish +`; + + static readonly SYSTEM_PROMPT = `\ +You are a helper for an AI agent. Your task is to gather all the information about a software project (such as relevant file contents) +that the agent will need to perform its task accurately and without any guesswork. + +You will consider the context provided, which may include information about the task and about the software project +and think about what else might be missing that's needed to perform the task. Remember, the agent will complete the task +based only on the information provided, so make extra sure it's complete. For example, if the task will require modifying a file, +make sure to check if the file exists and verify its contents. + +To gather the information, respond with the following commands: +!!find [-depth ] +!!cat [:[-]] + +You can also do a full-text search using +!!search + +When you're done, simply respond with !!finish on a single line. +All the information gathered will be passed to the agent so you don't need to repeat or summarize it. + +Respond with commands ONLY.`; + + static readonly USER_PROMPT = `\ +Please help me gather information about a software project for an AI agent accomplish a task, based on the following context. +DO NOT answer the question; the information you gather will be automatically used to answer the question later. + +When no more information is required, respond with !!finish ONLY. + +If the question is generic and unrelated to the user's project, just finish.`; +} + +function toContext(event: InteractionEvent): string { + const state = new InteractionState(); + event.updateState(state); + // TBD: maybe not pass the full items? But some context items definitely + // need to be passed in full (such as the user query) and even code snippets + // and diagrams can be informative (eg. to hunt down imports) + if (state.messages.length === 1) return state.messages[0].content; + + warn(`Context item has multiple messages: ${event.type}`); + return ''; +} + +function toCatOutput(location: string, content: string): string { + const startingLine = Number(location.split(':').pop()?.split('-').shift()) || 1; + return ( + `Here's the output of \`cat -n ${location}\`:\n` + numberLines(content, startingLine) + '\n\n' + ); +} + +function toFindOutput(path: string, depth: number, content: string): string { + const prefix = `Here's the list of files and directories in ${path} up to the depth of ${depth}\`:\n`; + return prefix + content + '\n\n'; +} + +function splitDirDepth(location: string): [string, number] { + const parts = location.split(':'); + const depth = Number(parts.at(-1)); + if (isNaN(depth)) return [location, 0]; + else return [parts.slice(0, -1).join(':'), depth]; +} + +function numberLines(content: string, startingLine = 1): string { + return content + .split('\n') + .map((line, index) => `${rightJustify(String(index + startingLine), 6)}\t${line}`) + .join('\n'); +} + +function rightJustify(text: string, width: number): string { + return ' '.repeat(Math.max(width - text.length, 0)) + text; +} + +function extractCommands(text: string): string[] { + return text.split('\n').filter((line) => line.startsWith('!!')); +} + +function locationOfCommand(command: string): string | undefined { + let match; + if ((match = /^!!cat (.*)/.exec(command))) return match[1]; + if ((match = /^!!find (.*?)( -depth (\d+))?$/.exec(command))) + return `${match[1]}:${match[3] || 0}`; +} + +function searchTermsOfCommand(command: string): string[] | undefined { + let match; + if ((match = /^!!search (.*)/.exec(command))) return match[1].split(' '); +} + +const debug = makeDebug('appmap:navie:gatherer'); diff --git a/packages/navie/src/agents/generate-agent.ts b/packages/navie/src/agents/generate-agent.ts index c07edf08e2..09e967b65c 100644 --- a/packages/navie/src/agents/generate-agent.ts +++ b/packages/navie/src/agents/generate-agent.ts @@ -188,6 +188,8 @@ export default class GenerateAgent implements Agent { ) ); + await this.contextService.locationContextFromOptions(options); + if (options.userOptions.booleanValue('listfiles', true)) { const contentFetcher = new FileContentFetcher( this.fileChangeExtractorService, diff --git a/packages/navie/src/commands/explain-command.ts b/packages/navie/src/commands/explain-command.ts index d2609af0ea..ab8bb557c2 100644 --- a/packages/navie/src/commands/explain-command.ts +++ b/packages/navie/src/commands/explain-command.ts @@ -1,6 +1,7 @@ import { warn } from 'console'; -import { AgentOptions } from '../agent'; +import { AgentMode, AgentOptions } from '../agent'; +import Gatherer from '../agents/gatherer'; import AgentSelectionService from '../services/agent-selection-service'; import ClassificationService from '../services/classification-service'; import CodeSelectionService from '../services/code-selection-service'; @@ -15,6 +16,7 @@ import { ProjectInfo } from '../project-info'; import Command, { CommandRequest } from '../command'; import { ChatHistory } from '../navie'; import getMostRecentMessages from '../lib/get-most-recent-messages'; +import { type UserOptions } from '../lib/parse-options'; import { ContextV2 } from '../context'; import assert from 'assert'; import { UserContext } from '../user-context'; @@ -76,7 +78,7 @@ export default class ExplainCommand implements Command { contextLabels, request.userOptions ); - const { question, agent: mode } = agentSelectionResult; + const { agentMode, question, agent: mode } = agentSelectionResult; if (agentSelectionResult.selectionMessage) { yield agentSelectionResult.selectionMessage; @@ -152,6 +154,10 @@ export default class ExplainCommand implements Command { if (codeSelection) this.codeSelectionService.applyCodeSelection(codeSelection); mode.applyQuestionPrompt(question); + if (gathererEnabled(request.userOptions, agentMode, contextLabels)) { + yield* this.gatherAdditionalInformation(); + } + const { messages } = this.interactionHistory.buildState(); this.interactionHistory.addEvent( @@ -165,4 +171,33 @@ export default class ExplainCommand implements Command { if (mode.filter) yield* mode.filter(response); else yield* response; } + + private async *gatherAdditionalInformation(maxSteps = 10) { + let steps = 0; + try { + const gatherer = new Gatherer( + this.interactionHistory.events, + this.completionService, + this.agentSelectionService.contextService + ); + for (steps = 0; steps < maxSteps && !(await gatherer.step()); steps++) + yield steps > 0 ? '.' : 'Gathering additional information, please wait...'; + } catch (err) { + console.warn('Error while gathering: ', err); + } finally { + if (steps > 0) yield ' done!\n\n'; + } + } +} + +function gathererEnabled( + userOptions: UserOptions, + agentMode: AgentMode, + contextLabels: ContextV2.ContextLabel[] +): boolean { + const enabledByDefault = + [AgentMode.Generate, AgentMode.Test].includes(agentMode) || + !!contextLabels.find((l) => l.name === ContextV2.ContextLabelName.Overview); + + return userOptions.isEnabled('gatherer', enabledByDefault); } diff --git a/packages/navie/src/context.ts b/packages/navie/src/context.ts index f98c31505e..99276af1f4 100644 --- a/packages/navie/src/context.ts +++ b/packages/navie/src/context.ts @@ -46,6 +46,8 @@ export namespace ContextV2 { // A selection from a source file, including the location (specified as a path // and range of lines), as well as the content extracted from the location CodeSelection = 'code-selection', + // A directory listing + DirectoryListing = 'directory-listing', } // A specific context item that is returned in the response. @@ -63,7 +65,8 @@ export namespace ContextV2 { type: | ContextItemType.CodeSnippet | ContextItemType.SequenceDiagram - | ContextItemType.DataRequest; + | ContextItemType.DataRequest + | ContextItemType.DirectoryListing; // The directory in which the context item is located. directory: string; // Identifies the location in the project directory from which the context was obtained. @@ -78,7 +81,8 @@ export namespace ContextV2 { return ( contextItem.type === ContextItemType.CodeSnippet || contextItem.type === ContextItemType.SequenceDiagram || - contextItem.type === ContextItemType.DataRequest + contextItem.type === ContextItemType.DataRequest || + contextItem.type === ContextItemType.DirectoryListing ); } diff --git a/packages/navie/src/prompt.ts b/packages/navie/src/prompt.ts index 8d59b15610..ca227b2e9d 100644 --- a/packages/navie/src/prompt.ts +++ b/packages/navie/src/prompt.ts @@ -7,6 +7,7 @@ export enum PromptType { AppMapStats = 'appmapStats', SequenceDiagram = 'sequenceDiagrams', CodeSnippet = 'codeSnippets', + DirectoryListing = 'directoryListings', DataRequest = 'dataRequest', HelpDoc = 'helpDoc', CodeEditor = 'codeEditor', @@ -22,6 +23,7 @@ const PROMPT_NAMES: Record = { [PromptType.AppMapStats]: { singular: 'AppMap statistics', plural: 'AppMap statistics' }, [PromptType.SequenceDiagram]: { singular: 'sequence diagram', plural: 'sequence diagrams' }, [PromptType.CodeSnippet]: { singular: 'code snippet', plural: 'code snippets' }, + [PromptType.DirectoryListing]: { singular: 'directory listing', plural: 'directory listings' }, [PromptType.DataRequest]: { singular: 'data request', plural: 'data requests' }, [PromptType.HelpDoc]: { singular: 'help document', plural: 'help documents' }, [PromptType.CodeEditor]: { singular: 'code editor', plural: 'code editors' }, @@ -70,7 +72,7 @@ focused on describing the problem fully as a software Issue, aka Ticket. [PromptType.AppMapConfig]: { content: `**AppMap configuration** -You're provided with all AppMap configuration files within the user's workspace. The project information +You're provided with all AppMap configuration files within the user's workspace. The project information is encoded as an array of AppMap configurations (\`appmap.yml\`) provided in JSON format. The contents of each element contain the configuration of the AppMap agent, including: @@ -153,6 +155,15 @@ app.listen(3000, () => console.log('Server started on port 3000')); tagName: 'code-snippet', multiple: true, }, + [PromptType.DirectoryListing]: { + content: `**Directory listings** + +You're provided with directory listings that are relevant to the task. +Each directory listing contains a list of files and directories within that directory. +`, + tagName: 'directory-listing', + multiple: true, + }, [PromptType.DataRequest]: { content: `**Data requests** diff --git a/packages/navie/src/services/agent-selection-service.ts b/packages/navie/src/services/agent-selection-service.ts index d9d33be943..5508105c02 100644 --- a/packages/navie/src/services/agent-selection-service.ts +++ b/packages/navie/src/services/agent-selection-service.ts @@ -53,7 +53,15 @@ export default class AgentSelectionService { private applyContextService: ApplyContextService, private techStackService: TechStackService, private mermaidFixerService: MermaidFixerService - ) {} + ) { + this.contextService = new ContextService( + this.history, + this.vectorTermsService, + this.lookupContextService, + this.applyContextService + ); + } + contextService: ContextService; selectAgent( question: string, @@ -62,13 +70,6 @@ export default class AgentSelectionService { ): AgentModeResult { let modifiedQuestion = question; - const contextService = new ContextService( - this.history, - this.vectorTermsService, - this.lookupContextService, - this.applyContextService - ); - const helpAgent = () => new HelpAgent( this.history, @@ -78,20 +79,20 @@ export default class AgentSelectionService { ); const testAgent = () => - new TestAgent(this.history, contextService, this.fileChangeExtractorService); + new TestAgent(this.history, this.contextService, this.fileChangeExtractorService); - const planAgent = () => new PlanAgent(this.history, contextService); + const planAgent = () => new PlanAgent(this.history, this.contextService); const generateAgent = () => - new GenerateAgent(this.history, contextService, this.fileChangeExtractorService); + new GenerateAgent(this.history, this.contextService, this.fileChangeExtractorService); const diagramAgent = () => - new DiagramAgent(this.history, contextService, this.mermaidFixerService); + new DiagramAgent(this.history, this.contextService, this.mermaidFixerService); const explainAgent = () => - new ExplainAgent(this.history, contextService, this.mermaidFixerService); + new ExplainAgent(this.history, this.contextService, this.mermaidFixerService); - const searchAgent = () => new SearchAgent(this.history, contextService); + const searchAgent = () => new SearchAgent(this.history, this.contextService); const buildAgent: { [key in AgentMode]: () => Agent } = { [AgentMode.Explain]: explainAgent, diff --git a/packages/navie/src/services/apply-context-service.ts b/packages/navie/src/services/apply-context-service.ts index a8f53b6da7..751c166806 100644 --- a/packages/navie/src/services/apply-context-service.ts +++ b/packages/navie/src/services/apply-context-service.ts @@ -44,35 +44,8 @@ export default class ApplyContextService { const charsRemaining = characterLimit - charsApplied; for (const item of appliedContextItems) { - let promptType: PromptType | undefined; - switch (item.type) { - case ContextV2.ContextItemType.SequenceDiagram: - promptType = PromptType.SequenceDiagram; - break; - case ContextV2.ContextItemType.CodeSnippet: - promptType = PromptType.CodeSnippet; - break; - case ContextV2.ContextItemType.DataRequest: - promptType = PromptType.DataRequest; - break; - case ContextV2.ContextItemType.HelpDoc: - promptType = PromptType.HelpDoc; - break; - default: - } - if (promptType) { - const isFile = ContextV2.isFileContextItem(item); - this.interactionHistory.addEvent( - new ContextItemEvent( - promptType, - item.content, - isFile ? item.location : undefined, - isFile ? item.directory : undefined - ) - ); - } else { - warn(`Unknown context item type: ${item.type} for content: ${item.content}`); - } + const event = eventOfContextItem(item); + if (event) this.interactionHistory.addEvent(event); } this.interactionHistory.log(`Remaining characters after context: ${charsRemaining}`); @@ -128,3 +101,35 @@ export default class ApplyContextService { ); } } + +export function eventOfContextItem(item: ContextV2.ContextItem): undefined | ContextItemEvent { + let promptType: PromptType | undefined; + switch (item.type) { + case ContextV2.ContextItemType.SequenceDiagram: + promptType = PromptType.SequenceDiagram; + break; + case ContextV2.ContextItemType.CodeSnippet: + promptType = PromptType.CodeSnippet; + break; + case ContextV2.ContextItemType.DataRequest: + promptType = PromptType.DataRequest; + break; + case ContextV2.ContextItemType.HelpDoc: + promptType = PromptType.HelpDoc; + break; + case ContextV2.ContextItemType.DirectoryListing: + promptType = PromptType.DirectoryListing; + default: + } + if (promptType) { + const isFile = ContextV2.isFileContextItem(item); + return new ContextItemEvent( + promptType, + item.content, + isFile ? item.location : undefined, + isFile ? item.directory : undefined + ); + } else { + warn(`Unknown context item type: ${item.type} for content: ${item.content}`); + } +} diff --git a/packages/navie/src/services/context-service.ts b/packages/navie/src/services/context-service.ts index 9a2d15c6c7..bef20521b1 100644 --- a/packages/navie/src/services/context-service.ts +++ b/packages/navie/src/services/context-service.ts @@ -1,12 +1,11 @@ import { warn } from 'console'; import { AgentOptions } from '../agent'; import transformSearchTerms from '../lib/transform-search-terms'; -import ApplyContextService from './apply-context-service'; import LookupContextService from './lookup-context-service'; import VectorTermsService from './vector-terms-service'; import { ContextV2 } from '../context'; import InteractionHistory, { ContextItemEvent } from '../interaction-history'; -import { PromptType } from '../prompt'; +import ApplyContextService, { eventOfContextItem } from './apply-context-service'; export default class ContextService { constructor( @@ -48,10 +47,10 @@ export default class ContextService { } } - async locationContext(fileNames: string[]): Promise { + async locationContext(fileNames: string[]): Promise { if (!fileNames || fileNames.length === 0) { this.history.log('[context-service] No file names provided for location context'); - return; + return []; } this.history.log(`[context-service] Retrieving full context of files: ${fileNames.join(', ')}`); @@ -65,20 +64,46 @@ export default class ContextService { // Full text of requested files is always added to the prompt. Context limits are not applied // in this case due to their important role in generating code. let charsAdded = 0; + const events: ContextItemEvent[] = []; for (const item of context) { - const contextItem = new ContextItemEvent(PromptType.CodeSnippet, item.content); - if (ContextV2.isFileContextItem(item)) { - contextItem.location = item.location; - } + const contextItem = eventOfContextItem(item); + if (!contextItem) continue; charsAdded += contextItem.content.length; + events.push(contextItem); this.history.addEvent(contextItem); } this.history.log(`[context-service] Added ${charsAdded} characters of file context`); + return events; + } + + async searchContextWithLocations( + searchTerms: string[], + fileNames: string[] + ): Promise { + this.history.log('[context-service] Searching for context with locations'); + + const filters = { locations: fileNames }; + const context = await this.lookupContextService.lookupContext(searchTerms, 1024, filters); + + let charsAdded = 0; + const events: ContextItemEvent[] = []; + for (const item of ContextService.guardContextType(context)) { + const contextItem = eventOfContextItem(item); + if (!contextItem) continue; + charsAdded += contextItem.content.length; + events.push(contextItem); + this.history.addEvent(contextItem); + } + this.history.log(`[context-service] Added ${charsAdded} characters of context`); + return events; } - locationContextFromOptions(options: AgentOptions): Promise { - const { locations } = options.buildContextFilters(); - return locations && locations.length > 0 ? this.locationContext(locations) : Promise.resolve(); + async locationContextFromOptions(options: AgentOptions): Promise { + const locations = options.buildContextFilters().locations ?? []; + // Also list project directories + locations.unshift(':0'); + console.log(locations); + await this.locationContext(locations); } static guardContextType( diff --git a/packages/navie/src/services/google-vertexai-completion-service.ts b/packages/navie/src/services/google-vertexai-completion-service.ts index bc7fd41a24..8d01865a1e 100644 --- a/packages/navie/src/services/google-vertexai-completion-service.ts +++ b/packages/navie/src/services/google-vertexai-completion-service.ts @@ -136,7 +136,7 @@ export default class GoogleVertexAICompletionService implements CompletionServic temperature += 0.1; if (attempt < maxAttempts - 1 && tokens.length === 0) { const nextAttempt = CompletionRetryDelay * 2 ** attempt; - warn(`Received ${JSON.stringify(cause)}, retrying in ${nextAttempt}ms`); + warn(`Received ${JSON.stringify(cause).slice(0, 400)}, retrying in ${nextAttempt}ms`); await new Promise((resolve) => { setTimeout(resolve, nextAttempt); }); diff --git a/packages/navie/test/agents/__snapshots__/gatherer.spec.ts.snap b/packages/navie/test/agents/__snapshots__/gatherer.spec.ts.snap new file mode 100644 index 0000000000..139b278560 --- /dev/null +++ b/packages/navie/test/agents/__snapshots__/gatherer.spec.ts.snap @@ -0,0 +1,69 @@ +// Jest Snapshot v1, https://goo.gl/fbAQLP + +exports[`Gatherer buildConversation transforms interaction history events into a conversation 1`] = ` +"system: +user: + + + +@startuml +!includeurl + + +query:SELECT * FROM "users" WHERE ("users"."login" = 'admin') LIMIT 1 + + +query:SELECT * FROM "api_keys" WHERE ("login" = 'admin') + + +@startuml +!includeurl https://raw.githubusercontent.com/getappmap/plantuml-theme/main/appmap-theme.puml + + +query:WITH "recording_methods" AS (SELECT scenarios.metadata->'recorder'->>'name' + + +query:INSERT INTO "api_keys" ("login") VALUES ('user@example.com') RETURNING * + + +@startuml +!includeurl https://raw.githubusercontent.com/getappmap/plantuml-theme/main/appmap-theme.puml +participant + + +Users are listed on the admin page. + +Some users are in the system using invalid or disallowed email addresses. + +Add a link or button to "Deactivate" a user on the admin page. + +The button should: + +* Revoke the user's API keys. +* Mark the user record as deactivated. +* Indicate on the user record who deactivated the user, and when. + + + +A resolution to the given issue, also recording who and when deactivated the account + + + +assistant: !!cat /test/appmap-server/app/models/api_key.rb:53 +!!cat /test/appmap-server/app/models/api_key.rb:66 +!!cat /test/appmap-server/app/models/api_key.rb:15 +user: Here's the output of \`cat -n /test/appmap-server/app/models/api_key.rb:53\`: + 53 # @label security.api_key.touch + 54 def touch(api_key) + 55 if api_key.last_used.nil? || api_key.last_used > 15.minutes.ago + +Here's the output of \`cat -n /test/appmap-server/app/models/api_key.rb:66\`: + 66 # @label security.api_key.revoke + 67 def revoke_from_user(login, key_id) + 68 DAO::ApiKey + +Here's the output of \`cat -n /test/appmap-server/app/models/api_key.rb:15\`: + 15 def decode(api_key) + 16 Base64.urlsafe_decode64(api_key).split(SEPARATOR) + 17 end" +`; diff --git a/packages/navie/test/agents/diagram-agent.spec.ts b/packages/navie/test/agents/diagram-agent.spec.ts index b2a4f879fc..c648089c2c 100644 --- a/packages/navie/test/agents/diagram-agent.spec.ts +++ b/packages/navie/test/agents/diagram-agent.spec.ts @@ -89,6 +89,8 @@ describe('@diagram agent', () => { await diagramAgent.perform(options, () => tokensAvailable); // eslint-disable-next-line @typescript-eslint/unbound-method - expect(contextService.locationContext).toHaveBeenCalledWith(['file1', 'file2']); + expect(contextService.locationContext).toHaveBeenCalledWith( + expect.arrayContaining(['file1', 'file2']) + ); }); }); diff --git a/packages/navie/test/agents/examples/interaction-history.json b/packages/navie/test/agents/examples/interaction-history.json new file mode 100644 index 0000000000..2955353a8b --- /dev/null +++ b/packages/navie/test/agents/examples/interaction-history.json @@ -0,0 +1,183 @@ +{ + "_events": {}, + "_eventsCount": 1, + "events": [ + { + "type": "classification", + "classification": [ + { + "name": "feature", + "weight": "medium" + } + ] + }, + { + "type": "agentSelection", + "agent": "plan" + }, + { + "type": "prompt", + "name": "appmapConfig", + "role": "system", + "content": "**AppMap configuration**\n\nYou're provided with all" + }, + { + "type": "contextItem", + "promptType": "appmapConfig", + "content": "- name: getappmap/appmap-server\n language: ruby\n" + }, + { + "type": "prompt", + "name": "appmapStats", + "role": "system", + "content": "**AppMap statistics**\n\nYou're provided with information about the AppMaps that have been recorded and are available in the user's workspace." + }, + { + "type": "contextItem", + "promptType": "appmapStats", + "content": "- numAppMaps: 530\n" + }, + { + "type": "prompt", + "name": "codeEditor", + "role": "system", + "content": "**Code editor**\n\nYou're provided" + }, + { + "type": "contextItem", + "promptType": "codeEditor", + "content": "The code editor is not specified." + }, + { + "type": "prompt", + "name": "agent", + "role": "system", + "content": "**Task: Specification of Software Issues**\n\n## About you**\n\nYour name is Navie.\n" + }, + { + "type": "prompt", + "name": "problemStatement", + "role": "system", + "content": "**The problem statement**\nThis is a description of the problem that the user wants you to help them with. Your response should be" + }, + { + "type": "vectorTerms", + "terms": ["deactivate"] + }, + { + "type": "contextLookup", + "context": [] + }, + { + "type": "prompt", + "name": "sequenceDiagrams", + "role": "system", + "content": "**Sequence diagrams**\n\nYou're provided with sequence diagrams that are relevant to the task." + }, + { + "type": "prompt", + "name": "codeSnippets", + "role": "system", + "content": "**Code snippets**\n\nYou're provided with code snippets that are relevant to the task.\n\nSequence diagrams," + }, + { + "type": "prompt", + "name": "dataRequest", + "role": "system", + "content": "**Data requests**\n\nYou're provided with data requests that are relevant to the task." + }, + { + "type": "contextItem", + "promptType": "sequenceDiagrams", + "content": "@startuml\n!includeurl ", + "location": "/test/appmap-server/tmp/appmap/rspec/RecordingMethod_on_a_mapset_shows_the_recording_method_breakdown.appmap.json", + "directory": "/test/appmap-server" + }, + { + "type": "contextItem", + "promptType": "codeSnippets", + "content": "# @label security.api_key.touch\n def touch(api_key)\n if api_key.last_used.nil? || api_key.last_used > 15.minutes.ago", + "location": "app/models/api_key.rb:53", + "directory": "/test/appmap-server" + }, + { + "type": "contextItem", + "promptType": "codeSnippets", + "content": "# @label security.api_key.revoke\n def revoke_from_user(login, key_id)\n DAO::ApiKey", + "location": "app/models/api_key.rb:66", + "directory": "/test/appmap-server" + }, + { + "type": "contextItem", + "promptType": "codeSnippets", + "content": "def decode(api_key)\n Base64.urlsafe_decode64(api_key).split(SEPARATOR)\n end", + "location": "app/models/api_key.rb:15", + "directory": "/test/appmap-server" + }, + { + "type": "contextItem", + "promptType": "dataRequest", + "content": "query:SELECT * FROM \"users\" WHERE (\"users\".\"login\" = 'admin') LIMIT 1", + "location": "/test/appmap-server/tmp/appmap/rspec/API_APIKeysController_revoke_an_existing_api_key.appmap.json:66", + "directory": "/test/appmap-server" + }, + { + "type": "contextItem", + "promptType": "dataRequest", + "content": "query:SELECT * FROM \"api_keys\" WHERE (\"login\" = 'admin')", + "location": "/test/appmap-server/tmp/appmap/rspec/API_APIKeysController_revoke_an_existing_api_key.appmap.json:44", + "directory": "/test/appmap-server" + }, + { + "type": "contextItem", + "promptType": "sequenceDiagrams", + "content": "@startuml\n!includeurl https://raw.githubusercontent.com/getappmap/plantuml-theme/main/appmap-theme.puml", + "location": "/test/appmap-server/tmp/appmap/rspec/Activation_Validate_verify_when_valid_parameters_are_provided_marks_the_user_as_having_accepted_the_most_recent_EULA.appmap.json", + "directory": "/test/appmap-server" + }, + { + "type": "contextItem", + "promptType": "dataRequest", + "content": "query:WITH \"recording_methods\" AS (SELECT scenarios.metadata->'recorder'->>'name' ", + "location": "/test/appmap-server/tmp/appmap/rspec/RecordingMethod_on_a_mapset_shows_the_recording_method_breakdown.appmap.json:6", + "directory": "/test/appmap-server" + }, + { + "type": "contextItem", + "promptType": "dataRequest", + "content": "query:INSERT INTO \"api_keys\" (\"login\") VALUES ('user@example.com') RETURNING *", + "location": "/test/appmap-server/tmp/appmap/rspec/Activation_Validate_verify_when_valid_parameters_are_provided_marks_the_user_as_having_accepted_the_most_recent_EULA.appmap.json:95", + "directory": "/test/appmap-server" + }, + { + "type": "contextItem", + "promptType": "sequenceDiagrams", + "content": "@startuml\n!includeurl https://raw.githubusercontent.com/getappmap/plantuml-theme/main/appmap-theme.puml\nparticipant ", + "location": "/test/appmap-server/tmp/appmap/rspec/Key_data_stats_messages_one_unique_on_each_of_two_scenarios_in_a_shared_mapset_indicates_2_messages_on_the_app.appmap.json", + "directory": "/test/appmap-server" + }, + { + "type": "prompt", + "name": "codeSelection", + "role": "system", + "content": "**The user's code selection**\n\nThe user is asking about specific lines of code that they have selected in their code editor.\n\nThe code selection will be provided as context within a `` tag." + }, + { + "type": "prompt", + "name": "codeSelection", + "role": "user", + "content": "\nUsers are listed on the admin page.\n\nSome users are in the system using invalid or disallowed email addresses.\n\nAdd a link or button to \"Deactivate\" a user on the admin page.\n\nThe button should:\n\n* Revoke the user's API keys.\n* Mark the user record as deactivated.\n* Indicate on the user record who deactivated the user, and when.\n\n" + }, + { + "type": "prompt", + "name": "problemStatement", + "role": "user", + "content": "\nA resolution to the given issue, also recording who and when deactivated the account\n" + }, + { + "type": "completion", + "model": "gemini-1.5-pro-002", + "temperature": 0.2 + } + ] +} diff --git a/packages/navie/test/agents/gatherer.spec.ts b/packages/navie/test/agents/gatherer.spec.ts new file mode 100644 index 0000000000..184efea66e --- /dev/null +++ b/packages/navie/test/agents/gatherer.spec.ts @@ -0,0 +1,200 @@ +/* eslint jest/expect-expect: ["error", { "assertFunctionNames": ["expectResult"]}] */ +import { readFileSync } from 'node:fs'; +import path from 'node:path'; + +import Gatherer from '../../src/agents/gatherer'; +import { + ContextItemEvent, + type InteractionEvent, + PromptInteractionEvent, +} from '../../src/interaction-history'; + +describe('Gatherer', () => { + describe('buildConversation', () => { + it('transforms interaction history events into a conversation', () => { + const history = JSON.parse( + readFileSync(path.resolve(__filename, '../examples/interaction-history.json'), 'utf-8') + ) as { events: EventData[] }; + expectResult(history.events).toMatchSnapshot(); + }); + it('ignores irrelevant events', () => { + expectResult([ + { + type: 'classification', + classification: [ + { + name: 'feature', + weight: 'medium', + }, + ], + }, + ]).toMatchInlineSnapshot(`"system: "`); + }); + + // eslint-disable-next-line jest/no-disabled-tests + it.skip('adds prompts into the system prompt', () => { + expectResult([ + { + type: 'prompt', + name: 'appmapConfig', + role: 'system', + content: "**AppMap configuration**\n\nYou're provided with all AppMap configuration", + }, + + { + type: 'prompt', + name: 'appmapStats', + role: 'system', + content: "**AppMap statistics**\n\nYou're provided with information about the AppMaps", + }, + ]).toMatchInlineSnapshot(` + "system: + + + + **AppMap configuration** + + You're provided with all AppMap configuration + + **AppMap statistics** + + You're provided with information about the AppMaps" + `); + }); + + it('ignores code snippets prompt', () => { + expectResult([ + { + type: 'prompt', + name: 'codeSnippets', + role: 'system', + content: "**Code snippets**\n\nYou're provided with code snippets", + }, + ]).toMatchInlineSnapshot(`"system: "`); + }); + + it('formats code snippets and other context items in context', () => { + expectResult([ + { + type: 'contextItem', + promptType: 'codeSnippets', + content: + '# @label security.api_key.touch\n def touch(api_key)\n if api_key.last_used.nil?', + location: 'app/models/api_key.rb:53', + directory: '/test/appmap-server', + }, + + { + type: 'contextItem', + promptType: 'sequenceDiagrams', + content: + '@startuml\n!includeurl https://raw.githubusercontent.com/getappmap/plantuml-theme/main/appmap-theme.puml\nparticipant', + location: '/test/appmap-server/tmp/appmap', + directory: '/test/appmap-server', + }, + + { + type: 'contextItem', + promptType: 'codeSnippets', + content: + '# @label security.api_key.revoke\n def revoke_from_user(login, key_id)\n DAO::ApiKey', + location: 'app/models/api_key.rb:66', + directory: '/test/appmap-server', + }, + ]).toMatchInlineSnapshot(` + "system: + user: + + + + @startuml + !includeurl https://raw.githubusercontent.com/getappmap/plantuml-theme/main/appmap-theme.puml + participant + + + + assistant: !!cat /test/appmap-server/app/models/api_key.rb:53 + !!cat /test/appmap-server/app/models/api_key.rb:66 + user: Here's the output of \`cat -n /test/appmap-server/app/models/api_key.rb:53\`: + 53 # @label security.api_key.touch + 54 def touch(api_key) + 55 if api_key.last_used.nil? + + Here's the output of \`cat -n /test/appmap-server/app/models/api_key.rb:66\`: + 66 # @label security.api_key.revoke + 67 def revoke_from_user(login, key_id) + 68 DAO::ApiKey" + `); + }); + + // eslint-disable-next-line jest/no-disabled-tests + it.skip('puts the agent prompt in the context', () => { + expectResult([ + { + type: 'prompt', + name: 'agent', + role: 'system', + content: '**Task: Specification of Software Issues**\n\n## About you**', + }, + ]).toMatchInlineSnapshot(` + "system: + user: + + + + **Task: Specification of Software Issues** + + ## About you** + + + + " + `); + }); + + type EventData = Record; + + // eslint-disable-next-line jest/valid-expect + const expectResult = (xs: readonly EventData[]) => expect(perform(xs)); + + function perform(events: readonly EventData[]) { + return Gatherer.buildConversation(events.map(event)) + .map(({ role, content }) => + [ + role, + content + .replace(Gatherer.SYSTEM_PROMPT, '') + .replace(Gatherer.USER_PROMPT, '') + .trim(), + ].join(': ') + ) + .join('\n'); + } + + function event(ev: EventData): InteractionEvent { + switch (ev.type) { + case 'prompt': + return new PromptInteractionEvent( + String(ev.name) ?? 'test', + (ev.role ?? 'system') as never, + String(ev.content) + ); + case 'contextItem': + return new ContextItemEvent( + (String(ev.promptType) ?? 'test') as never, + String(ev.content), + ev.location as never, + ev.directory as never + ); + default: + return { + type: 'test', + message: '', + metadata: {}, + updateState() {}, + ...ev, + }; + } + } + }); +}); diff --git a/packages/navie/test/services/context-service.spec.ts b/packages/navie/test/services/context-service.spec.ts index 47c6e82121..80adf7aa95 100644 --- a/packages/navie/test/services/context-service.spec.ts +++ b/packages/navie/test/services/context-service.spec.ts @@ -1,12 +1,14 @@ +import assert from 'node:assert'; + import { AgentOptions } from '../../src/agent'; import ContextService from '../../src/services/context-service'; import VectorTermsService from '../../src/services/vector-terms-service'; import LookupContextService from '../../src/services/lookup-context-service'; import ApplyContextService from '../../src/services/apply-context-service'; import { UserOptions } from '../../src/lib/parse-options'; -import InteractionHistory from '../../src/interaction-history'; import { SEARCH_CONTEXT } from '../fixture'; import { ContextV2 } from '../../src/context'; +import InteractionHistory, { ContextItemEvent } from '../../src/interaction-history'; describe('ContextService', () => { let history: InteractionHistory; @@ -148,5 +150,25 @@ describe('ContextService', () => { }, ]); }); + + it('sets the directory field on ContextItemEvent', async () => { + const locationContextWithDirectory = [ + { + type: ContextV2.ContextItemType.CodeSnippet, + location: 'file1', + content: 'the file 1', + directory: 'dir1', + }, + ]; + lookupContextService.lookupContext = jest + .fn() + .mockResolvedValue(locationContextWithDirectory); + + await contextService.locationContext(['file1']); + + const event = history.events[0]; + assert(event instanceof ContextItemEvent); + expect(event.directory).toEqual('dir1'); + }); }); }); diff --git a/packages/search/src/splitter.ts b/packages/search/src/splitter.ts index 85d2665c47..a71955128d 100644 --- a/packages/search/src/splitter.ts +++ b/packages/search/src/splitter.ts @@ -55,9 +55,11 @@ export async function langchainSplitter(content: string, fileExtension: string): const loc = doc.metadata?.loc as { lines: { from: number; to: number } } | undefined; const lines = loc?.lines; const result: Chunk = { - content: doc.pageContent, + content: '', }; if (lines) { + const contentLines = content.split('\n'); + result.content = contentLines.slice(lines.from - 1, lines.to).join('\n'); result.startLine = lines.from; result.endLine = lines.to; } diff --git a/packages/search/test/splitter.spec.ts b/packages/search/test/splitter.spec.ts new file mode 100644 index 0000000000..afe4f39ded --- /dev/null +++ b/packages/search/test/splitter.spec.ts @@ -0,0 +1,11 @@ +import { langchainSplitter } from '../src/splitter'; + +describe('langchainSplitter', () => { + it('should extract complete lines while maintaining indentation for long content', async () => { + const content = Array(200).fill(' console.log("Indented");').join('\n'); + const fileExtension = 'js'; + + const chunks = await langchainSplitter(content, fileExtension); + for (const chunk of chunks) expect(chunk.content).toMatch(/^ {2}/); + }); +});