Skip to content

Commit

Permalink
Merge pull request #2145 from getappmap/feat/gatherer
Browse files Browse the repository at this point in the history
Implement Gatherer for enhanced context collection
  • Loading branch information
kgilpin authored Dec 18, 2024
2 parents 3339f32 + f525fc2 commit 626c1d8
Show file tree
Hide file tree
Showing 27 changed files with 1,193 additions and 137 deletions.
14 changes: 9 additions & 5 deletions packages/cli/src/rpc/explain/collect-context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,8 @@ export function buildContextRequest(
request.includePatterns = filters.include.map((pattern) => new RegExp(pattern));
if (filters?.itemTypes) request.includeTypes = filters.itemTypes.map((type) => type);
if (filters?.locations) {
request.locations = filters.locations
.map((location) => Location.parse(location))
.filter(Boolean) as Location[];
// eslint-disable-next-line @typescript-eslint/unbound-method
request.locations = filters.locations.map(Location.parse);
warn(`Parsed locations: ${request.locations.map((loc) => loc.toString()).join(', ')}`);
}

Expand All @@ -109,13 +108,18 @@ export default async function collectContext(
sourceDirectories: string[],
charLimit: number,
vectorTerms: string[],
request: ContextRequest
request: ContextRequest,
explicitFiles: string[] = []
): Promise<{ searchResponse: SearchRpc.SearchResponse; context: ContextV2.ContextResponse }> {
let searchResponse: SearchRpc.SearchResponse = { results: [], numResults: 0 };
const context: ContextV2.ContextResponse = [];

if (request.locations && request.locations.length > 0) {
const locationResult = await collectLocationContext(sourceDirectories, request.locations);
const locationResult = await collectLocationContext(
sourceDirectories,
request.locations,
explicitFiles
);
context.push(...locationResult);
}

Expand Down
73 changes: 64 additions & 9 deletions packages/cli/src/rpc/explain/collect-location-context.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import { readFile } from 'fs/promises';
import { warn } from 'console';
import { isAbsolute, join } from 'path';
import { readdir, readFile, stat } from 'node:fs/promises';
import { basename, dirname, isAbsolute, join } from 'node:path';

import { ContextV2 } from '@appland/navie';
import { isBinaryFile } from '@appland/search';

import { verbose } from '../../utils';
import Location from './location';
import { exists, isFile, verbose } from '../../utils';

export type LocationContextRequest = {
sourceDirectories: string[];
Expand All @@ -24,16 +27,26 @@ export type LocationContextRequest = {
*/
export default async function collectLocationContext(
sourceDirectories: string[],
locations: Location[]
locations: Location[],
explicitFiles: string[] = []
): Promise<ContextV2.ContextResponse> {
const result: ContextV2.ContextResponse = [];

const candidateLocations = new Array<{ location: Location; directory?: string }>();
const candidateLocations = new Array<{ location: Location; directory: string }>();
for (const location of locations) {
const { path } = location;
if (isAbsolute(path)) {
const directory = sourceDirectories.find((dir) => path.startsWith(dir));
candidateLocations.push({ location, directory });
if (directory) {
location.path = location.path.slice(directory.length + 1);
candidateLocations.push({ location, directory });
} else if (explicitFiles.includes(path)) {
location.path = basename(path);
candidateLocations.push({ location, directory: dirname(path) });
} else {
warn(`[location-context] Skipping location outside source directories: ${location.path}`);
continue;
}
} else {
for (const sourceDirectory of sourceDirectories) {
candidateLocations.push({ location, directory: sourceDirectory });
Expand All @@ -55,12 +68,22 @@ export default async function collectLocationContext(
else if (directory) pathTokens = [directory, location.path].filter(Boolean);

const path = join(...pathTokens);
if (!(await exists(path))) {
const stats = await stat(path).catch(() => undefined);
if (!stats) {
if (verbose()) warn(`[location-context] Skipping non-existent location: ${path}`);
// TODO: tell the client?
continue;
}
if (!(await isFile(path))) {
} else if (stats.isDirectory()) {
result.push(await directoryContextItem(path, location, directory));
continue;
} else if (!stats.isFile()) {
if (verbose()) warn(`[location-context] Skipping non-file location: ${path}`);
// TODO: tell the client?
continue;
}

if (isBinaryFile(path)) {
if (verbose()) warn(`[location-context] Skipping binary file: ${path}`);
continue;
}

Expand All @@ -69,6 +92,7 @@ export default async function collectLocationContext(
contents = await readFile(path, 'utf8');
} catch (e) {
warn(`[location-context] Failed to read file: ${path}`);
// TODO: tell the client?
continue;
}

Expand All @@ -90,3 +114,34 @@ export default async function collectLocationContext(

return result;
}

async function directoryContextItem(
path: string,
location: Location,
directory: string
): Promise<ContextV2.FileContextItem> {
const depth = Number(location.lineRange) || 0;
const entries: string[] = [];
for await (const entry of listDirectory(path, depth)) entries.push(entry);
return {
type: ContextV2.ContextItemType.DirectoryListing,
content: entries.join('\n'),
location: location.toString(),
directory,
};
}

async function* listDirectory(path: string, depth: number): AsyncGenerator<string> {
const entries = await readdir(path, { withFileTypes: true });
for (const entry of entries) {
const entryPath = join(path, entry.name);
if (entry.isDirectory()) {
if (depth > 0) {
yield `${entry.name}/`;
for await (const subentry of listDirectory(entryPath, depth - 1)) yield `\t${subentry}`;
} else yield `${entry.name}/ (${(await readdir(entryPath)).length} entries)`;
} else if (entry.isFile()) {
yield entry.name;
}
}
}
7 changes: 6 additions & 1 deletion packages/cli/src/rpc/explain/collect-snippets.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,12 @@ export default function collectSnippets(
const buildLocation = (result: SnippetSearchResult) => {
const snippetId = parseFileChunkSnippetId(result.snippetId);
const { filePath, startLine } = snippetId;
return [filePath, startLine].filter(Boolean).join(':');
let location = filePath;
if (startLine) {
const endLine = startLine + result.content.split('\n').length - 1;
location += `:${startLine}-${endLine}`;
}
return location;
};

return snippets.map((snippet) => ({
Expand Down
13 changes: 12 additions & 1 deletion packages/cli/src/rpc/explain/explain.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import makeDebug from 'debug';

import {
AI,
ConversationThread,
Expand Down Expand Up @@ -28,6 +30,8 @@ import handleReview from './review';

const searchStatusByUserMessageId = new Map<string, ExplainRpc.ExplainStatusResponse>();

const debug = makeDebug('appmap:explain:rpc');

export type SearchContextOptions = {
tokenCount: number;
vectorTerms: string[];
Expand Down Expand Up @@ -158,12 +162,17 @@ export class Explain extends EventEmitter {
data
);

const explicitFiles = Array.isArray(this.codeSelection)
? this.codeSelection.filter(UserContext.hasLocation).map((cs) => cs.location)
: [];

const searchResult = await collectContext(
this.appmapDirectories.map((dir) => dir.directory),
this.projectDirectories,
charLimit,
contextRequest.vectorTerms,
contextRequest.request
contextRequest.request,
explicitFiles
);

this.status.searchResponse = searchResult.searchResponse;
Expand Down Expand Up @@ -248,6 +257,8 @@ export async function explain(
codeEditor: string | undefined,
prompt: string | undefined
): Promise<ExplainRpc.ExplainResponse> {
debug('Code selection: ', codeSelection);

const status: ExplainRpc.ExplainStatusResponse = {
step: ExplainRpc.Step.NEW,
threadId,
Expand Down
28 changes: 25 additions & 3 deletions packages/cli/src/rpc/explain/location.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,39 @@
import { warn } from 'node:console';

export default class Location {
constructor(public path: string, public lineRange?: string) {}

snippet(contents: string): string {
if (!this.lineRange) return contents;
if (!this.lineRange) {
if (contents.length > MAX_BYTES) {
// collect only as many COMPLETE lines as will fit
const lines = contents.split('\n');
let bytes = 0;
let i = 0;
for (; i < lines.length; i++) {
bytes += lines[i].length + 1;
if (bytes > MAX_BYTES) break;
}
if (i === 0) i++; // at least one line
warn(`Snippet too large, showing only ${i} lines`);
// set the line range to reflect this
this.lineRange = `1-${i}`;
return lines.slice(0, i).join('\n');
} else return contents;
}

const [start, end] = this.lineRange.split('-').map(Number);

const lines = contents.split('\n');
const snippet = lines.slice(start - 1, end || lines.length);
const snippet = lines.slice(Math.max(start - 1, 0), end || lines.length);
return snippet.join('\n');
}

toString(): string {
return this.lineRange ? `${this.path}:${this.lineRange}` : this.path;
}

static parse(location: string): Location | undefined {
static parse(location: string): Location {
const tokens = location.split(':');
if (tokens.length === 1) return new Location(tokens[0]);

Expand All @@ -31,3 +49,7 @@ export default class Location {
return new Location(path, lineRange);
}
}

// Note this is somewhat of a tradeoff between speed and cost.
// The client can always request additional lines.
const MAX_BYTES = 20_000;
5 changes: 3 additions & 2 deletions packages/cli/src/rpc/explain/navie/navie-local.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import { log, warn } from 'console';
import EventEmitter from 'events';
import { randomUUID } from 'crypto';
import { ContextV2, Navie, Help, ProjectInfo, navie } from '@appland/navie';

import { ContextV2, Help, Navie, navie, ProjectInfo, UserContext } from '@appland/navie';

import INavie from './inavie';
import Telemetry from '../../../telemetry';
Expand Down Expand Up @@ -78,7 +79,7 @@ export default class LocalNavie extends EventEmitter implements INavie {
async ask(
threadId: string | undefined,
question: string,
codeSelection?: string,
codeSelection?: UserContext.Context,
prompt?: string
): Promise<void> {
if (!threadId) {
Expand Down
5 changes: 3 additions & 2 deletions packages/cli/tests/unit/rpc/explain/collect-context.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ describe('collect-context', () => {
(collectLocationContext.default as jest.Mock).mockResolvedValue(['context1', 'context2']);

const request: ContextRequest = {
locations: [Location.parse('location1')!, Location.parse('location2')!],
locations: [Location.parse('location1'), Location.parse('location2')],
};
const result = await collectContext(
['dir1', 'dir2'],
Expand All @@ -119,7 +119,8 @@ describe('collect-context', () => {
expect(collectSearchContext.default).not.toHaveBeenCalled();
expect(collectLocationContext.default).toHaveBeenCalledWith(
['src1', 'src2'],
request.locations
request.locations,
[]
);
expect(result.searchResponse.numResults).toBe(0);
expect(result.context).toEqual(['context1', 'context2']);
Expand Down
Loading

0 comments on commit 626c1d8

Please sign in to comment.