Skip to content

Commit

Permalink
fix: sort diffs by size, then add them in ascending size order
Browse files Browse the repository at this point in the history
  • Loading branch information
sshivaditya committed Oct 27, 2024
1 parent 5571e23 commit 8d78153
Show file tree
Hide file tree
Showing 6 changed files with 85 additions and 23 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
"@supabase/supabase-js": "^2.45.4",
"@ubiquity-dao/ubiquibot-logger": "^1.3.0",
"dotenv": "^16.4.5",
"github-diff-tool": "^1.0.3",
"github-diff-tool": "^1.0.6",
"gpt-tokenizer": "^2.5.1",
"openai": "^4.63.0",
"typebox-validators": "0.3.5",
Expand Down
2 changes: 1 addition & 1 deletion src/adapters/openai/helpers/completions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ export class Completions extends SuperOpenAi {
return { answer: "", tokenUsage: { input: 0, output: 0, total: 0 } };
}

async findTokenLength(prompt: string, additionalContext: string[], localContext: string[], groundTruths: string[]): Promise<number> {
/**
 * Counts the tokens produced by encoding the prompt together with all context segments.
 *
 * Each context array is joined with newlines internally; the prompt and the three
 * joined segments are then concatenated directly, with no separator between them.
 *
 * @param prompt - The prompt text.
 * @param additionalContext - Extra context strings (defaults to none).
 * @param localContext - Local context strings (defaults to none).
 * @param groundTruths - Ground-truth strings (defaults to none).
 * @returns The length of the encoded token sequence.
 */
async findTokenLength(prompt: string, additionalContext: string[] = [], localContext: string[] = [], groundTruths: string[] = []): Promise<number> {
  const joinedSegments = [additionalContext, localContext, groundTruths].map((segment) => segment.join("\n"));
  const combinedText = joinedSegments.reduce((text, segment) => text + segment, prompt);
  const tokens = encode(combinedText);
  return tokens.length;
}
}
1 change: 0 additions & 1 deletion src/handlers/ask-llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ export async function askGpt(context: Context, question: string, formattedChat:
"supabase",
"openai",
]);
// TODO: If numTokens exceed limit then limit the context size
context.logger.info(`Number of tokens: ${numTokens}`);
return context.adapters.openai.completions.createCompletion(
question,
Expand Down
32 changes: 24 additions & 8 deletions src/helpers/format-chat-history.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { StreamlinedComment, StreamlinedComments } from "../types/llm";
import { createKey, streamlineComments } from "../handlers/comments";
import { fetchPullRequestDiff, fetchIssue, fetchIssueComments, fetchLinkedPrFromIssue } from "./issue-fetching";
import { splitKey } from "./issue";
const MAX_TOKENS_ALLOWED = 7000;

/**
* Formats the chat history by combining streamlined comments and specifications or bodies for issues and pull requests.
Expand All @@ -18,10 +19,13 @@ export async function formatChatHistory(
specAndBodies: Record<string, string>
): Promise<string[]> {
const keys = new Set([...Object.keys(streamlined), ...Object.keys(specAndBodies), createKey(context.payload.issue.html_url)]);
let runningTokenCount = 0;
const chatHistory = await Promise.all(
Array.from(keys).map(async (key) => {
const isCurrentIssue = key === createKey(context.payload.issue.html_url);
return createContextBlockSection(context, key, streamlined, specAndBodies, isCurrentIssue);
const [currentTokenCount, result] = await createContextBlockSection(context, key, streamlined, specAndBodies, isCurrentIssue, runningTokenCount);
runningTokenCount += currentTokenCount;
return result;
})
);
return Array.from(new Set(chatHistory));
Expand Down Expand Up @@ -65,8 +69,9 @@ async function createContextBlockSection(
key: string,
streamlined: Record<string, StreamlinedComment[]>,
specAndBodies: Record<string, string>,
isCurrentIssue: boolean
) {
isCurrentIssue: boolean,
currentContextTokenCount: number = 0
): Promise<[number, string]> {
let comments = streamlined[key];
if (!comments || comments.length === 0) {
const [owner, repo, number] = splitKey(key);
Expand All @@ -84,8 +89,17 @@ async function createContextBlockSection(
throw context.logger.error("Issue number is not valid");
}
const pulls = await fetchLinkedPrFromIssue(org, repo, issueNumber, context);
const prDiffs = await Promise.all(pulls.map(async (pull) => await fetchPullRequestDiff(context, org, repo, pull.number)));
const prDiff = prDiffs.join("\n");
const prDiffs = await Promise.all(pulls.map((pull) => fetchPullRequestDiff(context, org, repo, pull.number)));
let prDiff: string | null = null;
for (const pullDiff of prDiffs.flat()) {
if (currentContextTokenCount > MAX_TOKENS_ALLOWED) break;
if (pullDiff) {
const tokenLength = await context.adapters.openai.completions.findTokenLength(pullDiff.diff);
if (currentContextTokenCount + tokenLength > MAX_TOKENS_ALLOWED) break;
currentContextTokenCount += tokenLength;
prDiff = (prDiff ? prDiff + "\n" : "") + pullDiff.diff;
}
}
const specHeader = getCorrectHeaderString(prDiff, issueNumber, isCurrentIssue, false);
let specOrBody = specAndBodies[key];
if (!specOrBody) {
Expand All @@ -100,14 +114,16 @@ async function createContextBlockSection(
)?.body || "No specification or body available";
}
const specOrBodyBlock = [createHeader(specHeader, key), createSpecOrBody(specOrBody), createFooter(specHeader)];
currentContextTokenCount += await context.adapters.openai.completions.findTokenLength(specOrBody);
const header = getCorrectHeaderString(prDiff, issueNumber, isCurrentIssue, true);
const repoString = `${org}/${repo} #${issueNumber}`;
const block = [specOrBodyBlock.join(""), createHeader(header, repoString), createComment({ issueNumber, repo, org, comments }), createFooter(header)];
currentContextTokenCount += await context.adapters.openai.completions.findTokenLength(block.join(" "));
if (!prDiff) {
return block.join("");
return [currentContextTokenCount, block.join("")];
}
const diffBlock = [createHeader("Linked Pull Request Code Diff", repoString), prDiff, createFooter("Linked Pull Request Code Diff")];
return block.concat(diffBlock).join("");
const diffBlock = [createHeader("Linked Pull Request Code Diff", repoString), prDiff, createFooter("\nLinked Pull Request Code Diff")];
return [currentContextTokenCount, block.join("") + diffBlock.join("")];
}

/**
Expand Down
63 changes: 55 additions & 8 deletions src/helpers/issue-fetching.ts
Original file line number Diff line number Diff line change
Expand Up @@ -165,21 +165,40 @@ export async function mergeCommentsAndFetchSpec(
* @param issue - The pull request number.
* @returns A promise that resolves to the pull request's per-file diffs (each paired with its size in bytes), sorted from smallest to largest, or null if an error occurs.
*/
export async function fetchPullRequestDiff(context: Context, org: string, repo: string, issue: number): Promise<string | null> {
export async function fetchPullRequestDiff(context: Context, org: string, repo: string, issue: number): Promise<{ diff: string; diffSize: number }[] | null> {
const { octokit, logger } = context;
try {
const githubDiff = new GithubDiff(octokit);
//Fetch the statistics of the pull request
const stats = await githubDiff.getPullRequestStats(org, repo, issue);
//Find the filenames which have fewer than 500 changes
let files = stats.filter((file) => file.changes < 500).map((file) => file.filename);
//Ignore files like in dist or build or .lock files
const ignoredFiles = ["dist/*", "build/*", ".lock", "index.js"];
files = files.filter((file) => !ignoredFiles.some((pattern) => file.match(pattern)));
const ignoredFiles = (await buildIgnoreFilesFromGitIgnore(context, org, repo)) || [];
const files = stats
.filter((file) => !ignoredFiles.some((pattern) => file.filename.includes(pattern)))
.map((file) => ({ filename: file.filename, diffSizeInBytes: file.diffSizeInBytes }));
//Fetch the diff of the files
return await githubDiff.getPullRequestDiffsFiltered(org, repo, issue, {
includeFiles: files,
});
const prDiffs = await Promise.all(
files.map(async (file) => {
let diff = null;
try {
diff = await githubDiff.getPullRequestDiff({
owner: org,
repo,
pullNumber: issue,
filePath: file.filename,
});
} catch {
logger.error(`Error fetching pull request diff for the file`, {
owner: org,
repo,
pull_number: issue,
file: file.filename,
});
}
return diff ? { diff: file.filename + diff, diffSize: file.diffSizeInBytes } : null;
})
);
return prDiffs.filter((diff): diff is { diff: string; diffSize: number } => diff !== null).sort((a, b) => a.diffSize - b.diffSize);
} catch (error) {
logger.error(`Error fetching pull request diff`, {
error: error as Error,
Expand Down Expand Up @@ -309,3 +328,31 @@ export async function fetchLinkedPrFromIssue(owner: string, repo: string, issueN
//Filter the PRs which are linked to the issue using the body of the PR
return prs.data.filter((pr) => pr.body?.includes(`#${issueNumber}`));
}

/**
 * Builds a list of ignore patterns from the repository's root `.gitignore` file.
 *
 * The file is fetched through `context.octokit.rest.repos.getContent` and decoded from
 * base64. Each line is stripped of trailing whitespace (including the `\r` left by CRLF
 * line endings) so the patterns can be compared against file names reliably. Blank
 * lines, `#` comments and `!` negation patterns are skipped: a negation re-includes
 * files and therefore must not be treated as an ignore pattern.
 *
 * @param context - Context providing the `octokit` client and the `logger`.
 * @param owner - The repository owner.
 * @param repo - The repository name.
 * @returns The ignore patterns (an empty array when the response carries no `content`
 * field), or null if fetching or decoding the file fails.
 */
async function buildIgnoreFilesFromGitIgnore(context: Context, owner: string, repo: string): Promise<string[] | null> {
  try {
    const gitignore = await context.octokit.rest.repos.getContent({
      owner,
      repo,
      path: ".gitignore",
    });
    // Only a file response carries `content`; any other response shape yields no patterns.
    if (!("content" in gitignore.data)) {
      return [];
    }
    const content = Buffer.from(gitignore.data.content, "base64").toString();
    return content
      .split(/\r?\n/)
      .map((line) => line.trimEnd())
      .filter((line) => line !== "" && !line.startsWith("#") && !line.startsWith("!"));
  } catch (error) {
    context.logger.error(`Error fetching .gitignore file`, {
      // Anything can be thrown, so normalise to a real Error before logging.
      error: error instanceof Error ? error : new Error(String(error)),
      owner,
      repo,
    });
    return null;
  }
}
8 changes: 4 additions & 4 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -4187,10 +4187,10 @@ git-raw-commits@^4.0.0:
meow "^12.0.1"
split2 "^4.0.0"

github-diff-tool@^1.0.3:
version "1.0.3"
resolved "https://registry.yarnpkg.com/github-diff-tool/-/github-diff-tool-1.0.3.tgz#6845765b36b9f86ea0160f6c90e8a5774c274799"
integrity sha512-h+0XfJwzMwp6V5SwLZp/HANGgCMx3a+ulYe1V9rQ/HM5nvvDNtuUCAkdTv6Aa6qGxoX3BpATTQLJmFEnjmk94A==
github-diff-tool@^1.0.6:
version "1.0.6"
resolved "https://registry.yarnpkg.com/github-diff-tool/-/github-diff-tool-1.0.6.tgz#e633b46397db850ad3dc0d500450357cb7ee26f9"
integrity sha512-DOqKck+WUj3HsfOwef5cjS32qqOkKWFncIl4erBtp2+dfccrkSi6Ee14mKGnrQaAhMrx/9LWFh8X5KGivZVY8A==
dependencies:
dotenv "^16.3.1"
esbuild "^0.24.0"
Expand Down

0 comments on commit 8d78153

Please sign in to comment.