Skip to content

Commit

Permalink
Add file search functionality by adding a separate collection for fil…
Browse files Browse the repository at this point in the history
…es (#5)

* Add file search functionality by adding a separate collection for files

* Fix duplication error

* Use file path from env and reformat

---------

Co-authored-by: generall <[email protected]>
  • Loading branch information
kartik-gupta-ij and generall authored Nov 17, 2023
1 parent d92fdf6 commit bc44f69
Show file tree
Hide file tree
Showing 12 changed files with 268 additions and 43 deletions.
1 change: 1 addition & 0 deletions code_search/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

# Names of the Qdrant collections used by the application.
QDRANT_CODE_COLLECTION_NAME = "code-snippets-unixcoder"
QDRANT_NLU_COLLECTION_NAME = "code-signatures"
# Collection of whole-file payloads: written by index/file_uploader.py and
# queried by get_file.py.
QDRANT_FILE_COLLECTION_NAME = "code-files"

# Encoder model name; ENCODER_SIZE is presumably its embedding
# dimensionality -- TODO confirm against the encoder's documentation.
ENCODER_NAME = "all-MiniLM-L6-v2"
ENCODER_SIZE = 384
38 changes: 38 additions & 0 deletions code_search/get_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from typing import List

from qdrant_client import QdrantClient
from qdrant_client.http import models

from code_search.config import QDRANT_URL, QDRANT_API_KEY, QDRANT_FILE_COLLECTION_NAME

class FileGet:
    """Look up stored file payloads in the Qdrant file collection by exact path."""

    def __init__(self):
        self.collection_name = QDRANT_FILE_COLLECTION_NAME
        self.client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)

    def get(self, path, limit=5) -> List[dict]:
        """Return the payloads of points whose ``path`` field equals ``path``.

        Args:
            path: Value matched exactly against the ``path`` payload field.
            limit: Maximum number of points requested from the collection.

        Returns:
            The payload of every point in the first page of scroll results.
        """
        path_condition = models.FieldCondition(
            key="path",
            match=models.MatchValue(value=path),
        )
        page = self.client.scroll(
            collection_name=self.collection_name,
            scroll_filter=models.Filter(must=[path_condition]),
            limit=limit,
        )
        # The first element of the scroll result holds the matched points.
        records = page[0]
        return [record.payload for record in records]


if __name__ == '__main__':
    # Manual smoke test: print every stored payload for one known file path.
    sample_path = "lib/collection/src/collection_manager/optimizers/indexing_optimizer.rs"

    file_getter = FileGet()

    for payload in file_getter.get(sample_path):
        print(payload)
42 changes: 42 additions & 0 deletions code_search/index/file_uploader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from pathlib import Path
from qdrant_client import QdrantClient
import json

from code_search.config import QDRANT_URL, QDRANT_API_KEY, DATA_DIR, QDRANT_FILE_COLLECTION_NAME


def encode_and_upload():
    """Upload the entries of ``rs_files.json`` to the Qdrant file collection.

    The collection named by ``QDRANT_FILE_COLLECTION_NAME`` is recreated on
    every run, and each entry of the JSON file becomes the payload of one
    point. The collection is created with an empty vector config and every
    point is given an empty vector dict, so only payloads are stored.

    Raises:
        RuntimeError: If ``rs_files.json`` is missing from ``DATA_DIR``.
    """
    qdrant_client = QdrantClient(
        QDRANT_URL,
        api_key=QDRANT_API_KEY,
    )

    collection_name = QDRANT_FILE_COLLECTION_NAME
    input_file = Path(DATA_DIR) / "rs_files.json"

    if not input_file.exists():
        raise RuntimeError(
            f"File {input_file} does not exist. "
            "Generate it with files_to_json.py first."
        )

    # files_to_json.py writes this file as UTF-8, so read it back the same way
    # instead of relying on the platform default encoding.
    with open(input_file, 'r', encoding='utf-8') as json_file:
        payload = json.load(json_file)

    print(f"Recreating the collection {collection_name}")
    qdrant_client.recreate_collection(
        collection_name=collection_name,
        vectors_config={}
    )

    print(f"Storing data in the collection {collection_name}")
    qdrant_client.upload_collection(
        collection_name=collection_name,
        payload=payload,
        # One empty vector dict per payload entry.
        vectors=[{}] * len(payload),
        # No explicit ids are supplied; presumably the client generates
        # them -- TODO confirm against the qdrant-client docs.
        ids=None,
        batch_size=256
    )


# Script entry point: rebuild the file collection from rs_files.json.
if __name__ == '__main__':
    encode_and_upload()
41 changes: 41 additions & 0 deletions code_search/index/files_to_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import os.path
import json
from pathlib import Path

from code_search.config import DATA_DIR


def process_file(root_dir, file_path):
    """Describe one source file as a payload dict.

    Args:
        root_dir: Directory that the stored path is made relative to.
        file_path: Path of the file to read.

    Returns:
        A dict with the file's path relative to ``root_dir`` (``"path"``),
        its lines including line endings (``"code"``), and the 1-based first
        and last line numbers (``"startline"``, ``"endline"``).
    """
    # Bytes that are not valid UTF-8 are dropped rather than raising.
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as source:
        lines = list(source)

    return {
        "path": os.path.relpath(file_path, root_dir),
        "code": lines,
        "startline": 1,
        "endline": len(lines),
    }


def explore_directory(root_dir):
    """Build a payload dict for every ``.rs`` file found beneath ``root_dir``.

    Args:
        root_dir: Directory to walk recursively.

    Returns:
        A list of ``process_file`` results, in the order ``os.walk`` yields
        the files.
    """
    # Lazily enumerate every file path so walking and processing interleave.
    candidates = (
        os.path.join(current_dir, name)
        for current_dir, _subdirs, names in os.walk(root_dir)
        for name in names
    )
    return [
        process_file(root_dir, candidate)
        for candidate in candidates
        if candidate.endswith('.rs')
    ]


def main():
    """Dump every ``.rs`` file under ``$QDRANT_PATH`` to ``rs_files.json``.

    The directory to scan is read from the ``QDRANT_PATH`` environment
    variable and the result is written to ``DATA_DIR / "rs_files.json"`` as
    UTF-8 JSON.

    Raises:
        RuntimeError: If ``QDRANT_PATH`` is unset or empty.
    """
    folder_path = os.getenv('QDRANT_PATH')
    if not folder_path:
        # Without this guard os.walk(None) fails with an opaque TypeError, and
        # an empty string silently produces an empty output file.
        raise RuntimeError(
            "Environment variable QDRANT_PATH is not set; "
            "point it at the source tree to index."
        )

    output_file = Path(DATA_DIR) / "rs_files.json"

    files_data = explore_directory(folder_path)

    with open(output_file, 'w', encoding='utf-8') as json_file:
        json.dump(files_data, json_file, indent=2)


# Script entry point: regenerate rs_files.json from the tree at $QDRANT_PATH.
if __name__ == "__main__":
    main()
8 changes: 8 additions & 0 deletions code_search/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@

from code_search.config import ROOT_DIR
from code_search.searcher import CombinedSearcher
from code_search.get_file import FileGet

# FastAPI application object that the route decorators below attach to.
app = FastAPI()

# Module-level singletons, created once at import time and reused by every
# request: `searcher` backs /api/search, `get_file` backs /api/file.
searcher = CombinedSearcher()
get_file = FileGet()


@app.get("/api/search")
Expand All @@ -17,6 +19,12 @@ async def search(query: str):
"result": searcher.search(query, limit=5)
}

@app.get("/api/file")
async def file(path: str):
    """Return the stored payloads for the file whose path equals ``path``.

    The payloads come from ``get_file.get`` and are wrapped under the
    ``"result"`` key of the response.
    """
    matches = get_file.get(path)
    return {"result": matches}


# Serve the built frontend (frontend/dist under ROOT_DIR) at the site root.
# NOTE(review): mounted after the API routes, presumably so they match first -- confirm.
app.mount("/", StaticFiles(directory=os.path.join(ROOT_DIR, 'frontend', 'dist'), html=True))

Expand Down
2 changes: 2 additions & 0 deletions frontend/src/api/constants.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
// Common prefix shared by all backend API routes.
const API_V1 = "api/";

// Endpoint for code search.
export const SEARCH_URL = API_V1 + "search";

// Endpoint for fetching a stored file by its path.
export const FILE_URL = API_V1 + "file";
13 changes: 13 additions & 0 deletions frontend/src/api/file.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { Axios } from "./axios";
import { FILE_URL } from "./constants";

/** Parameters of a file lookup request. */
export type PathRequest = {
  // Sent to the backend as the `path` query parameter.
  path: string;
};

/**
 * Request a stored file from the backend.
 *
 * Issues a GET to FILE_URL with the requested path sent as the `path`
 * query parameter and returns the pending request.
 *
 * @param request - Lookup parameters; only `path` is read. It is destructured
 *   so the parameter no longer shadows the `PathRequest` type name.
 */
export const getFileResult = ({ path }: PathRequest) => {
  const params = { path };
  return Axios().get(FILE_URL, { params });
};
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
.codeLoad {
padding-right: 1rem;
padding-left: 1rem;
height: 24px;
width: 56px;
color: #646d76;
background-color: #bddfff;
display: flex;
Expand Down
112 changes: 71 additions & 41 deletions frontend/src/components/CodeContainer/index.tsx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { Box, Button, Image, ThemeIcon, Tooltip } from "@mantine/core";
import { Box, Button, Image, Loader, ThemeIcon, Tooltip } from "@mantine/core";
import classes from "./CodeContainer.module.css";
import { Highlight, themes } from "prism-react-renderer";
import {
Expand All @@ -7,6 +7,8 @@ import {
IconFoldUp,
} from "@tabler/icons-react";
import useMountedState from "@/hooks/useMountedState";
import { useGetFile } from "@/hooks/useGetFile";
import { useEffect } from "react";

type CodeContainerProps = {
code_type: string;
Expand All @@ -16,8 +18,6 @@ type CodeContainerProps = {
module: string;
snippet: string;
struct_name: string;
upper_lines: string;
lower_lines: string;
};
docstring: string | null;
line: number;
Expand All @@ -35,42 +35,61 @@ const loadCount = 10;
export function CodeContainer(props: CodeContainerProps) {
const { context, line_from, sub_matches, line_to } = props;
const [codeLineFrom, setCodeLineFrom] = useMountedState(line_from);
const [codeLineTo, setCodeLineTo] = useMountedState(0);
const [codeLineTo, setCodeLineTo] = useMountedState(line_to);
const [code, setCode] = useMountedState(props.context.snippet);
const { data, error, loading, getFile } = useGetFile();
const [inStack, setInStack] = useMountedState<
"loadUpperCode" | "loadLowerCode" | null
>(null);

const loadUpperCode = () => {
const upperCodeArray = context.upper_lines.split("\n");
const upperCode = upperCodeArray
.slice(
codeLineFrom - loadCount + 1 > 0 ? codeLineFrom - loadCount + 1 : 0,
codeLineFrom
)
.join("\n");
setCodeLineFrom((number) => {
return number - loadCount > 0 ? number - loadCount : 1;
});
setCode(`${upperCode}${code}`);
if (!data) {
getFile(context.file_path);
setInStack("loadUpperCode");
}
if (data) {
const upperCodeArray = data.result[0].code;
const upperCode = upperCodeArray
.slice(
codeLineFrom - loadCount + 1 > 0 ? codeLineFrom - loadCount + 1 : 0,
codeLineFrom
)
.join("");
setCodeLineFrom((number) => {
return number - loadCount > 0 ? number - loadCount : 1;
});
setCode(`${upperCode}${code}`);
}
};

const loadLowerCode = () => {
const lowerCodeArray = context.lower_lines.split("\n");
if (lowerCodeArray.length > codeLineTo + loadCount) {
if (!data) {
getFile(context.file_path);
setInStack("loadLowerCode");
}
if (data) {
const lowerCodeArray = data.result[0].code;
const lowerCode = lowerCodeArray
.slice(codeLineTo, codeLineTo + loadCount + 1)
.join("\n");
.slice(codeLineTo, codeLineTo + loadCount)
.join("");
setCodeLineTo((number) => {
return number + loadCount;
});
setCode(`${code}${lowerCode}`);
} else {
const lowerCode = lowerCodeArray
.slice(codeLineTo, lowerCodeArray.length)
.join("\n");
setCodeLineTo(lowerCodeArray.length);
setCode(`${code}${lowerCode}`);
}
};

useEffect(() => {
if (inStack === "loadUpperCode" && data) {
loadUpperCode();
setInStack(null);
}
if (inStack === "loadLowerCode" && data) {
loadLowerCode();
setInStack(null);
}
}, [data]);

return (
<Box
className={classes.wrapper}
Expand Down Expand Up @@ -131,12 +150,18 @@ export function CodeContainer(props: CodeContainerProps) {
withArrow
>
<span className={classes.codeLoad} onClick={loadUpperCode}>
<IconFoldUp />
{loading && inStack === "loadUpperCode" ? (
<Loader type="oval" size="xs" />
) : (
<IconFoldUp />
)}
</span>
</Tooltip>
<div className={classes.codeLine}>
<span className={classes.codeNumber}>
@@ {1} - {codeLineFrom - 1} of {context.file_name}
{error
? error
: `@@ 1 - ${codeLineFrom - 1} of ${context.file_name}`}
</span>
</div>
</div>
Expand Down Expand Up @@ -174,10 +199,7 @@ export function CodeContainer(props: CodeContainerProps) {
))}
<div
style={
codeLineTo === context.lower_lines.split("\n").length ||
context.lower_lines === undefined ||
context.lower_lines === null ||
context.lower_lines === ""
data?.result[0].endline && codeLineTo >= data?.result[0].endline
? { display: "none" }
: {
display: "flex",
Expand All @@ -191,12 +213,12 @@ export function CodeContainer(props: CodeContainerProps) {
}
>
<Tooltip
label={`Load ${line_to + codeLineTo + 2} to ${
line_to + codeLineTo + loadCount+1 <
context.lower_lines.split("\n").length + line_to
? line_to + codeLineTo + loadCount+1
: context.lower_lines.split("\n").length + line_to
}`}
label={`Load ${codeLineTo + 2} to ${
data?.result[0].endline &&
data?.result[0].endline < codeLineTo + loadCount + 2
? data?.result[0].endline + 1
: codeLineTo + loadCount + 2
} of file`}
withArrow
>
<span
Expand All @@ -206,14 +228,22 @@ export function CodeContainer(props: CodeContainerProps) {
}}
onClick={loadLowerCode}
>
<IconFoldDown />
{loading && inStack === "loadLowerCode" ? (
<Loader type="oval" size="xs" />
) : (
<IconFoldDown />
)}
</span>
</Tooltip>
<div className={classes.codeLine}>
<span className={classes.codeNumber}>
@@ {line_to + codeLineTo + 2} -{" "}
{context.lower_lines.split("\n").length + line_to} of{" "}
{context.file_name}
{error
? error
: `@@ ${codeLineTo + 2} - ${
data?.result[0].endline
? data?.result[0].endline + 1
: "end"
} of ${context.file_name}`}
</span>
</div>
</div>
Expand Down
Loading

0 comments on commit bc44f69

Please sign in to comment.