-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add file search functionality by adding a separate collection for files (#5)
* Add file search functionality by adding a separate collection for files * duplication error fixed * use file path from env + reformat --------- Co-authored-by: generall <[email protected]>
- Loading branch information
1 parent
d92fdf6
commit bc44f69
Showing
12 changed files
with
268 additions
and
43 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
from typing import List | ||
|
||
from qdrant_client import QdrantClient | ||
from qdrant_client.http import models | ||
|
||
from code_search.config import QDRANT_URL, QDRANT_API_KEY, QDRANT_FILE_COLLECTION_NAME | ||
|
||
class FileGet:
    """Look up stored file payloads in the Qdrant file collection by path."""

    def __init__(self):
        # Connection settings and the collection name come from code_search.config.
        self.client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
        self.collection_name = QDRANT_FILE_COLLECTION_NAME

    def get(self, path, limit=5) -> List[dict]:
        """Return the payloads of points whose ``path`` field matches *path*.

        Args:
            path: Value matched against the ``path`` payload key.
            limit: Passed to the scroll call as the maximum number of points.

        Returns:
            The payload of every point in the first element of the scroll
            response.
        """
        path_condition = models.FieldCondition(
            key="path",
            match=models.MatchValue(value=path),
        )
        response = self.client.scroll(
            collection_name=self.collection_name,
            scroll_filter=models.Filter(must=[path_condition]),
            limit=limit,
        )

        # Only element 0 of the scroll response (the matched points) is used.
        matched_points = response[0]
        return [point.payload for point in matched_points]
|
||
|
||
if __name__ == '__main__':
    # Manual check: fetch one known file path and print each payload returned.
    sample_path = "lib/collection/src/collection_manager/optimizers/indexing_optimizer.rs"

    file_getter = FileGet()

    for payload in file_getter.get(sample_path):
        print(payload)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
from pathlib import Path | ||
from qdrant_client import QdrantClient | ||
import json | ||
|
||
from code_search.config import QDRANT_URL, QDRANT_API_KEY, DATA_DIR, QDRANT_FILE_COLLECTION_NAME | ||
|
||
|
||
def encode_and_upload():
    """Load ``rs_files.json`` from DATA_DIR and store it in the file collection.

    The collection named by ``QDRANT_FILE_COLLECTION_NAME`` is recreated with
    an empty vectors config, then every record from the JSON file is uploaded
    as a payload with an empty vector mapping.

    Raises:
        RuntimeError: If the input JSON file does not exist.
    """
    qdrant_client = QdrantClient(
        QDRANT_URL,
        api_key=QDRANT_API_KEY,
    )

    collection_name = QDRANT_FILE_COLLECTION_NAME
    input_file = Path(DATA_DIR) / "rs_files.json"

    if not input_file.exists():
        # This aborts the run, so the message must not claim the step is skipped.
        raise RuntimeError(f"File {input_file} does not exist")

    # Read with an explicit encoding so the result does not depend on the locale.
    with open(input_file, 'r', encoding='utf-8') as json_file:
        payload = json.load(json_file)

    print(f"Recreating the collection {collection_name}")
    qdrant_client.recreate_collection(
        collection_name=collection_name,
        vectors_config={},
    )

    print(f"Storing data in the collection {collection_name}")
    qdrant_client.upload_collection(
        collection_name=collection_name,
        payload=payload,
        # The collection has no vectors configured, so each record gets an
        # empty vector mapping.
        vectors=[{}] * len(payload),
        ids=None,
        batch_size=256,
    )


if __name__ == '__main__':
    encode_and_upload()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
import os.path | ||
import json | ||
from pathlib import Path | ||
|
||
from code_search.config import DATA_DIR | ||
|
||
|
||
def process_file(root_dir, file_path):
    """Read one file and describe it as a record keyed by its relative path.

    Args:
        root_dir: Directory the returned ``path`` is made relative to.
        file_path: Path of the file to read.

    Returns:
        A dict with ``path`` (relative to *root_dir*, always ``/``-separated),
        ``code`` (the file content as a list of lines), ``startline``
        (always 1) and ``endline`` (the number of lines read).
    """
    # Undecodable bytes are dropped instead of aborting the read.
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
        code_lines = file.readlines()

    # Normalise the separator so the stored path is identical on every
    # platform; on POSIX systems this is a no-op.
    relative_path = os.path.relpath(file_path, root_dir).replace(os.sep, "/")
    return {
        "path": relative_path,
        "code": code_lines,
        "startline": 1,
        "endline": len(code_lines),
    }
|
||
|
||
def explore_directory(root_dir):
    """Collect a ``process_file`` record for every ``.rs`` file under *root_dir*.

    Args:
        root_dir: Directory tree to walk; also passed to ``process_file`` as
            the base for relative paths.

    Returns:
        A list of the records returned by ``process_file``, in walk order.
    """
    records = []
    for current_dir, _subdirs, names in os.walk(root_dir):
        candidates = (os.path.join(current_dir, name) for name in names)
        rust_paths = [path for path in candidates if path.endswith('.rs')]
        records.extend(process_file(root_dir, path) for path in rust_paths)
    return records
|
||
|
||
def main():
    """Export every ``.rs`` file under ``$QDRANT_PATH`` to ``rs_files.json``.

    The output file is written into DATA_DIR as indented JSON.

    Raises:
        RuntimeError: If the ``QDRANT_PATH`` environment variable is unset or
            empty.
    """
    folder_path = os.getenv('QDRANT_PATH')
    if not folder_path:
        # Fail with a clear message instead of passing None (or "") to the
        # directory walk.
        raise RuntimeError("Environment variable QDRANT_PATH is not set")

    output_file = Path(DATA_DIR) / "rs_files.json"

    files_data = explore_directory(folder_path)

    with open(output_file, 'w', encoding='utf-8') as json_file:
        json.dump(files_data, json_file, indent=2)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
// Base path prepended to every endpoint below.
const API_V1 = "api/";

// Resolves to "api/search".
export const SEARCH_URL = API_V1 + "search";

// Resolves to "api/file".
export const FILE_URL = API_V1 + "file";
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import { Axios } from "./axios"; | ||
import { FILE_URL } from "./constants"; | ||
|
||
/** Query parameters for a file lookup. */
export type PathRequest = {
  path: string;
};

/**
 * Issue a GET request to FILE_URL with the requested path as a query parameter.
 *
 * @param request - Object carrying the `path` to send.
 * @returns Whatever `Axios().get` returns for the request.
 */
export const getFileResult = (request: PathRequest) => {
  // The parameter is no longer named after the type, to keep the two distinct.
  const params = {
    path: request.path,
  };
  return Axios().get(FILE_URL, { params });
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.