migrate_collection.py
import argparse
import os
import shutil
import subprocess
import tempfile


def clone_repo(url, temp_dir):
    """Clone the repository into a temporary directory."""
    print(f"Cloning repository: {url} into {temp_dir}")
    subprocess.check_call(["git", "clone", url, temp_dir])


def copy_files(src_dir, dest_dir, filenames=None):
    """Copy files from src_dir to dest_dir.

    If filenames is None, copy everything in src_dir (including
    subdirectories); otherwise copy only the named files.
    """
    print(f"Copying files from {src_dir} to {dest_dir}")
    if filenames is None:
        # Copy all files and directories in src_dir.
        for item in os.listdir(src_dir):
            s = os.path.join(src_dir, item)
            d = os.path.join(dest_dir, item)
            print(f"Copying {s} to {d}")
            if os.path.isdir(s):
                shutil.copytree(s, d, dirs_exist_ok=True)
            else:
                shutil.copy2(s, d)
    else:
        # Copy only the specified files, skipping any that are missing.
        for filename in filenames:
            src_path = os.path.join(src_dir, filename)
            if os.path.exists(src_path):
                print(f"Copying file {src_path} to {dest_dir}")
                shutil.copy(src_path, dest_dir)
            else:
                print(f"File not found: {src_path}")


def main(collection_name, repo_url):
    """Clone the repository and copy its collection and pipeline files locally."""
    temp_dir = tempfile.mkdtemp()
    try:
        # Clone the repository into the temporary directory.
        clone_repo(repo_url, temp_dir)

        # Handle the collection folder.
        collection_path = os.path.join(temp_dir, "collection")
        print(f"Looking for collection at: {collection_path}")
        handle_collection_or_pipeline(
            collection_path,
            "collection",
            collection_name,
            ["endpoint.csv", "source.csv", "old-resource.csv"],
        )

        # Handle the pipeline folder.
        pipeline_path = os.path.join(temp_dir, "pipeline")
        print(f"Looking for pipeline at: {pipeline_path}")
        pipeline_files = [
            "column.csv", "combine.csv", "concat.csv", "convert.csv",
            "default-value.csv", "default.csv", "filter.csv", "lookup.csv",
            "patch.csv", "skip.csv", "transform.csv",
        ]
        handle_collection_or_pipeline(pipeline_path, "pipeline", collection_name, pipeline_files)
    finally:
        # Cleanup: remove the cloned repository directory.
        shutil.rmtree(temp_dir)


def handle_collection_or_pipeline(path, folder_type, collection_name, filenames):
    """Copy files from either the collection or the pipeline folder."""
    if os.path.exists(path):
        # Destination directory, e.g. ./collection/<collection_name>.
        dest_dir = os.path.join("./", folder_type, collection_name)
        print(f"Destination directory for {folder_type}: {dest_dir}")
        os.makedirs(dest_dir, exist_ok=True)
        # Copy the specified files.
        copy_files(path, dest_dir, filenames)
        print(f"{folder_type.capitalize()} files copied successfully.")
    else:
        print(f"{folder_type.capitalize()} '{collection_name}' not found in the repository.")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Copy a collection's configuration and pipeline CSV files "
                    "from a GitHub repository into the local collection/ and "
                    "pipeline/ directories."
    )
    parser.add_argument("collection_name", type=str, help="The name of the collection")
    parser.add_argument("repo_url", type=str, help="The URL of the GitHub repository")
    args = parser.parse_args()
    main(args.collection_name, args.repo_url)
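
For reference, a typical invocation from the command line looks like the following; the collection name and repository URL shown are placeholders, not values taken from this repository:

    python migrate_collection.py <collection-name> https://github.com/<org>/<repo>.git

The script clones the repository into a temporary directory, copies the listed CSV files into ./collection/<collection-name> and ./pipeline/<collection-name>, and deletes the temporary clone when it finishes.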