-
Notifications
You must be signed in to change notification settings - Fork 119
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Storing data as JSON + Restore code added
Choosing JSON instead of CSV because: 1. CSV does not retain the nested, dict-like structure of MongoDB documents. 2. CSV also stores redundant empty (NaN) columns.
- Loading branch information
Mahadik, Mukul Chandrakant
authored and
Mahadik, Mukul Chandrakant
committed
Jan 8, 2024
1 parent
0d0a0ba
commit ae6eae6
Showing
3 changed files
with
106 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
import logging | ||
import argparse | ||
import uuid | ||
from datetime import datetime | ||
import emission.core.wrapper.user as ecwu | ||
import emission.core.get_database as edb | ||
import emission.core.wrapper.pipelinestate as ecwp | ||
import emission.core.wrapper.pipelinestate as ecwp | ||
import emission.storage.pipeline_queries as esp | ||
import pandas as pd | ||
import pymongo | ||
from bson import ObjectId | ||
import json | ||
|
||
def restoreUserTimeseries(filename):
    """Load documents from a JSON file and insert them into the timeseries db.

    The parsed JSON is handed to ``insert_many`` on the collection returned
    by ``edb.get_timeseries_db()``, so the file should hold a JSON array of
    documents. An empty dump is treated as a no-op.

    :param filename: path to the JSON file to restore from
    :return: None; the number of inserted documents is logged
    """
    # JSON text is UTF-8 by specification; do not depend on the locale default.
    with open(filename, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # insert_many refuses an empty document list, so there is nothing to do
    # (and nothing to fail on) when the dump contains no documents.
    if not data:
        logging.info("No documents found in {}; nothing to insert".format(filename))
        return

    # NOTE(review): json.load yields plain JSON types, so `_id` values come
    # back as whatever the export wrote (e.g. strings), not ObjectId --
    # confirm this matches the format produced by the export script.
    result = edb.get_timeseries_db().insert_many(data)

    logging.info("{} documents successfully inserted".format(len(result.inserted_ids)))
|
||
if __name__ == '__main__':
    # Script entry point: parse the path of the JSON dump from the command
    # line and restore its documents into the timeseries database.
    logging.basicConfig(level=logging.DEBUG)

    parser = argparse.ArgumentParser(prog="restore_user_timeseries")
    parser.add_argument(
        "-f", "--file_name",
        # Without a path the restore cannot run; let argparse report the
        # missing option instead of failing later inside open().
        required=True,
        help="Path to the JSON file containing data to be imported"
    )

    args = parser.parse_args()
    restoreUserTimeseries(args.file_name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters