-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathapp.py
executable file
·135 lines (107 loc) · 3.99 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 15 10:18:41 2018
@author: syed.moshfeq.salaken
run with:
$ chmod a+x app.py
$ ./app.py
install flask
configure AWS credential with AWS CLI: run >> AWS CONFIGURE
boto3 uses that
explicitly state region
supplied S3 bucket must follow the following convention:
<bucket name>/input : this will contain the json/csv file containing data
<bucket name>/results : this will contain the output results
take care of serde in CREATE TABLE STATEMENT, need to think how we can remove hardcoding on this
"""
#!flask/bin/python
from flask import Flask, jsonify, make_response, abort
from helpers_s3 import create_athena_DB, run_query, getResults # import everything for fetching data from s3
from time import sleep
app = Flask(__name__)
@app.errorhandler(404)
def not_found(error):
return make_response(jsonify({'error': 'Not found'}), 404)
tasks = [
{
'id': 1,
'title': u'Buy groceries',
'description': u'Milk, Cheese, Pizza, Fruit, Tylenol',
'done': False
},
{
'id': 2,
'title': u'Learn Python',
'description': u'Need to find a good Python tutorial on the web',
'done': False
}
]
# format of API:
# http://localhost:5000/s3/athena/servain-isf-1-s3/ap-southeast-2/test_database/persons/select * from test_database.persons where age > 25
@app.route('/s3/athena/<string:bucketname>/<string:AWSregion>/<string:database_name>/<string:table_name>/<string:DTquery>', methods=['GET'])
def get_task(bucketname, database_name, table_name, DTquery, AWSregion):
#task = [task for task in tasks if task['id'] == task_id]
task = [
{
"bucket_name" : bucketname,
"database_name" : database_name,
"table_name" : table_name,
"DTquery" : DTquery,
"AWSregion" : AWSregion
}
]
if len(task) == 0:
abort(404)
# user input
#Athena configuration
s3_input = 's3://' + bucketname + '/input' #'s3://servain-isf-1-s3/input'
s3_ouput = 's3://' + bucketname + '/results/'#'s3://servain-isf-1-s3/results/'
database = database_name #'test_database'
table = table_name #'persons'
regionName = AWSregion #'ap-southeast-2'
DBbucket = 's3://' + bucketname + '/' #'s3://servain-isf-1-s3/'
s3bucketOutputPrefix = 'results/'
# print(s3_input)
# print(s3_ouput)
# print(database)
# print(table)
# print(regionName)
# print(DBbucket)
# print(s3bucketOutputPrefix)
# print(DTquery)
#
# >> we need to think how to take care of this table creation sql from user <<
# create table STATEMENT from json file
# for CSV files, use wither LazySimpleSerde or OpenCSVSerde
# see: https://docs.aws.amazon.com/athena/latest/ug/csv.html
create_table = \
"""CREATE EXTERNAL TABLE IF NOT EXISTS %s.%s (
`name` string,
`sex`string,
`city` string,
`country` string,
`age` int,
`job` string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = '1'
) LOCATION '%s'
TBLPROPERTIES ('has_encrypted_data'='false');""" % ( database, table, s3_input )
# fetching data: Query definitions
#query_1 = "SELECT * FROM %s.%s where sex = 'F';" % (database, table)
#query_2 = "SELECT * FROM %s.%s where age > 30;" % (database, table)
# actions:
# create the database
res_db = create_athena_DB(database, regionName, DBbucket)
sleep(0.5) # necessary, otherwise, check for query status
# create table on the db
res = run_query(create_table, database, s3_ouput)
sleep(0.5) # necessary
# run query 1
result = getResults(DTquery, database, s3_ouput, s3bucketOutputPrefix, DBbucket, regionName)
#return jsonify({'task': task[0]})
return result
if __name__ == '__main__':
app.run(debug=True)