-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathno3_group_data.py
72 lines (53 loc) · 2.29 KB
/
no3_group_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# import libraries
from google.cloud import bigquery
# Create the client object used for every BigQuery request below.
client = bigquery.Client()
### --------------------------------------------------------------------------
### SELECT FROM
### --------------------------------------------------------------------------
# Construct a reference to the "hacker_news" dataset in the
# "bigquery-public-data" project. The reference is built directly because
# Client.dataset() is deprecated in google-cloud-bigquery.
dataset_ref = bigquery.DatasetReference("bigquery-public-data", "hacker_news")
# API request - fetch the dataset
dataset = client.get_dataset(dataset_ref)
# Construct a reference to the "comments" table
table_ref = dataset_ref.table("comments")
# API request - fetch the table
table = client.get_table(table_ref)
# Print information on all the columns in the "comments" table in the
# "hacker_news" dataset. An explicit print() is needed: a bare expression
# only displays in a notebook cell, not when this file runs as a script.
print(table.schema)
# Print a preview of the first five rows of the "comments" table
print(client.list_rows(table, max_results=5).to_dataframe())
# Query: count the comments under each parent and keep only the parents
# that have more than ten. The count column has no alias in this version.
query = """
SELECT parent, COUNT(id)
FROM `bigquery-public-data.hacker_news.comments`
GROUP BY parent
HAVING COUNT(id) > 10
"""
# Set up the query (cancel the query if it would use too much of
# your quota, with the limit set to 1 GB)
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**9)
query_job = client.query(query, job_config=safe_config)
# API request - run the query, and convert the results to a pandas DataFrame
popular_comments = query_job.to_dataframe()
# Print the first five rows of the DataFrame. The explicit print() matters:
# a bare `popular_comments.head()` is silently discarded outside a notebook.
print(popular_comments.head())
### --------------------------------------------------------------------------
### ALIASING AND OTHER IMPROVEMENTS
### --------------------------------------------------------------------------
# Query: same grouping as before, but the count column is aliased to
# NumPosts, and COUNT(1) counts the rows in each group instead of naming a
# specific column.
query = """
SELECT parent, COUNT(1) AS NumPosts
FROM `bigquery-public-data.hacker_news.comments`
GROUP BY parent
HAVING COUNT(1) > 10
"""
# Set up the query (cancel the query if it would use too much of
# your quota, with the limit set to 1 GB)
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**9)
query_job = client.query(query, job_config=safe_config)
# API request - run the query, and convert the results to a pandas DataFrame
popular_comments = query_job.to_dataframe()
# Print the first five rows of the DataFrame. The explicit print() matters:
# a bare `popular_comments.head()` is silently discarded outside a notebook.
print(popular_comments.head())