Skip to content

Commit

Permalink
Merge in local changes.
Browse files Browse the repository at this point in the history
  • Loading branch information
GilHoggarth committed Nov 22, 2023
1 parent 40092d0 commit 2d8cc78
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions duckdb-query.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@ def run_print_and_save(query, csv_file):
q.to_csv(csv_file)


print(duckdb.query(f"DESCRIBE SELECT * FROM '{in_file}'").df())

print(duckdb.query(f"SELECT COUNT(*) FROM '{in_file}'"))

#print(duckdb.query(f"SELECT * FROM '{in_file}' ORDER BY id ASC LIMIT 1"))
Expand All @@ -20,6 +18,12 @@ def run_print_and_save(query, csv_file):
#run_print_and_save(f"SELECT * FROM '{in_file}' WHERE url_domain == 'bbc.co.uk' LIMIT 10", "some_rows.csv")
run_print_and_save(f"SELECT * FROM '{in_file}' WHERE status_code == -5002 LIMIT 10", "some_rows.csv")

#run_print_and_save(f"SELECT url, start_time, STRPTIME(COALESCE(NULLIF(REGEXP_EXTRACT(annotations, '.*launchTimestamp:([0-9]+).*',1),''),'20300101000000'),'%Y%m%d%H%M%S') AS launch_time, (start_time - launch_time) AS delay, REGEXP_EXTRACT(annotations, '.*WebRenderStatus:([0-9]+).*',1) AS webrender_status_code, annotations FROM '{in_file}' WHERE status_code == -5002 AND delay IS NOT NULL ORDER BY delay DESC LIMIT 100", "some_rows.csv")
#print(duckdb.query(f"SELECT domain, status_code, COUNT(*) from '{in_file}' GROUP BY domain, status_code ORDER BY COUNT(*) DESC"))
#print(duckdb.query(f"SELECT domain, status_code, content_type, SUM(content_length), COUNT(*) from '{in_file}' WHERE domain == 'bbc.co.uk' GROUP BY domain, status_code, content_type ORDER BY COUNT(*) DESC"))

print(duckdb.query(f"DESCRIBE SELECT * FROM '{in_file}'").df())

#run_print_and_save(f"SELECT url, start_time, STRPTIME(COALESCE(NULLIF(REGEXP_EXTRACT(annotations, '.*launchTimestamp:([0-9]+).*',1),''),'20300101000000'),'%Y%m%d%H%M%S') AS launch_time, (start_time - launch_time) AS delay, REGEXP_EXTRACT(annotations, '.*WebRenderStatus:([0-9]+).*',1) AS webrender_status_code, annotations FROM '{in_file}' WHERE status_code == -5002 AND delay IS NOT NULL ORDER BY delay DESC LIMIT 100", "some_rows.csv")
run_print_and_save(f"SELECT url, tries, start_time, duration, launch_time, (start_time - launch_time) AS delay, webrender_status_code, annotations FROM '{in_file}' WHERE status_code == -5002 ORDER BY delay DESC LIMIT 100", "some_rows.csv")

Expand Down

0 comments on commit 2d8cc78

Please sign in to comment.