diff --git a/Lab10.html b/Lab10.html new file mode 100644 index 0000000..bd09f99 --- /dev/null +++ b/Lab10.html @@ -0,0 +1,4622 @@ + + + + + + + + + + +Lab 10 + + + + + + + + + + + + + + + + + + + +
+ +
+ +
+
+

Lab 10

+
+ + + +
+ +
+
Author
+
+

Giuliet Kibler

+
+
+ + + +
+ + + +
+ + +
+

Setup

+
+
# Run once if the packages are not installed: install.packages(c("RSQLite", "DBI"))
+
+library(RSQLite)
+library(DBI)
+
+# Open a temporary in-memory SQLite database; `con` is the connection handle reused below
+con <- dbConnect(SQLite(), ":memory:")
+
+# Download the Sakila sample tables (CSV files hosted on GitHub) into data.frames; needs network access
+actor <- read.csv("https://raw.githubusercontent.com/ivanceras/sakila/master/csv-sakila-db/actor.csv")
+rental <- read.csv("https://raw.githubusercontent.com/ivanceras/sakila/master/csv-sakila-db/rental.csv")
+customer <- read.csv("https://raw.githubusercontent.com/ivanceras/sakila/master/csv-sakila-db/customer.csv")
+payment <- read.csv("https://raw.githubusercontent.com/ivanceras/sakila/master/csv-sakila-db/payment_p2007_01.csv")
+
+# Copy each data.frame into the database as a table with the same name
+dbWriteTable(con, "actor", actor)
+dbWriteTable(con, "rental", rental)
+dbWriteTable(con, "customer", customer)
+dbWriteTable(con, "payment", payment)
+
+
+
DBI::dbListTables(conn = con)  # names of the tables currently stored in the database
+
+
[1] "actor"    "customer" "payment"  "rental"  
+
+
+

TIP: You can use the following QUERY to see the structure of a table

+
+
PRAGMA table_info('actor')  -- one row per column of actor: cid, name, type, notnull, dflt_value, pk
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
4 records
cidnametypenotnulldflt_valuepk
0actor_idINTEGER0NA0
1first_nameTEXT0NA0
2last_nameTEXT0NA0
3last_updateTEXT0NA0
+
+
+

SQL references:

+

https://www.w3schools.com/sql/

+
+
+

Exercise 1

+

Edit the code below to retrieve the actor ID, first name and last name for all actors using the actor table. Sort by last name and then by first name (note that the code chunk below is set up to run SQL code rather than R code).

+
+
SELECT actor_id, first_name, last_name  -- every actor, sorted by last name and then first name
+FROM actor
+ORDER BY last_name ASC, first_name ASC
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Displaying records 1 - 10
actor_idfirst_namelast_name
58CHRISTIANAKROYD
182DEBBIEAKROYD
92KIRSTENAKROYD
118CUBAALLEN
145KIMALLEN
194MERYLALLEN
76ANGELINAASTAIRE
112RUSSELLBACALL
190AUDREYBAILEY
67JESSICABAILEY
+
+
+
+
+

Exercise 2

+

Retrieve the actor ID, first name, and last name for actors whose last name equals ‘WILLIAMS’ or ‘DAVIS’.

+
+
SELECT actor_id, first_name, last_name  -- actors whose last name is one of the two requested values
+FROM actor
+WHERE last_name = 'WILLIAMS' OR last_name = 'DAVIS'
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
6 records
actor_idfirst_namelast_name
4JENNIFERDAVIS
72SEANWILLIAMS
101SUSANDAVIS
110SUSANDAVIS
137MORGANWILLIAMS
172GROUCHOWILLIAMS
+
+
+
+
+

Exercise 3

+

Write a query against the rental table that returns the IDs of the customers who rented a film on July 5, 2005 (use the rental.rental_date column, and you can use the date() function to ignore the time component). Include a single row for each distinct customer ID.

+
+
SELECT DISTINCT customer_id  -- customer IDs (not rental IDs); DISTINCT keeps one row per customer
+FROM rental  -- date() on the next line drops the time part of rental_date before comparing
+WHERE date(rental_date) = '2005-07-05'
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Displaying records 1 - 10
rental_id
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
+
+
+
+
+

Exercise 4

+
+

Exercise 4.1

+

Construct a query that retrieves all rows from the payment table where the amount is either 1.99, 7.99, 9.99.

+
+
SELECT *  -- all payment columns for rows whose amount is one of the three listed values
+FROM payment
+WHERE amount = 1.99 OR amount = 7.99 OR amount = 9.99
+
+ + ++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Displaying records 1 - 10
payment_idcustomer_idstaff_idrental_idamountpayment_date
16050269271.992007-01-24 21:40:19.996577
1605627011931.992007-01-26 05:10:14.996577
160812822481.992007-01-25 04:49:12.996577
1610329415951.992007-01-28 12:28:20.996577
1613330716141.992007-01-28 14:01:54.996577
16158316110651.992007-01-31 07:23:22.996577
1616031812249.992007-01-26 08:46:53.996577
161613191159.992007-01-24 23:07:48.996577
1618033029677.992007-01-30 17:40:32.996577
16206351111371.992007-01-31 17:48:40.996577
+
+
+
+
+

Exercise 4.2

+

Construct a query that retrieves all rows from the payment table where the amount is greater than 5.

+
+
SELECT p.*  -- every payment whose amount is strictly above 5
+FROM payment AS p
+WHERE p.amount > 5
+
+ + ++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Displaying records 1 - 10
payment_idcustomer_idstaff_idrental_idamountpayment_date
1605226926786.992007-01-28 21:44:14.996577
16058271110968.992007-01-31 11:59:15.996577
1606027214056.992007-01-27 12:01:05.996577
16061272110416.992007-01-31 04:14:49.996577
1606827413945.992007-01-27 09:54:37.996577
16073276186010.992007-01-30 01:13:42.996577
1607427723086.992007-01-26 20:30:05.996577
1608228222826.992007-01-26 17:24:52.996577
16086284111456.992007-01-31 18:42:11.996577
160872862816.992007-01-25 10:43:45.996577
+
+
+
+
+

Exercise 4.3

+

Construct a query that retrieves all rows from the payment table where the amount is greater than 5 and less than 8.

+
+
SELECT p.*  -- payments strictly between 5 and 8 (both bounds excluded)
+FROM payment AS p
+WHERE 5 < p.amount AND p.amount < 8
+
+ + ++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Displaying records 1 - 10
payment_idcustomer_idstaff_idrental_idamountpayment_date
1605226926786.992007-01-28 21:44:14.996577
1606027214056.992007-01-27 12:01:05.996577
16061272110416.992007-01-31 04:14:49.996577
1606827413945.992007-01-27 09:54:37.996577
1607427723086.992007-01-26 20:30:05.996577
1608228222826.992007-01-26 17:24:52.996577
16086284111456.992007-01-31 18:42:11.996577
160872862816.992007-01-25 10:43:45.996577
1609228824276.992007-01-27 14:38:30.996577
1609428825655.992007-01-28 07:54:57.996577
+
+
+
+
+
+

Exercise 5

+

Retrieve all the payment IDs and their amounts from the customers whose last name is ‘DAVIS’.

+
+
SELECT p.payment_id, p.amount  -- payments made by customers whose last name is DAVIS
+FROM payment AS p
+  INNER JOIN customer AS c ON c.customer_id = p.customer_id  -- without ON, every payment pairs with every customer
+WHERE c.last_name = 'DAVIS'
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Displaying records 1 - 10
payment_idamount
160501.99
160510.99
160526.99
160530.99
160544.99
160552.99
160561.99
160574.99
160588.99
160590.99
+
+
+
+
+

Exercise 6

+
+

Exercise 6.1

+

Use COUNT(*) to count the number of rows in rental.

+
+
SELECT
+  COUNT(*) AS count  -- total number of rows in rental
+FROM rental
+
+ + + + + + + + + + + + +
1 records
count
16044
+
+
+
+
+

Exercise 6.2

+

Use COUNT(*) and GROUP BY to count the number of rentals for each customer_id.

+
+
SELECT customer_id,  -- included so each count can be matched to its customer
+  COUNT(*) AS count  -- number of rental rows in the customer's group
+FROM rental
+GROUP BY customer_id
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Displaying records 1 - 10
count
32
27
26
22
38
28
33
24
23
25
+
+
+
+
+

Exercise 6.3

+

Repeat the previous query and sort by the count in descending order.

+
+
SELECT customer_id,  -- included so each count can be matched to its customer
+  COUNT(*) AS count  -- number of rental rows in the customer's group
+FROM rental
+GROUP BY customer_id  -- next line: largest counts first, ties broken by customer_id for a stable order
+ORDER BY COUNT(*) DESC, customer_id
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Displaying records 1 - 10
count
46
45
42
42
41
40
40
39
39
39
+
+
+
+
+

Exercise 6.4

+

Repeat the previous query but use HAVING to only keep the groups with 40 or more.

+
+
SELECT customer_id,  -- included so each count can be matched to its customer
+  COUNT(*) AS count  -- number of rental rows in the customer's group
+FROM rental
+GROUP BY customer_id
+HAVING COUNT(*) >= 40  -- "40 or more" is inclusive, so >= rather than >
+ORDER BY COUNT(*) DESC, customer_id
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
5 records
count
46
45
42
42
41
+
+
+
+
+
+

Exercise 7

+

Write a query that calculates a number of summary statistics for the payment table using MAX, MIN, AVG and SUM

+
+
SELECT  -- largest, smallest, mean and total amount over every row of payment
+    MAX(p.amount) AS max_amount,
+    MIN(p.amount) AS min_amount,
+    AVG(p.amount) AS avg_amount,
+    SUM(p.amount) AS total_amount
+FROM payment AS p;
+
+ + + + + + + + + + + + + + + + + + +
1 records
max_amountmin_amountavg_amounttotal_amount
11.990.994.1697754824.43
+
+
+
+

Exercise 7.1

+

Modify the above query to do those calculations for each customer_id.

+
+
SELECT  -- the same four statistics, computed once per customer_id group
+    p.customer_id AS customer_id,
+    MAX(p.amount) AS max_amount,
+    MIN(p.amount) AS min_amount,
+    AVG(p.amount) AS avg_amount,
+    SUM(p.amount) AS total_amount
+FROM payment AS p
+GROUP BY p.customer_id;
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Displaying records 1 - 10
customer_idmax_amountmin_amountavg_amounttotal_amount
12.990.991.9900003.98
24.994.994.9900004.99
32.991.992.4900004.98
56.990.993.3233339.97
64.990.992.9900008.97
75.990.994.19000020.95
86.996.996.9900006.99
94.990.993.65666710.97
104.994.994.9900004.99
116.996.996.9900006.99
+
+
+
+
+

Exercise 7.2

+

Modify the above query to only keep the customer_ids that have more than 5 payments.

+
+
SELECT  -- per-customer statistics, kept only for customers with more than 5 payment rows
+    p.customer_id AS customer_id,
+    MAX(p.amount) AS max_amount,
+    MIN(p.amount) AS min_amount,
+    AVG(p.amount) AS avg_amount,
+    SUM(p.amount) AS total_amount
+FROM payment AS p
+GROUP BY p.customer_id
+HAVING COUNT(*) > 5;
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Displaying records 1 - 10
customer_idmax_amountmin_amountavg_amounttotal_amount
199.990.994.49000026.94
539.990.994.49000026.94
1097.990.993.99000027.93
1615.990.992.99000017.94
1973.990.992.61500020.92
2076.990.992.99000017.94
2397.992.995.65666733.94
2458.990.994.82333328.94
2514.991.993.32333319.94
2696.990.993.15666718.94
+
+
+
+
+
+

Cleanup

+

Run the following chunk to disconnect from the connection.

+
+
# Close the database connection that was opened in Setup
+DBI::dbDisconnect(conn = con)
+
+
+ +
+ + +
+ + + + + \ No newline at end of file
diff --git a/Lab10.qmd b/Lab10.qmd
new file mode 100644
index 0000000..54a13f5
--- /dev/null
+++ b/Lab10.qmd
@@ -0,0 +1,216 @@
+---
+title: "Lab 10"
+author: "Giuliet Kibler"
+format:
+  html:
+    embed-resources: true
+editor: visual
+---
+
+# Setup
+
+```{r}
+# Run once if the packages are not installed: install.packages(c("RSQLite", "DBI"))
+
+library(RSQLite)
+library(DBI)
+
+# Open a temporary in-memory SQLite database; `con` is the connection used by the SQL chunks below
+con <- dbConnect(SQLite(), ":memory:")
+
+# Download the Sakila sample tables (CSV files hosted on GitHub) into data.frames; needs network access
+actor <- read.csv("https://raw.githubusercontent.com/ivanceras/sakila/master/csv-sakila-db/actor.csv")
+rental <- read.csv("https://raw.githubusercontent.com/ivanceras/sakila/master/csv-sakila-db/rental.csv")
+customer <- read.csv("https://raw.githubusercontent.com/ivanceras/sakila/master/csv-sakila-db/customer.csv")
+payment <- read.csv("https://raw.githubusercontent.com/ivanceras/sakila/master/csv-sakila-db/payment_p2007_01.csv")
+
+# Copy each data.frame into the database as a table with the same name
+dbWriteTable(con, "actor", actor)
+dbWriteTable(con, "rental", rental)
+dbWriteTable(con, "customer", customer)
+dbWriteTable(con, "payment", payment)
+```
+
+```{r}
+DBI::dbListTables(conn = con)  # names of the tables currently stored in the database
+```
+
+TIP: You can use the following QUERY to see the structure of a table
+
+```{sql, connection=con}
+PRAGMA table_info('actor')  -- one row per column of actor
+```
+
+SQL references:
+
+https://www.w3schools.com/sql/
+
+# Exercise 1
+
+Edit the code below to retrieve the actor ID, first name and last name for all actors using the `actor` table. Sort by last name and then by first name (note that the code chunk below is set up to run SQL code rather than R code).
+
+```{sql, connection=con}
+SELECT actor_id, first_name, last_name  -- every actor, sorted by last name and then first name
+FROM actor
+ORDER BY last_name ASC, first_name ASC
+```
+
+# Exercise 2
+
+Retrieve the actor ID, first name, and last name for actors whose last name equals 'WILLIAMS' or 'DAVIS'. 
+
+```{sql, connection=con}
+SELECT actor_id, first_name, last_name  -- actors whose last name is one of the two requested values
+FROM actor
+WHERE last_name = 'WILLIAMS' OR last_name = 'DAVIS'
+```
+
+# Exercise 3
+
+Write a query against the `rental` table that returns the IDs of the customers who rented a film on July 5, 2005 (use the rental.rental_date column, and you can use the `date()` function to ignore the time component). Include a single row for each distinct customer ID.
+
+```{sql, connection=con}
+SELECT DISTINCT customer_id  -- customer IDs (not rental IDs); DISTINCT keeps one row per customer
+FROM rental  -- date() on the next line drops the time part of rental_date before comparing
+WHERE date(rental_date) = '2005-07-05'
+```
+# Exercise 4
+
+## Exercise 4.1
+
+Construct a query that retrieves all rows from the `payment` table where the amount is either 1.99, 7.99, 9.99.
+
+```{sql, connection=con}
+SELECT *  -- all payment columns for rows whose amount is one of the three listed values
+FROM payment
+WHERE amount = 1.99 OR amount = 7.99 OR amount = 9.99
+```
+
+## Exercise 4.2
+
+Construct a query that retrieves all rows from the `payment` table where the amount is greater than 5.
+
+```{sql, connection=con}
+SELECT p.*  -- every payment whose amount is strictly above 5
+FROM payment AS p
+WHERE p.amount > 5
+```
+
+## Exercise 4.3
+
+Construct a query that retrieves all rows from the `payment` table where the amount is greater than 5 and less than 8.
+
+```{sql, connection=con}
+SELECT p.*  -- payments strictly between 5 and 8 (both bounds excluded)
+FROM payment AS p
+WHERE 5 < p.amount AND p.amount < 8
+```
+# Exercise 5
+
+Retrieve all the payment IDs and their amounts from the customers whose last name is 'DAVIS'.
+
+```{sql, connection=con}
+SELECT p.payment_id, p.amount  -- payments made by customers whose last name is DAVIS
+FROM payment AS p
+  INNER JOIN customer AS c ON c.customer_id = p.customer_id  -- without ON, every payment pairs with every customer
+WHERE c.last_name = 'DAVIS'
+```
+
+# Exercise 6
+
+## Exercise 6.1
+
+Use `COUNT(*)` to count the number of rows in `rental`.
+
+```{sql, connection=con}
+SELECT
+  COUNT(*) AS count  -- total number of rows in rental
+FROM rental
+
+```
+
+## Exercise 6.2
+
+Use `COUNT(*)` and `GROUP BY` to count the number of rentals for each `customer_id`.
+
+```{sql, connection=con}
+SELECT customer_id,  -- included so each count can be matched to its customer
+  COUNT(*) AS count  -- number of rental rows in the customer's group
+FROM rental
+GROUP BY customer_id
+```
+## Exercise 6.3
+
+Repeat the previous query and sort by the count in descending order. 
+
+```{sql, connection=con}
+SELECT customer_id,  -- included so each count can be matched to its customer
+  COUNT(*) AS count  -- number of rental rows in the customer's group
+FROM rental
+GROUP BY customer_id  -- next line: largest counts first, ties broken by customer_id for a stable order
+ORDER BY COUNT(*) DESC, customer_id
+```
+
+## Exercise 6.4
+
+Repeat the previous query but use `HAVING` to only keep the groups with 40 or more.
+
+```{sql, connection=con}
+SELECT customer_id,  -- included so each count can be matched to its customer
+  COUNT(*) AS count  -- number of rental rows in the customer's group
+FROM rental
+GROUP BY customer_id
+HAVING COUNT(*) >= 40  -- "40 or more" is inclusive, so >= rather than >
+ORDER BY COUNT(*) DESC, customer_id
+```
+
+# Exercise 7
+
+Write a query that calculates a number of summary statistics for the payment table using `MAX`, `MIN`, `AVG` and `SUM`
+
+```{sql, connection=con}
+SELECT  -- largest, smallest, mean and total amount over every row of payment
+    MAX(p.amount) AS max_amount,
+    MIN(p.amount) AS min_amount,
+    AVG(p.amount) AS avg_amount,
+    SUM(p.amount) AS total_amount
+FROM payment AS p;
+```
+
+## Exercise 7.1
+
+Modify the above query to do those calculations for each `customer_id`.
+
+```{sql, connection=con}
+SELECT  -- the same four statistics, computed once per customer_id group
+    p.customer_id AS customer_id,
+    MAX(p.amount) AS max_amount,
+    MIN(p.amount) AS min_amount,
+    AVG(p.amount) AS avg_amount,
+    SUM(p.amount) AS total_amount
+FROM payment AS p
+GROUP BY p.customer_id;
+```
+## Exercise 7.2
+
+Modify the above query to only keep the `customer_id`s that have more than 5 payments.
+
+```{sql, connection=con}
+SELECT  -- per-customer statistics, kept only for customers with more than 5 payment rows
+    p.customer_id AS customer_id,
+    MAX(p.amount) AS max_amount,
+    MIN(p.amount) AS min_amount,
+    AVG(p.amount) AS avg_amount,
+    SUM(p.amount) AS total_amount
+FROM payment AS p
+GROUP BY p.customer_id
+HAVING COUNT(*) > 5;
+```
+# Cleanup
+
+Run the following chunk to disconnect from the connection.
+
+```{r}
+# Close the database connection that was opened in Setup
+DBI::dbDisconnect(conn = con)
+```
\ No newline at end of file