From f789767397fe5b72d65584f03e251c3d69ffb0c1 Mon Sep 17 00:00:00 2001 From: Daniel Possenriede Date: Tue, 22 Nov 2022 20:51:23 +0100 Subject: [PATCH] chore: Rework `list_tables()` Original: 7730f1365b0eb18cd5de33e0236b7b268755426d refactor `dbListTables()` with `list_tables()`, now orders result by `table_type` and `table_name` refactor `dbExistsTable()` with `list_tables()` refactor `dbListObjects()` with `list_tables()` merge `find_table()` code into `list_fields()` `find_table()` isn't used anywhere else anymore (e.g. `exists_table()`) simplify the "get current_schemas() as table" code pass full `id` to `list_fields()` align `dbExistsTable()` with `dbListFields()` simplify `where_schema` in `list_tables()` align `where_table` with `where_schema` in `list_tables()` --- R/dbExistsTable_PqConnection_character.R | 9 +- R/dbListFields_PqConnection_Id.R | 2 +- R/dbListObjects_PqConnection_ANY.R | 25 +++-- R/dbListTables_PqConnection.R | 9 +- R/tables.R | 114 ++++++++++++++++------- 5 files changed, 109 insertions(+), 50 deletions(-) diff --git a/R/dbExistsTable_PqConnection_character.R b/R/dbExistsTable_PqConnection_character.R index 74663758..462b6820 100644 --- a/R/dbExistsTable_PqConnection_character.R +++ b/R/dbExistsTable_PqConnection_character.R @@ -2,10 +2,11 @@ #' @usage NULL dbExistsTable_PqConnection_character <- function(conn, name, ...) { stopifnot(length(name) == 1L) - name <- dbQuoteIdentifier(conn, name) - - # Convert to identifier - id <- dbUnquoteIdentifier(conn, name)[[1]] + # use (Un)QuoteIdentifier roundtrip instead of Id(table = name) + # so that quoted names (possibly incl. schema) can be passed to `name` e.g. + # name = dbQuoteIdentifier(conn, Id(schema = "sname", table = "tname")) + quoted <- dbQuoteIdentifier(conn, name) + id <- dbUnquoteIdentifier(conn, quoted)[[1]] exists_table(conn, id) } diff --git a/R/dbListFields_PqConnection_Id.R b/R/dbListFields_PqConnection_Id.R index 637c5ec5..0ba3659c 100644 --- a/R/dbListFields_PqConnection_Id.R +++ b/R/dbListFields_PqConnection_Id.R @@ -1,7 +1,7 @@ #' @rdname postgres-tables #' @usage NULL dbListFields_PqConnection_Id <- function(conn, name, ...) { - list_fields(conn, name) + list_fields(conn, id = name) } #' @rdname postgres-tables diff --git a/R/dbListObjects_PqConnection_ANY.R b/R/dbListObjects_PqConnection_ANY.R index 1224f51b..9331fe6f 100644 --- a/R/dbListObjects_PqConnection_ANY.R +++ b/R/dbListObjects_PqConnection_ANY.R @@ -13,10 +13,13 @@ dbListObjects_PqConnection_ANY <- function(conn, prefix = NULL, ...) { null_varchar <- "NULL::text" } query <- paste0( - "SELECT ", null_varchar, " AS schema, table_name AS table FROM INFORMATION_SCHEMA.tables\n", - "WHERE (table_schema = ANY(current_schemas(true))) AND (table_schema <> 'pg_catalog')\n", + "SELECT ", null_varchar, " AS schema, table_name AS table FROM ( \n", + list_tables(conn = conn, order_by = "table_type, table_name"), + ") as table_query \n", "UNION ALL\n", - "SELECT DISTINCT table_schema AS schema, ", null_varchar, " AS table FROM INFORMATION_SCHEMA.tables" + "SELECT DISTINCT table_schema AS schema, ", null_varchar, " AS table FROM ( \n", + list_tables(conn = conn, where_schema = "true"), + ") as schema_query;" ) } else { if (!is.list(prefix)) prefix <- list(prefix) @@ -27,10 +30,20 @@ dbListObjects_PqConnection_ANY <- function(conn, prefix = NULL, ...) { schemas <- vcapply(prefix[is_prefix], function(x) x@name[["schema"]]) if (length(schemas) > 0) { schema_strings <- dbQuoteString(conn, schemas) + where_schema <- + paste0( + "table_schema IN (", + paste(schema_strings, collapse = ", "), + ") \n" + ) query <- paste0( - "SELECT table_schema AS schema, table_name AS table FROM INFORMATION_SCHEMA.tables\n", - "WHERE ", - "(table_schema IN (", paste(schema_strings, collapse = ", "), "))" + "SELECT table_schema AS schema, table_name AS table FROM ( \n", + list_tables( + conn = conn, + where_schema = where_schema, + order_by = "table_type, table_name" + ), + ") as table_query" ) } } diff --git a/R/dbListTables_PqConnection.R b/R/dbListTables_PqConnection.R index e74aa001..2d408a9f 100644 --- a/R/dbListTables_PqConnection.R +++ b/R/dbListTables_PqConnection.R @@ -1,12 +1,9 @@ #' @rdname postgres-tables #' @usage NULL dbListTables_PqConnection <- function(conn, ...) { - query <- paste0( - "SELECT table_name FROM INFORMATION_SCHEMA.tables ", - "WHERE ", - "(table_schema = ANY(current_schemas(true))) AND (table_schema <> 'pg_catalog')" - ) - dbGetQuery(conn, query)[[1]] + query <- list_tables(conn = conn, order_by = "table_type, table_name") + + dbGetQuery(conn, query)[["table_name"]] } #' @rdname postgres-tables diff --git a/R/tables.R b/R/tables.R index 20a219be..b2cf637f 100644 --- a/R/tables.R +++ b/R/tables.R @@ -120,26 +120,72 @@ db_append_table <- function(conn, name, value, copy, warn) { nrow(value) } +list_tables <- function(conn, where_schema = NULL, where_table = NULL, order_by = NULL) { + + query <- paste0( + # information_schema.table docs: https://www.postgresql.org/docs/current/infoschema-tables.html + "SELECT table_schema, table_name \n", + "FROM information_schema.tables \n", + "WHERE TRUE \n" # dummy clause to be able to add additional ones with `AND` + ) + + if (is.null(where_schema)) { + # `true` in `current_schemas(true)` is necessary to get temporary tables + query <- paste0( + query, + " AND (table_schema = ANY(current_schemas(true))) \n", + " AND (table_schema <> 'pg_catalog') \n" + ) + } else { + query <- paste0(query, " AND ", where_schema) + } + + if (!is.null(where_table)) query <- paste0(query, " AND ", where_table) + + if (!is.null(order_by)) query <- paste0(query, "ORDER BY ", order_by) + + query +} + exists_table <- function(conn, id) { name <- id@name + stopifnot("table" %in% names(name)) + table_name <- dbQuoteString(conn, name[["table"]]) + where_table <- paste0("table_name = ", table_name, " \n") + if ("schema" %in% names(name)) { + schema_name <- dbQuoteString(conn, name[["schema"]]) + where_schema <- paste0("table_schema = ", schema_name, " \n") + } else { + where_schema <- NULL + } query <- paste0( - "SELECT COUNT(*) FROM ", - find_table(conn, name) + "SELECT EXISTS ( \n", + list_tables(conn, where_schema = where_schema, where_table = where_table), + ")" ) - - dbGetQuery(conn, query)[[1]] >= 1 + dbGetQuery(conn, query)[[1]] } -find_table <- function(conn, id, inf_table = "tables", only_first = FALSE) { +list_fields <- function(conn, id) { + name <- id@name + is_redshift <- is(conn, "RedshiftConnection") - if ("schema" %in% names(id)) { + # get relevant schema + if ("schema" %in% names(name)) { + # either the user provides the schema query <- paste0( "(SELECT 1 AS nr, ", - dbQuoteString(conn, id[["schema"]]), "::varchar", + dbQuoteString(conn, name[["schema"]]), "::varchar", " AS table_schema) t" ) + + # only_first not necessary, + # as there cannot be multiple tables with the same name in a single schema + only_first <- FALSE + + # or we have to look the table up in the schemas on the search path } else if (is_redshift) { # A variant of the Postgres version that uses CTEs and generate_series() # instead of generate_subscripts(), the latter is not supported on Redshift @@ -158,25 +204,31 @@ find_table <- function(conn, id, inf_table = "tables", only_first = FALSE) { ) only_first <- FALSE } else { - # https://stackoverflow.com/a/8767450/946850 + # Get `current_schemas()` in search_path order + # so $user and temp tables take precedence over the public schema (by default) + # https://www.postgresql.org/docs/current/ddl-schemas.html#DDL-SCHEMAS-PATH + # https://www.postgresql.org/docs/current/runtime-config-client.html#GUC-SEARCH-PATH + # How to unnest `current_schemas(true)` array with element number (works since v9.4): + # https://stackoverflow.com/a/8767450/2114932 query <- paste0( - "(SELECT nr, schemas[nr] AS table_schema FROM ", - "(SELECT *, generate_subscripts(schemas, 1) AS nr FROM ", - "(SELECT current_schemas(true) AS schemas) ", - "t) ", - "tt WHERE schemas[nr] <> 'pg_catalog') ", - "ttt" + "(", + "SELECT * FROM unnest(current_schemas(true)) WITH ORDINALITY AS tbl(table_schema, nr) \n", + "WHERE table_schema != 'pg_catalog'", + ") schemas_on_path" ) + only_first <- TRUE } - table <- dbQuoteString(conn, id[["table"]]) + # join columns info + table <- dbQuoteString(conn, name[["table"]]) query <- paste0( query, " ", - "INNER JOIN INFORMATION_SCHEMA.", inf_table, " USING (table_schema) ", + "INNER JOIN INFORMATION_SCHEMA.COLUMNS USING (table_schema) ", "WHERE table_name = ", table ) if (only_first) { + # we can only detect duplicate table names after we know in which schemas they are # https://stackoverflow.com/a/31814584/946850 query <- paste0( "(SELECT *, rank() OVER (ORDER BY nr) AS rnr ", @@ -185,7 +237,19 @@ find_table <- function(conn, id, inf_table = "tables", only_first = FALSE) { ) } - query + query <- paste0( + "SELECT column_name FROM ", + query, " ", + "ORDER BY ordinal_position" + ) + + fields <- dbGetQuery(conn, query)[[1]] + + if (length(fields) == 0) { + stop("Table ", dbQuoteIdentifier(conn, id), " not found.", call. = FALSE) + } + + fields } find_temp_schema <- function(conn, fail_if_missing = TRUE) { @@ -214,19 +278,3 @@ find_temp_schema <- function(conn, fail_if_missing = TRUE) { return(connection_get_temp_schema(conn@ptr)) } } - -list_fields <- function(conn, id) { - name <- id@name - - query <- find_table(conn, name, "columns", only_first = TRUE) - query <- paste0( - "SELECT column_name FROM ", - query, " ", - "ORDER BY ordinal_position" - ) - fields <- dbGetQuery(conn, query)[[1]] - if (length(fields) == 0) { - stop("Table ", dbQuoteIdentifier(conn, name), " not found.", call. = FALSE) - } - fields -}