diff --git a/scripts/sql/README.md b/scripts/sql/README.md
new file mode 100644
index 00000000..35bf5abb
--- /dev/null
+++ b/scripts/sql/README.md
@@ -0,0 +1,3 @@
+# `sql`
+
+This directory contains some SQL queries that could be useful in the future and were complex enough to track in version control.
diff --git a/scripts/sql/duplicate-looks.sql b/scripts/sql/duplicate-looks.sql
new file mode 100644
index 00000000..eaa23103
--- /dev/null
+++ b/scripts/sql/duplicate-looks.sql
@@ -0,0 +1,13 @@
+-- Looks that share the same ("showId", "number") pair, i.e. duplicate looks.
+
+-- Every group of duplicates: the IDs involved and the smallest ID of the group.
+SELECT ARRAY_AGG("id") AS "id", MIN("id") AS "idmin"
+FROM "Look"
+GROUP BY "showId", "number"
+HAVING COUNT(*) > 1;
+
+-- Every group of duplicates: the IDs involved, the grouping key, and the group size.
+SELECT ARRAY_AGG("id") AS "id", "showId", "number", COUNT(*)
+FROM "Look"
+GROUP BY "showId", "number"
+HAVING COUNT(*) > 1;
diff --git a/scripts/sql/looks-with-images.sql b/scripts/sql/looks-with-images.sql
new file mode 100644
index 00000000..c1d22fd7
--- /dev/null
+++ b/scripts/sql/looks-with-images.sql
@@ -0,0 +1,6 @@
+-- Looks with more than one image.
+SELECT "Look"."id", COUNT("Image"."id") AS "images_count"
+FROM "Look"
+INNER JOIN "Image" ON "Image"."lookId" = "Look"."id"
+GROUP BY "Look"."id"
+HAVING COUNT("Image"."id") > 1;
diff --git a/scripts/sql/remove-special-chars-from-slugs.sql b/scripts/sql/remove-special-chars-from-slugs.sql
new file mode 100644
index 00000000..5e8cf6cd
--- /dev/null
+++ b/scripts/sql/remove-special-chars-from-slugs.sql
@@ -0,0 +1,94 @@
+CREATE EXTENSION IF NOT EXISTS unaccent;
+
+-- Run the whole migration atomically so that a failure part-way through does not
+-- leave reviews or images re-pointed while the duplicate brands still exist.
+BEGIN;
+
+-- Normalize the brand slugs.
+--
+-- "normalized_slug" and "unquoted_normalized_slug" decompose the slug (NFKD) and
+-- strip the combining diacritical marks U+0300 to U+036F; the "unquoted" variant
+-- also strips U+2019 and straight single/double quotes. "unaccent_slug" and
+-- "unquoted_slug" are the counterparts computed with the unaccent extension.
+-- Only "unquoted_normalized_slug" is used by the statements below.
+CREATE OR REPLACE TEMP VIEW "BrandWithSlug" AS
+SELECT
+  REGEXP_REPLACE(NORMALIZE("slug", NFKD), '[\x0300-\x036f]', '', 'g') AS "normalized_slug",
+  REGEXP_REPLACE(NORMALIZE("slug", NFKD), E'[\\x0300-\\x036f\\x2019\'"]', '', 'g') AS "unquoted_normalized_slug",
+  UNACCENT("slug") AS "unaccent_slug",
+  -- The 'g' flag strips every quote; without it only the first match is removed.
+  REGEXP_REPLACE(UNACCENT("slug"), E'[\'"]', '', 'g') AS "unquoted_slug",
+  *
+FROM "Brand";
+SELECT * FROM "BrandWithSlug";
+
+-- See if there are any duplicate brands, i.e. brands whose slugs collide once
+-- normalized. "id" holds every brand ID in a group; "idmax" is the one we keep.
+CREATE OR REPLACE TEMP VIEW "DuplicateBrands" AS
+SELECT
+  "unquoted_normalized_slug",
+  ARRAY_AGG("BrandWithSlug"."id") AS "id",
+  MAX("BrandWithSlug"."id") AS "idmax"
+FROM "BrandWithSlug"
+GROUP BY "unquoted_normalized_slug"
+HAVING COUNT(*) > 1;
+SELECT * FROM "DuplicateBrands";
+
+-- List every show that belongs to one of the duplicate brands.
+SELECT * FROM "Show" INNER JOIN "DuplicateBrands" ON "Show"."brandId" = ANY("DuplicateBrands"."id");
+
+-- Combine duplicates. For every show:
+-- (a) move all reviews to the last created show (i.e. the larger "ID");
+-- (b) move all look images to the last created look;
+-- (c) punt on all other relations as they aren't populated yet;
+-- (d) delete the duplicate show;
+-- (e) delete the duplicate brand.
+--
+-- NOTE(review): no statement below deletes shows or looks explicitly, so (d)
+-- presumably relies on the foreign keys cascading from "Brand" - TODO confirm.
+
+-- Note this only works given the assumption that the most recently created brand
+-- is also associated with the most recently created show, looks, images, etc.
+--
+-- This assumption is generally correct as I import brands, shows, looks, and images
+-- all at the same time. Should this change in the future, this migration will no longer
+-- work (but then again, it won't have to because it will have already been applied).
+
+CREATE OR REPLACE TEMP VIEW "DuplicateShows" AS
+SELECT
+  ARRAY_AGG("Show"."id") AS "id",
+  MAX("Show"."id") AS "idmax",
+  "DuplicateBrands"."idmax" AS "brandId"
+FROM "Show"
+INNER JOIN "DuplicateBrands" ON "Show"."brandId" = ANY("DuplicateBrands"."id")
+GROUP BY "DuplicateBrands"."idmax", "Show"."seasonId", "Show"."sex", "Show"."level"
+HAVING COUNT(*) > 1;
+SELECT * FROM "DuplicateShows";
+
+CREATE OR REPLACE TEMP VIEW "DuplicateLooks" AS
+SELECT
+  ARRAY_AGG("Look"."id") AS "id",
+  MAX("Look"."id") AS "idmax",
+  "DuplicateShows"."idmax" AS "showId"
+FROM "Look"
+INNER JOIN "DuplicateShows" ON "Look"."showId" = ANY("DuplicateShows"."id")
+GROUP BY "DuplicateShows"."idmax", "Look"."number"
+HAVING COUNT(*) > 1;
+SELECT * FROM "DuplicateLooks";
+
+-- (a) move all reviews to the last created show (i.e. the larger "ID");
+UPDATE "Review" SET "showId" = "DuplicateShows"."idmax" FROM "DuplicateShows"
+WHERE "Review"."showId" = ANY("DuplicateShows"."id") AND "Review"."showId" != "DuplicateShows"."idmax";
+
+-- (b) move all look images to the last created look;
+UPDATE "Image" SET "lookId" = "DuplicateLooks"."idmax" FROM "DuplicateLooks"
+WHERE "Image"."lookId" = ANY("DuplicateLooks"."id") AND "Image"."lookId" != "DuplicateLooks"."idmax";
+
+-- Delete duplicates (i.e. the smaller "ID" as we're keeping the larger "ID").
+DELETE FROM "Brand" USING "DuplicateBrands"
+WHERE "Brand"."id" = ANY("DuplicateBrands"."id") AND "Brand"."id" != "DuplicateBrands"."idmax";
+
+-- Update the brand slugs (to remove special characters).
+UPDATE "Brand" SET "slug" = "BrandWithSlug"."unquoted_normalized_slug" FROM "BrandWithSlug" WHERE "BrandWithSlug"."id" = "Brand"."id";
+
+COMMIT;
diff --git a/scripts/sql/reviewed-by-person.sql b/scripts/sql/reviewed-by-person.sql
new file mode 100644
index 00000000..ed9b85dc
--- /dev/null
+++ b/scripts/sql/reviewed-by-person.sql
@@ -0,0 +1,6 @@
+-- Shows reviewed by the user named below, with the publication ID of each review.
+SELECT "Show"."name", "Review"."publicationId"
+FROM "Show"
+INNER JOIN "Review" ON "Review"."showId" = "Show"."id"
+INNER JOIN "User" ON "User"."id" = "Review"."authorId"
+WHERE "User"."name" = 'Luisa Zargani';
diff --git a/scripts/sql/shows-in-wwd.sql b/scripts/sql/shows-in-wwd.sql
new file mode 100644
index 00000000..f8e54fce
--- /dev/null
+++ b/scripts/sql/shows-in-wwd.sql
@@ -0,0 +1,7 @@
+-- Names of the shows that have a review from publication 2, sorted by name.
+-- NOTE(review): the file name suggests publication 2 is WWD - confirm.
+SELECT "Show"."name"
+FROM "Review"
+INNER JOIN "Show" ON "Show"."id" = "Review"."showId"
+WHERE "Review"."publicationId" = 2
+ORDER BY "Show"."name" ASC;
diff --git a/scripts/sql/vogue-shows-with-locations.sql b/scripts/sql/vogue-shows-with-locations.sql
new file mode 100644
index 00000000..607cf45f
--- /dev/null
+++ b/scripts/sql/vogue-shows-with-locations.sql
@@ -0,0 +1,15 @@
+-- Gets the shows that were originally imported from Vogue with locations, and
+-- had the same locations in WWD and thus the records merged as expected.
+--
+-- This query only returned 3 rows, meaning that I should probably simply omit
+-- the location from the show name when merging from WWD (as there were more records
+-- that were alike besides the location) and then manually merge those three.
+--
+-- Hyke FALL 2023 WOMAN RTW TOKYO, Hyke SPRING 2022 WOMAN RTW TOKYO, Hyke SPRING 2023 WOMAN RTW TOKYO
+SELECT * FROM (
+  SELECT "Show"."name", "Show"."location", COUNT("Review"."id") AS "review_count"
+  FROM "Show"
+  LEFT JOIN "Review" ON "Review"."showId" = "Show"."id"
+  GROUP BY "Show"."name", "Show"."location"
+) "Shows"
+WHERE "review_count" > 1 AND "location" != 'PARIS' AND "location" IS NOT NULL;