Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(scripts/sql): save useful SQL queries to file #140

Merged
merged 1 commit into from
Feb 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions scripts/sql/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# `sql`

This directory contains some SQL queries that could be useful in the future and were complex enough to track in version control.
9 changes: 9 additions & 0 deletions scripts/sql/duplicate-looks.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Duplicate looks: groups of "Look" rows that share the same ("showId", "number").
-- Returns all ids of each group together with the smallest id in the group.
SELECT
    ARRAY_AGG("Look"."id") AS "id",
    MIN("Look"."id") AS "idmin"
FROM "Look"
GROUP BY
    "Look"."showId",
    "Look"."number"
HAVING COUNT(*) > 1;

-- Duplicate looks with their grouping key: for every ("showId", "number") pair
-- that occurs more than once, list the look ids and the number of rows in the pair.
SELECT
    ARRAY_AGG("Look"."id") AS "id",
    "Look"."showId",
    "Look"."number",
    COUNT(*)
FROM "Look"
GROUP BY
    "Look"."showId",
    "Look"."number"
HAVING COUNT(*) > 1;
5 changes: 5 additions & 0 deletions scripts/sql/looks-with-images.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Looks that have more than one image, along with the image count per look.
SELECT
    "Look"."id",
    COUNT("Image"."id") AS "images_count"
FROM "Look"
INNER JOIN "Image"
    ON "Image"."lookId" = "Look"."id"
GROUP BY "Look"."id"
HAVING COUNT("Image"."id") > 1;
68 changes: 68 additions & 0 deletions scripts/sql/remove-special-chars-from-slugs.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
-- Required for the UNACCENT() calls below.
CREATE EXTENSION IF NOT EXISTS unaccent;

-- Normalize the brand slugs. The view exposes every "Brand" column plus four
-- candidate slugs so they can be compared side by side:
--   "normalized_slug"          - NFKD-normalized slug with characters in the
--                                range \x0300-\x036f removed;
--   "unquoted_normalized_slug" - as above, additionally removing \x2019, ' and ";
--   "unaccent_slug"            - slug passed through UNACCENT();
--   "unquoted_slug"            - UNACCENT() slug with every ' and " removed.
CREATE OR REPLACE TEMP VIEW "BrandWithSlug" AS
SELECT
    REGEXP_REPLACE(NORMALIZE("slug", NFKD), '[\x0300-\x036f]', '', 'g') AS "normalized_slug",
    REGEXP_REPLACE(NORMALIZE("slug", NFKD), E'[\\x0300-\\x036f\\x2019\'"]', '', 'g') AS "unquoted_normalized_slug",
    UNACCENT("slug") AS "unaccent_slug",
    -- The 'g' flag is needed here: without it only the first quote character
    -- in the slug is removed, unlike the two replacements above.
    REGEXP_REPLACE(UNACCENT("slug"), E'[\'"]', '', 'g') AS "unquoted_slug",
    *
FROM "Brand";
SELECT * FROM "BrandWithSlug";

-- Brands whose quote-free, normalized slugs collide. For each colliding slug,
-- "id" holds every brand id in the group and "idmax" the largest of those ids.
CREATE OR REPLACE TEMP VIEW "DuplicateBrands" AS
SELECT
    "BrandWithSlug"."unquoted_normalized_slug",
    ARRAY_AGG("BrandWithSlug"."id") AS "id",
    MAX("BrandWithSlug"."id") AS "idmax"
FROM "BrandWithSlug"
GROUP BY "BrandWithSlug"."unquoted_normalized_slug"
HAVING COUNT(*) > 1;
SELECT * FROM "DuplicateBrands";

-- List every show that belongs to one of the duplicate brands; each row also
-- carries the columns of the matching "DuplicateBrands" group.
SELECT *
FROM "Show"
INNER JOIN "DuplicateBrands"
    ON "Show"."brandId" = ANY("DuplicateBrands"."id");

-- Combine duplicates. For every show:
-- (a) move all reviews to the last created show (i.e. the larger "ID");
-- (b) move all look images to the matching look (same "number") of the last created show;
-- (c) punt on all other relations as they aren't populated yet;
-- (d) delete the duplicate show;
-- (e) delete the duplicate brand.

-- Note this only works given the assumption that the most recently created brand
-- is also associated with the most recently created show, looks, images, etc.
--
-- This assumption is generally correct as I import brands, shows, looks, and images
-- all at the same time. Should this change in the future, this migration will no longer
-- work (but then again, it won't have to because it will have already been applied).

-- Shows of duplicate brands that share the same season, sex and level once the
-- brands are treated as one. "id" lists all show ids in a group, "idmax" is the
-- largest show id, and "brandId" is the largest brand id of the brand group.
CREATE OR REPLACE TEMP VIEW "DuplicateShows" AS
SELECT
    ARRAY_AGG("Show"."id") AS "id",
    MAX("Show"."id") AS "idmax",
    "DuplicateBrands"."idmax" AS "brandId"
FROM "Show"
INNER JOIN "DuplicateBrands"
    ON "Show"."brandId" = ANY("DuplicateBrands"."id")
GROUP BY
    "DuplicateBrands"."idmax",
    "Show"."seasonId",
    "Show"."sex",
    "Show"."level"
HAVING COUNT(*) > 1;
SELECT * FROM "DuplicateShows";

-- Looks of duplicate shows that share the same look number. "id" lists all look
-- ids in a group, "idmax" is the largest look id, and "showId" is the largest
-- show id of the duplicate show group the looks belong to.
CREATE OR REPLACE TEMP VIEW "DuplicateLooks" AS
SELECT
    ARRAY_AGG("Look"."id") AS "id",
    MAX("Look"."id") AS "idmax",
    "DuplicateShows"."idmax" AS "showId"
FROM "Look"
INNER JOIN "DuplicateShows"
    ON "Look"."showId" = ANY("DuplicateShows"."id")
GROUP BY
    "DuplicateShows"."idmax",
    "Look"."number"
HAVING COUNT(*) > 1;
SELECT * FROM "DuplicateLooks";

-- Apply the merge atomically: the statements below depend on each other, so if
-- any of them fails (e.g. on a unique or foreign key constraint) nothing is
-- committed and the data is not left half-merged.
BEGIN;

-- (a) Move all reviews to the last created show (i.e. the larger "id").
UPDATE "Review"
SET "showId" = "DuplicateShows"."idmax"
FROM "DuplicateShows"
WHERE "Review"."showId" = ANY("DuplicateShows"."id")
    AND "Review"."showId" != "DuplicateShows"."idmax";

-- (b) Move all images to the last created look (i.e. the larger "id") among the
-- looks that share the same number within a group of duplicate shows.
UPDATE "Image"
SET "lookId" = "DuplicateLooks"."idmax"
FROM "DuplicateLooks"
WHERE "Image"."lookId" = ANY("DuplicateLooks"."id")
    AND "Image"."lookId" != "DuplicateLooks"."idmax";

-- Delete duplicate brands (i.e. the smaller "id" as we're keeping the larger "id").
-- NOTE(review): there is no explicit DELETE on "Show" or "Look" in this script;
-- presumably the rows of the removed brands are dropped via ON DELETE CASCADE.
-- Confirm against the schema before running.
DELETE FROM "Brand"
USING "DuplicateBrands"
WHERE "Brand"."id" = ANY("DuplicateBrands"."id")
    AND "Brand"."id" != "DuplicateBrands"."idmax";

-- Update the brand slugs (to remove special characters).
UPDATE "Brand"
SET "slug" = "BrandWithSlug"."unquoted_normalized_slug"
FROM "BrandWithSlug"
WHERE "BrandWithSlug"."id" = "Brand"."id";

COMMIT;
4 changes: 4 additions & 0 deletions scripts/sql/reviewed-by-person.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- Shows reviewed by a given author: one row per review, giving the show name
-- and the id of the publication the review belongs to.
SELECT
    "Show"."name",
    "Review"."publicationId"
FROM "Show"
INNER JOIN "Review"
    ON "Review"."showId" = "Show"."id"
INNER JOIN "User"
    ON "User"."id" = "Review"."authorId"
WHERE "User"."name" = 'Luisa Zargani';
1 change: 1 addition & 0 deletions scripts/sql/shows-in-wwd.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Show names for every review that belongs to publication 2, sorted by name
-- (one row per review, so a show with several such reviews is listed repeatedly).
-- NOTE(review): publication id 2 is presumably WWD, per the file name; confirm.
SELECT "Show"."name"
FROM "Review"
INNER JOIN "Show"
    ON "Show"."id" = "Review"."showId"
WHERE "Review"."publicationId" = 2
ORDER BY "Show"."name" ASC;
14 changes: 14 additions & 0 deletions scripts/sql/vogue-shows-with-locations.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
-- Gets the shows that were originally imported from Vogue with locations, and
-- had the same locations in WWD and thus the records merged as expected.
--
-- This query only returned 3 rows, meaning that I should probably simply omit
-- the location from the show name when merging from WWD (as there were more records
-- that were alike besides the location) and then manually merge those three.
--
-- Hyke FALL 2023 WOMAN RTW TOKYO, Hyke SPRING 2022 WOMAN RTW TOKYO, Hyke SPRING 2023 WOMAN RTW TOKYO
-- Shows with a known, non-'PARIS' location that have more than one review,
-- counted per (name, location) pair. The location filter is applied before
-- grouping, which is equivalent because "location" is part of the grouping key.
SELECT
    "Show"."name",
    "Show"."location",
    COUNT("Review"."id") AS "review_count"
FROM "Show"
LEFT JOIN "Review"
    ON "Review"."showId" = "Show"."id"
WHERE "Show"."location" != 'PARIS'
    AND "Show"."location" IS NOT NULL
GROUP BY
    "Show"."name",
    "Show"."location"
HAVING COUNT("Review"."id") > 1;
Loading