From 931d8a5afb1b7b39818370459cbf517059997a69 Mon Sep 17 00:00:00 2001 From: Toby Crawley Date: Sun, 2 Jul 2023 10:07:51 -0400 Subject: [PATCH 1/4] Extract repo index gen to own ns This will allow us to use it from the async pre-compute logic. --- src/clojars/repo_indexing.clj | 107 +++++++++++++++++++++++++++++++ src/clojars/web/repo_listing.clj | 102 ++--------------------------- 2 files changed, 111 insertions(+), 98 deletions(-) create mode 100644 src/clojars/repo_indexing.clj diff --git a/src/clojars/repo_indexing.clj b/src/clojars/repo_indexing.clj new file mode 100644 index 00000000..bd6064b2 --- /dev/null +++ b/src/clojars/repo_indexing.clj @@ -0,0 +1,107 @@ +(ns clojars.repo-indexing + (:require + [clojars.s3 :as s3] + [clojars.web.common :as common] + [clojars.web.safe-hiccup :as safe-hiccup] + [clojure.string :as str] + [hiccup.element :as el])) + +(defn- last-segment + [path] + (peek (str/split path #"/"))) + +(defn- entry-line-dispatch + [entry] + (if (:Prefix entry) + :prefix + :file)) + +(defmulti entry-line + #'entry-line-dispatch) + +(def ^:private name-col-width 50) +(def ^:private date-col-width 16) +(def ^:private size-col-width 10) + +(defn- blanks + [max-len content-str] + (apply str (repeat (- max-len (count content-str)) " "))) + +(def ^:private dash "-") + +(defmethod entry-line :prefix + [{:keys [Prefix]}] + (let [suffix (format "%s/" (last-segment Prefix))] + (list + (el/link-to {:title suffix} suffix suffix) + (blanks name-col-width suffix) + (blanks date-col-width dash) + dash + (blanks size-col-width dash) + dash + "\n"))) + +(defmethod entry-line :file + [{:keys [Key LastModified Size]}] + (let [file-name (last-segment Key) + size (str Size)] + (list + (el/link-to {:title file-name} file-name file-name) + (blanks name-col-width file-name) + (common/format-date-with-time LastModified) + (blanks size-col-width size) + size + "\n"))) + +(defn generate-index + [path entries] + (safe-hiccup/html5 + {:lang "en"} + [:head + [:meta {:charset "utf-8"}] + [:meta {:name "viewport" :content "width=device-width,initial-scale=1"}] + [:title (format "Clojars Repository: %s" path)]] + [:body + [:header + [:h1 (or path "/")]] + [:hr] + [:main + [:pre#contents + (when (some? path) + (list + (el/link-to "../" "../") + "\n")) + (mapcat entry-line entries)]] + [:hr]])) + +(defn- sort-entries + [entries] + (sort + (fn [e1 e2] + (cond + (and (:Prefix e1) (:Prefix e2)) (compare (:Prefix e1) (:Prefix e2)) + (:Prefix e1) -1 + (:Prefix e2) 1 + :else (compare (:Key e1) (:Key e2)))) + entries)) + +(defn get-path-entries + [repo-bucket path] + (->> (s3/list-entries repo-bucket path) + ;; filter out index files + (remove (fn [{:keys [Key]}] + (and Key + (str/ends-with? Key "index.html")))) + (sort-entries))) + +(defn normalize-path + [path] + (let [path (cond + (str/blank? path) nil + (= "/" path) nil + (str/starts-with? path "/") (subs path 1) + :else path)] + (if (and (some? path) + (not (str/ends-with? path "/"))) + (str path "/") + path))) diff --git a/src/clojars/web/repo_listing.clj b/src/clojars/web/repo_listing.clj index 14a587c2..ebf1d040 100644 --- a/src/clojars/web/repo_listing.clj +++ b/src/clojars/web/repo_listing.clj @@ -1,13 +1,10 @@ (ns clojars.web.repo-listing (:require [clojars.maven :as maven] - [clojars.s3 :as s3] - [clojars.web.common :as common] + [clojars.repo-indexing :as repo-indexing] [clojars.web.safe-hiccup :as safe-hiccup] [clojure.edn :as edn] [clojure.java.io :as io] - [clojure.string :as str] - [hiccup.element :as el] [ring.util.response :as ring.response]) (:import (java.io @@ -17,97 +14,6 @@ (set! *warn-on-reflection* true) -(defn- sort-entries - [entries] - (sort - (fn [e1 e2] - (cond - (and (:Prefix e1) (:Prefix e2)) (compare (:Prefix e1) (:Prefix e2)) - (:Prefix e1) -1 - (:Prefix e2) 1 - :else (compare (:Key e1) (:Key e2)))) - entries)) - -(defn- last-segment - [path] - (peek (str/split path #"/"))) - -(defn- entry-line-dispatch - [entry] - (if (:Prefix entry) - :prefix - :file)) - -(defmulti entry-line - #'entry-line-dispatch) - -(def ^:private name-col-width 50) -(def ^:private date-col-width 16) -(def ^:private size-col-width 10) - -(defn- blanks - [max-len content-str] - (apply str (repeat (- max-len (count content-str)) " "))) - -(def ^:private dash "-") - -(defmethod entry-line :prefix - [{:keys [Prefix]}] - (let [suffix (format "%s/" (last-segment Prefix))] - (list - (el/link-to {:title suffix} suffix suffix) - (blanks name-col-width suffix) - (blanks date-col-width dash) - dash - (blanks size-col-width dash) - dash - "\n"))) - -(defmethod entry-line :file - [{:keys [Key LastModified Size]}] - (let [file-name (last-segment Key) - size (str Size)] - (list - (el/link-to {:title file-name} file-name file-name) - (blanks name-col-width file-name) - (common/format-date-with-time LastModified) - (blanks size-col-width size) - size - "\n"))) - -(defn- normalize-path - [path] - (let [path (cond - (str/blank? path) nil - (= "/" path) nil - (str/starts-with? path "/") (subs path 1) - :else path)] - (if (and (some? path) - (not (str/ends-with? path "/"))) - (str path "/") - path))) - -(defn- index - [path entries] - (safe-hiccup/html5 - {:lang "en"} - [:head - [:meta {:charset "utf-8"}] - [:meta {:name "viewport" :content "width=device-width,initial-scale=1"}] - [:title (format "Clojars Repository: %s" path)]] - [:body - [:header - [:h1 (or path "/")]] - [:hr] - [:main - [:pre#contents - (when (some? path) - (list - (el/link-to "../" "../") - "\n")) - (mapcat entry-line entries)]] - [:hr]])) - (def ^:private max-age 43200) ;; 12 hours ;; Public for use in tests @@ -159,8 +65,8 @@ (defn- response [repo-bucket path] - (if-some [entries (seq (sort-entries (s3/list-entries repo-bucket path)))] - (-> (index path entries) + (if-some [entries (seq (repo-indexing/get-path-entries repo-bucket path))] + (-> (repo-indexing/generate-index path entries) (ring.response/response) (ring.response/content-type "text/html;charset=utf-8")) not-found-response)) @@ -175,7 +81,7 @@ (defn index-for-path [{:keys [cache-path repo-bucket]} path] - (let [path (normalize-path path) + (let [path (repo-indexing/normalize-path path) [response age] (or (validate-path path) (get-cached-response cache-path path) (cache-response cache-path path (response repo-bucket path)))] From 71adb952f9ed76410ecc459ea41a157375861846 Mon Sep 17 00:00:00 2001 From: Toby Crawley Date: Sun, 2 Jul 2023 10:56:09 -0400 Subject: [PATCH 2/4] Extract retry logic to ns & improve ergonomics --- src/clojars/retry.clj | 27 +++++++++++++++++++++++++++ src/clojars/storage.clj | 26 ++++---------------------- 2 files changed, 31 insertions(+), 22 deletions(-) create mode 100644 src/clojars/retry.clj diff --git a/src/clojars/retry.clj b/src/clojars/retry.clj new file mode 100644 index 00000000..cfdfc987 --- /dev/null +++ b/src/clojars/retry.clj @@ -0,0 +1,27 @@ +(ns clojars.retry + (:require + [clojars.errors :as errors])) + +(defn retry* + [{:keys [n sleep jitter error-reporter] + :or {sleep 1000 + jitter 20 + n 3}} + f] + (loop [i n] + (if (> i 0) + (let [result (try + (f) + (catch Exception t + t))] + (if (instance? Exception result) + (do (errors/report-error error-reporter result) + (Thread/sleep (+ sleep (rand-int jitter))) + (recur (dec i))) + result)) + (throw (ex-info (format "Retry limit %s has been reached" n) + {:n n :sleep sleep :jitter jitter}))))) + +(defmacro retry + [opts & body] + `(retry* ~opts (fn [] ~@body))) diff --git a/src/clojars/storage.clj b/src/clojars/storage.clj index ae389082..0bd6309a 100644 --- a/src/clojars/storage.clj +++ b/src/clojars/storage.clj @@ -1,8 +1,8 @@ (ns clojars.storage (:require [clojars.cdn :as cdn] - [clojars.errors :as errors] [clojars.file-utils :as fu] + [clojars.retry :as retry] [clojars.s3 :as s3] [clojure.java.io :as io]) (:import @@ -97,24 +97,6 @@ (when (not= "ok" status) (throw (ex-info (format "Fastly purge failed for %s" path) resp)))))) -(defn- retry - [f {:keys [n sleep jitter error-reporter] - :or {sleep 1000 - jitter 20 - n 3}}] - (loop [i n] - (if (> i 0) - (let [result (try - (f) - (catch Exception t - t))] - (if (instance? Exception result) - (do (errors/report-error error-reporter result) - (Thread/sleep (+ sleep (rand-int jitter))) - (recur (dec i))) - result)) - (throw (ex-info (format "Retry limit %s has been reached" n) {:n n :sleep sleep :jitter jitter}))))) - (defrecord CDNStorage [error-reporter cdn-token cdn-url] Storage (-write-artifact [_ path _ _] @@ -122,9 +104,9 @@ ;; the last 24 hours, since fastly will cache the 404. Run in a ;; future so we don't have to wait for the request to finish to ;; complete the deploy. - (future (retry - #(purge cdn-token cdn-url path) - {:error-reporter error-reporter}))) + (future (retry/retry + {:error-reporter error-reporter} + (purge cdn-token cdn-url path)))) (remove-path [_ path] (purge cdn-token cdn-url path)) (path-exists? [_ _]) From cd2d213a04802816ec925da5869baaf3fe67ff5d Mon Sep 17 00:00:00 2001 From: Toby Crawley Date: Sun, 2 Jul 2023 12:27:09 -0400 Subject: [PATCH 3/4] Generate repo index files on deploy This will asynchronously generate repo index files for each level when a deploy occurs and write the index to s3 to be read by fastly. --- src/clojars/repo_indexing.clj | 35 +++++++++++++++- src/clojars/routes/repo.clj | 15 +++++++ src/clojars/s3.clj | 1 + src/clojars/system.clj | 3 ++ .../integration/repo_indexing_test.clj | 41 +++++++++++++++++++ test/clojars/integration/uploads_test.clj | 23 +++++++++++ test/clojars/test_helper.clj | 11 +++++ 7 files changed, 127 insertions(+), 2 deletions(-) create mode 100644 test/clojars/integration/repo_indexing_test.clj diff --git a/src/clojars/repo_indexing.clj b/src/clojars/repo_indexing.clj index bd6064b2..4ed4be5d 100644 --- a/src/clojars/repo_indexing.clj +++ b/src/clojars/repo_indexing.clj @@ -1,10 +1,18 @@ (ns clojars.repo-indexing (:require + [clojars.event :as event] + [clojars.retry :as retry] [clojars.s3 :as s3] [clojars.web.common :as common] [clojars.web.safe-hiccup :as safe-hiccup] + [clojure.java.io :as io] [clojure.string :as str] - [hiccup.element :as el])) + [hiccup.element :as el]) + (:import + (java.io + ByteArrayInputStream))) + +(set! *warn-on-reflection* true) (defn- last-segment [path] @@ -54,6 +62,7 @@ "\n"))) (defn generate-index + ^String [path entries] (safe-hiccup/html5 {:lang "en"} @@ -85,13 +94,15 @@ :else (compare (:Key e1) (:Key e2)))) entries)) +(def ^:private index-file-name "index.html") + (defn get-path-entries [repo-bucket path] (->> (s3/list-entries repo-bucket path) ;; filter out index files (remove (fn [{:keys [Key]}] (and Key - (str/ends-with? Key "index.html")))) + (str/ends-with? Key index-file-name)))) (sort-entries))) (defn normalize-path @@ -105,3 +116,23 @@ (not (str/ends-with? path "/"))) (str path "/") path))) + +(defn handler + [{:keys [error-reporter repo-bucket]} type {:as _data :keys [path]}] + (when (= :repo-path-needs-index type) + (let [path (normalize-path path)] + (retry/retry {:error-reporter error-reporter} + (when-some [entries (get-path-entries repo-bucket path)] + (let [key (if path + (format "%s%s" path index-file-name) + index-file-name) + index (generate-index path entries)] + (s3/put-object repo-bucket + key + (io/input-stream (ByteArrayInputStream. (.getBytes index))) + {:ContentType (s3/content-type key)}))))))) + +(defn repo-indexing-component + "Handles async repo-indexing for a path. Needs the error-reporter & repo-bucket components." + [] + (event/handler-component #'handler)) diff --git a/src/clojars/routes/repo.clj b/src/clojars/routes/repo.clj index 15605b86..2ac3fd2b 100644 --- a/src/clojars/routes/repo.clj +++ b/src/clojars/routes/repo.clj @@ -329,8 +329,23 @@ (when (= :single-use-status/yes (:single_use token)) (db/consume-deploy-token db (:id token))))) +(defn- gen-repo-paths + [{:as _version-data :keys [group-path name version]}] + (let [parts (conj (str/split group-path #"/") + name version)] + (reduce + (fn [acc part] + (conj acc + (if (str/blank? (peek acc)) + part + (format "%s/%s" (peek acc) part)))) + [""] + parts))) + (defn- emit-deploy-events [db event-emitter {:as version-data :keys [group]}] + (doseq [path (gen-repo-paths version-data)] + (event/emit event-emitter :repo-path-needs-index {:path path})) (doseq [user (db/group-active-users db group)] (event/emit event-emitter :version-deployed (assoc version-data :user user)))) diff --git a/src/clojars/s3.clj b/src/clojars/s3.clj index 3d4bab31..19fe342d 100644 --- a/src/clojars/s3.clj +++ b/src/clojars/s3.clj @@ -214,6 +214,7 @@ :clj :txt :eclipse-plugin :zip :gz "application/gzip" + :html "text/html" :jar "application/x-java-archive" :md5 :txt :pom :xml diff --git a/src/clojars/system.clj b/src/clojars/system.clj index 6aafc316..2b2470b6 100644 --- a/src/clojars/system.clj +++ b/src/clojars/system.clj @@ -12,6 +12,7 @@ [clojars.oauth.github :as github] [clojars.oauth.gitlab :as gitlab] [clojars.remote-service :as remote-service] + [clojars.repo-indexing :as repo-indexing] [clojars.ring-servlet-patch :as patch] [clojars.s3 :as s3] [clojars.search :as search] @@ -91,6 +92,7 @@ :mailer (simple-mailer (:mail config)) :notifications (notifications/notification-component) :repo-bucket (s3/s3-client (get-in config [:s3 :repo-bucket])) + :repo-indexer (repo-indexing/repo-indexing-component) :repo-lister (repo-listing/repo-lister (:cache-path config)) :storage (storage-component (:repo config) (:cdn-token config) (:cdn-url config)))) (component/system-using @@ -100,6 +102,7 @@ :event-emitter [:error-reporter] :http [:app] :notifications [:db :mailer] + :repo-indexer [:error-reporter :repo-bucket] :repo-lister [:repo-bucket] :event-receiver [:error-reporter] :storage [:error-reporter :repo-bucket]})))) diff --git a/test/clojars/integration/repo_indexing_test.clj b/test/clojars/integration/repo_indexing_test.clj new file mode 100644 index 00000000..6be40248 --- /dev/null +++ b/test/clojars/integration/repo_indexing_test.clj @@ -0,0 +1,41 @@ +(ns clojars.integration.repo-indexing-test + (:refer-clojure :exclude [key]) + (:require + [clojars.event :as event] + [clojars.s3 :as s3] + [clojars.test-helper :as help] + [clojure.java.io :as io] + [clojure.test :refer [deftest is testing use-fixtures]] + [matcher-combinators.test])) + +(use-fixtures :each help/run-test-app) + +(defn key + [path n] + (if path + (format "%s/%s" path n) + n)) + +(defn write-data + [repo-bucket path] + (doseq [k ["a.pom" "a.jar"]] + (s3/put-object repo-bucket (key path k) (io/input-stream (io/resource "fake.jar"))))) + +(defn index-key + [path] + (key path "index.html")) + +(deftest index-for-path-should-work-for-valid-paths + (let [{:keys [event-emitter repo-bucket]} help/system] + (doseq [path [nil "a" "abc/b" "a_b/c-1/1.0.4+Foo"] + :let [key (index-key path)]] + (testing (format "with path '%s'" path) + (write-data repo-bucket path) + (event/emit event-emitter :repo-path-needs-index {:path path}) + (when (is (help/wait-for-s3-key repo-bucket key)) + (when (is (s3/object-exists? repo-bucket key)) + (let [index (with-open [is (s3/get-object-stream repo-bucket key)] + (slurp is))] + (is (re-find #"a\.pom" index)) + (is (re-find #"a\.jar" index)) + (is (not (re-find #"index\.html" index)))))))))) diff --git a/test/clojars/integration/uploads_test.clj b/test/clojars/integration/uploads_test.clj index c795156e..ae06e421 100644 --- a/test/clojars/integration/uploads_test.clj +++ b/test/clojars/integration/uploads_test.clj @@ -1059,6 +1059,29 @@ (is (every? #(re-find #"https://clojars.org/org.clojars.dantheman/test/versions/0.0.1" %) bodies))))) +(deftest deploy-generates-repo-indexes + (-> (session (help/app)) + (register-as "donthemon" "test2@example.org" "password")) + (-> (session (help/app)) + (register-as "dantheman" "test@example.org" "password") + (visit "/groups/org.clojars.dantheman") + (fill-in [:#username] "donthemon") + (press "Add Member")) + (let [token (create-deploy-token (session (help/app)) "dantheman" "password" "testing")] + (deploy + {:coordinates '[org.clojars.dantheman/test "0.0.1"] + :jar-file (io/file (io/resource "test.jar")) + :pom-file (help/rewrite-pom (io/file (io/resource "test-0.0.1/test.pom")) + {:groupId "org.clojars.dantheman"}) + :password token}) + (let [{:keys [repo-bucket]} help/system + paths ["" "org/" "org/clojars/" "org/clojars/dantheman/" + "org/clojars/dantheman/test/" "org/clojars/dantheman/test/0.0.1/"]] + (is (help/wait-for-s3-key repo-bucket (format "%sindex.html" (peek paths)))) + (doseq [path paths] + (testing (format "with a path of '%s'" path) + (is (s3/object-exists? repo-bucket (format "%sindex.html" path)))))))) + (deftest deploy-sends-notification-emails-only-when-enabled (-> (session (help/app)) (register-as "donthemon" "test2@example.org" "password") diff --git a/test/clojars/test_helper.clj b/test/clojars/test_helper.clj index 43e82822..bceaa869 100644 --- a/test/clojars/test_helper.clj +++ b/test/clojars/test_helper.clj @@ -197,3 +197,14 @@ `(with-redefs [shell/sh (constantly {:out (TXT-vec->str ~txt-records) :exit 0})] ~@body)) + +(defn wait-for-s3-key + [bucket key] + (loop [attempt 0] + (if (s3/object-exists? bucket key) + true + (if (< attempt 10) + (do + (Thread/sleep 1000) + (recur (inc attempt))) + false)))) From 34e136ae32d66e84a489602376515b8abba3260e Mon Sep 17 00:00:00 2001 From: Toby Crawley Date: Sun, 2 Jul 2023 12:38:38 -0400 Subject: [PATCH 4/4] Fix flaky test This test wasn't properly waiting for the add emails to be sent. --- src/clojars/email.clj | 14 ++++++++------ test/clojars/integration/users_test.clj | 5 +++++ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/clojars/email.clj b/src/clojars/email.clj index 2b171750..21c0a1e3 100644 --- a/src/clojars/email.clj +++ b/src/clojars/email.clj @@ -46,12 +46,14 @@ (reset! email-latch (CountDownLatch. n)))) (defn wait-for-mock-emails - "Blocks for up to 100ms waiting for `n` emails to be sent via the mock, - where `n` was passed to `expect-mock-emails` (defaulting to 1 if not called). - Returns true if `n` reached within that time. Reset with `expect-mock-emails` - between tests using the same system." - [] - (.await @email-latch 100 TimeUnit/MILLISECONDS)) + "Blocks for up to `wait-ms` (default: 100ms) waiting for `n` emails to be sent + via the mock, where `n` was passed to `expect-mock-emails` (defaulting to 1 if + not called). Returns true if `n` reached within that time. Reset with + `expect-mock-emails` between tests using the same system." + ([] + (wait-for-mock-emails 100)) + ([wait-ms] + (.await @email-latch wait-ms TimeUnit/MILLISECONDS))) (defn mock-mailer [] (expect-mock-emails) diff --git a/test/clojars/integration/users_test.clj b/test/clojars/integration/users_test.clj index f3e9da70..27c8aa1f 100644 --- a/test/clojars/integration/users_test.clj +++ b/test/clojars/integration/users_test.clj @@ -357,6 +357,11 @@ (register-as "dantheman" "test@example.org" "password") (visit "/groups/org.clojars.dantheman") (fill-in [:#username] "fixture") + ((fn [session] + ;; clear the add emails + (email/expect-mock-emails 2) + (email/wait-for-mock-emails 1000) + session)) (press "Add Member") ((fn [session] (email/expect-mock-emails 2) session)) (press "Remove Member"))