🎉 Add garbage collection task for file thumbnails

And additionally, rename the current task to file-gc
to match the real purpose of the task.
This commit is contained in:
Andrey Antukh 2022-03-23 10:59:20 +01:00 committed by Alonso Torres
parent b87e3c22b3
commit 2832736826
9 changed files with 352 additions and 219 deletions

View file

@ -233,14 +233,14 @@
([ds table params opts]
(exec-one! ds
(sql/insert table params opts)
(assoc opts :return-keys true))))
(merge {:return-keys true} opts))))
(defn insert-multi!
([ds table cols rows] (insert-multi! ds table cols rows nil))
([ds table cols rows opts]
(exec! ds
(sql/insert-multi table cols rows opts)
(assoc opts :return-keys true))))
(merge {:return-keys true} opts))))
(defn update!
([ds table params where] (update! ds table params where nil))

View file

@ -189,7 +189,7 @@
:pool (ig/ref :app.db/pool)
:entries
[{:cron #app/cron "0 0 0 * * ?" ;; daily
:task :file-media-gc}
:task :file-gc}
{:cron #app/cron "0 0 * * * ?" ;; hourly
:task :file-xlog-gc}
@ -231,7 +231,7 @@
:tasks
{:sendmail (ig/ref :app.emails/sendmail-handler)
:objects-gc (ig/ref :app.tasks.objects-gc/handler)
:file-media-gc (ig/ref :app.tasks.file-media-gc/handler)
:file-gc (ig/ref :app.tasks.file-gc/handler)
:file-xlog-gc (ig/ref :app.tasks.file-xlog-gc/handler)
:storage-deleted-gc (ig/ref :app.storage/gc-deleted-task)
:storage-touched-gc (ig/ref :app.storage/gc-touched-task)
@ -262,7 +262,7 @@
:storage (ig/ref :app.storage/storage)
:max-age cf/deletion-delay}
:app.tasks.file-media-gc/handler
:app.tasks.file-gc/handler
{:pool (ig/ref :app.db/pool)
:max-age cf/deletion-delay}

View file

@ -58,8 +58,9 @@
(db/insert! conn :file-profile-rel))))
(defn create-file
[conn {:keys [id name project-id is-shared data deleted-at]
[conn {:keys [id name project-id is-shared data deleted-at revn]
:or {is-shared false
revn 0
deleted-at nil}
:as params}]
(let [id (or id (:id data) (uuid/next))
@ -68,6 +69,7 @@
{:id id
:project-id project-id
:name name
:revn revn
:is-shared is-shared
:data (blob/encode data)
:deleted-at deleted-at})]
@ -500,13 +502,13 @@
;; --- Mutation: Upsert file thumbnail
(def sql:upsert-file-thumbnail
"insert into file_thumbnail(file_id, revn, data, props)
values (?, ?, ?, ?)
"insert into file_thumbnail (file_id, revn, data, props)
values (?, ?, ?, ?::jsonb)
on conflict(file_id, revn) do
update set data = ?, updated_at=now();")
update set data = ?, props=?, updated_at=now();")
(s/def ::revn ::us/integer)
(s/def ::props (s/map-of ::us/keyword any?))
(s/def ::revn ::us/integer)
(s/def ::props map?)
(s/def ::upsert-file-thumbnail
(s/keys :req-un [::profile-id ::file-id ::revn ::data ::props]))
@ -516,5 +518,5 @@
(files/check-edition-permissions! conn profile-id file-id)
(let [props (db/tjson (or props {}))]
(db/exec-one! conn [sql:upsert-file-thumbnail
file-id revn data props data])
file-id revn data props data props])
nil)))

View file

@ -440,7 +440,7 @@
(let [params (cond-> {:file-id file-id}
frame-id (assoc :frame-id frame-id))
rows (db/query pool :file-frame-thumbnail params)]
(d/group-by :frame-id :data rows)))
(d/index-by :frame-id :data rows)))
;; --- QUERY: get file thumbnail
@ -465,10 +465,11 @@
(ex/raise :type :not-found
:code :file-thumbnail-not-found))
(with-meta {:data (:data row)
:props (some-> (:props row) db/decode-transit-pgobject)
:revn (:revn row)
:file-id (:file-id row)}
(with-meta
{:data (:data row)
:props (some-> (:props row) db/decode-transit-pgobject)
:revn (:revn row)
:file-id (:file-id row)}
{:transform-response (rpch/http-cache {:max-age (* 1000 60 60)})})))
;; --- Helpers

View file

@ -0,0 +1,164 @@
;; This Source Code Form is subject to the terms of the Mozilla Public
;; License, v. 2.0. If a copy of the MPL was not distributed with this
;; file, You can obtain one at http://mozilla.org/MPL/2.0/.
;;
;; Copyright (c) UXBOX Labs SL
(ns app.tasks.file-gc
"A maintenance task that is responsible of: purge unused file media,
clean unused frame thumbnails and remove old file thumbnails. The
file is eligible to be garbage collected after some period of
inactivity (the default threshold is 72h)."
(:require
[app.common.data :as d]
[app.common.logging :as l]
[app.common.pages.helpers :as cph]
[app.common.pages.migrations :as pmg]
[app.db :as db]
[app.util.blob :as blob]
[app.util.time :as dt]
[clojure.spec.alpha :as s]
[integrant.core :as ig]))
(declare ^:private retrieve-candidates)
(declare ^:private process-file)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; HANDLER
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(s/def ::max-age ::dt/duration)
(defmethod ig/pre-init-spec ::handler [_]
(s/keys :req-un [::db/pool ::max-age]))
(defmethod ig/init-key ::handler
[_ {:keys [pool] :as cfg}]
(fn [_]
(db/with-atomic [conn pool]
(let [cfg (assoc cfg :conn conn)]
(loop [total 0
files (retrieve-candidates cfg)]
(if-let [file (first files)]
(do
(process-file cfg file)
(recur (inc total)
(rest files)))
(do
(l/debug :msg "finished processing files" :processed total)
{:processed total})))))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; IMPL
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(def ^:private
sql:retrieve-candidates-chunk
"select f.id,
f.data,
f.revn,
f.modified_at
from file as f
where f.has_media_trimmed is false
and f.modified_at < now() - ?::interval
and f.modified_at < ?
order by f.modified_at desc
limit 1
for update skip locked")
(defn- retrieve-candidates
[{:keys [conn max-age] :as cfg}]
(let [interval (db/interval max-age)
get-chunk
(fn [cursor]
(let [rows (db/exec! conn [sql:retrieve-candidates-chunk interval cursor])]
[(some->> rows peek :modified-at) (seq rows)]))]
(sequence cat (d/iteration get-chunk
:vf second
:kf first
:initk (dt/now)))))
(defn- collect-used-media
[data]
(let [xform (comp
(map :objects)
(mapcat vals)
(keep (fn [{:keys [type] :as obj}]
(case type
:path (get-in obj [:fill-image :id])
:image (get-in obj [:metadata :id])
nil))))
pages (concat
(vals (:pages-index data))
(vals (:components data)))]
(-> #{}
(into xform pages)
(into (keys (:media data))))))
(defn- clean-file-media!
"Performs the garbage collection of file media objects."
[conn file-id data]
(let [used (collect-used-media data)
unused (->> (db/query conn :file-media-object {:file-id file-id})
(remove #(contains? used (:id %))))]
(doseq [mobj unused]
(l/debug :hint "delete file media object"
:id (:id mobj)
:media-id (:media-id mobj)
:thumbnail-id (:thumbnail-id mobj))
;; NOTE: deleting the file-media-object in the database
;; automatically marks as touched the referenced storage
;; objects. The touch mechanism is needed because many files can
;; point to the same storage objects and we can't just delete
;; them.
(db/delete! conn :file-media-object {:id (:id mobj)}))))
(defn- collect-frames
[data]
(let [xform (comp
(map :objects)
(mapcat vals)
(filter cph/frame-shape?)
(keep :id))
pages (concat
(vals (:pages-index data))
(vals (:components data)))]
(into #{} xform pages)))
(defn- clean-file-frame-thumbnails!
[conn file-id data]
(let [sql (str "delete from file_frame_thumbnail "
" where file_id=? and not (frame_id=ANY(?))")
ids (->> (collect-frames data)
(db/create-array conn "uuid"))
res (db/exec-one! conn [sql file-id ids])]
(l/debug :hint "delete frame thumbnails" :total (:next.jdbc/update-count res))))
(defn- clean-file-thumbnails!
[conn file-id revn]
(let [sql (str "delete from file_thumbnail "
" where file_id=? and revn < ?")
res (db/exec-one! conn [sql file-id revn])]
(l/debug :hint "delete file thumbnails" :total (:next.jdbc/update-count res))))
(defn- process-file
[{:keys [conn] :as cfg} {:keys [id data revn modified-at] :as file}]
(l/debug :hint "processing file" :id id :modified-at modified-at)
(let [data (-> (blob/decode data)
(assoc :id id)
(pmg/migrate-data))]
(clean-file-media! conn id data)
(clean-file-frame-thumbnails! conn id data)
(clean-file-thumbnails! conn id revn)
;; Mark file as trimmed
(db/update! conn :file
{:has-media-trimmed true}
{:id id})
nil))

View file

@ -1,139 +0,0 @@
;; This Source Code Form is subject to the terms of the Mozilla Public
;; License, v. 2.0. If a copy of the MPL was not distributed with this
;; file, You can obtain one at http://mozilla.org/MPL/2.0/.
;;
;; Copyright (c) UXBOX Labs SL
(ns app.tasks.file-media-gc
"A maintenance task that is responsible to purge the unused media
objects from files. A file is eligible to be garbage collected
after some period of inactivity (the default threshold is 72h)."
(:require
[app.common.logging :as l]
[app.common.pages.helpers :as cph]
[app.common.pages.migrations :as pmg]
[app.db :as db]
[app.util.blob :as blob]
[app.util.time :as dt]
[clojure.spec.alpha :as s]
[integrant.core :as ig]))
(declare process-file)
(declare retrieve-candidates)
(s/def ::max-age ::dt/duration)
(defmethod ig/pre-init-spec ::handler [_]
(s/keys :req-un [::db/pool ::max-age]))
(defmethod ig/init-key ::handler
[_ {:keys [pool] :as cfg}]
(fn [_]
(db/with-atomic [conn pool]
(let [cfg (assoc cfg :conn conn)]
(loop [n 0]
(let [files (retrieve-candidates cfg)]
(if (seq files)
(do
(run! (partial process-file cfg) files)
(recur (+ n (count files))))
(do
(l/debug :msg "finished processing files" :processed n)
{:processed n}))))))))
(def ^:private
sql:retrieve-candidates-chunk
"select f.id,
f.data,
extract(epoch from (now() - f.modified_at))::bigint as age
from file as f
where f.has_media_trimmed is false
and f.modified_at < now() - ?::interval
order by f.modified_at asc
limit 10
for update skip locked")
(defn- retrieve-candidates
[{:keys [conn max-age] :as cfg}]
(let [interval (db/interval max-age)]
(->> (db/exec! conn [sql:retrieve-candidates-chunk interval])
(mapv (fn [{:keys [age] :as row}]
(assoc row :age (dt/duration {:seconds age})))))))
(def ^:private
collect-media-xf
(comp
(map :objects)
(mapcat vals)
(keep (fn [{:keys [type] :as obj}]
(case type
:path (get-in obj [:fill-image :id])
:image (get-in obj [:metadata :id])
nil)))))
(defn- collect-used-media
[data]
(let [pages (concat
(vals (:pages-index data))
(vals (:components data)))]
(-> #{}
(into collect-media-xf pages)
(into (keys (:media data))))))
(def ^:private
collect-frames-xf
(comp
(map :objects)
(mapcat vals)
(filter cph/frame-shape?)
(keep :id)))
(defn- collect-frames
[data]
(let [pages (concat
(vals (:pages-index data))
(vals (:components data)))]
(into #{} collect-frames-xf pages)))
(defn- process-file
[{:keys [conn] :as cfg} {:keys [id data age] :as file}]
(let [data (-> (blob/decode data)
(assoc :id id)
(pmg/migrate-data))]
(let [used (collect-used-media data)
unused (->> (db/query conn :file-media-object {:file-id id})
(remove #(contains? used (:id %))))]
(l/debug :hint "processing file"
:id id
:age age
:to-delete (count unused))
;; Mark file as trimmed
(db/update! conn :file
{:has-media-trimmed true}
{:id id})
(doseq [mobj unused]
(l/debug :hint "deleting media object"
:id (:id mobj)
:media-id (:media-id mobj)
:thumbnail-id (:thumbnail-id mobj))
;; NOTE: deleting the file-media-object in the database
;; automatically marks as touched the referenced storage
;; objects. The touch mechanism is needed because many files can
;; point to the same storage objects and we can't just delete
;; them.
(db/delete! conn :file-media-object {:id (:id mobj)})))
(let [sql (str "delete from file_frame_thumbnail "
" where file_id = ? and not (frame_id = ANY(?))")
ids (->> (collect-frames data)
(db/create-array conn "uuid"))]
;; delete the unused frame thumbnails
(db/exec! conn [sql (:id file) ids]))
nil))