penpot/backend/src/app/tasks/file_media_gc.clj
2020-12-21 12:15:53 +01:00

110 lines
3.3 KiB
Clojure

;; This Source Code Form is subject to the terms of the Mozilla Public
;; License, v. 2.0. If a copy of the MPL was not distributed with this
;; file, You can obtain one at http://mozilla.org/MPL/2.0/.
;;
;; This Source Code Form is "Incompatible With Secondary Licenses", as
;; defined by the Mozilla Public License, v. 2.0.
;;
;; Copyright (c) 2020 UXBOX Labs SL
(ns app.tasks.file-media-gc
"A maintenance task that is responsible to purge the unused media
objects from files. A file is ellegible to be garbage collected
after some period of inactivity (the default threshold is 72h)."
(:require
[app.common.pages.migrations :as pmg]
[app.config :as cfg]
[app.db :as db]
[app.metrics :as mtx]
[app.tasks :as tasks]
[app.util.blob :as blob]
[app.util.time :as dt]
[clojure.tools.logging :as log]))
(defn decode-row
[{:keys [data] :as row}]
(cond-> row
(bytes? data) (assoc :data (blob/decode data))))
(def sql:retrieve-candidates-chunk
"select f.id,
f.data,
extract(epoch from (now() - f.modified_at))::bigint as age
from file as f
where f.has_media_trimmed is false
and f.modified_at < now() - ?::interval
order by f.modified_at asc
limit 10
for update skip locked")
(defn retrieve-candidates
"Retrieves a list of files that are candidates to be garbage
collected."
[conn]
(let [threshold (:file-trimming-threshold cfg/config)
interval (db/interval threshold)]
(->> (db/exec! conn [sql:retrieve-candidates-chunk interval])
(map (fn [{:keys [age] :as row}]
(assoc row :age (dt/duration {:seconds age})))))))
(def collect-media-xf
(comp
(map :objects)
(mapcat vals)
(filter #(= :image (:type %)))
(map :metadata)
(map :id)))
(defn- collect-used-media
[data]
(-> #{}
(into collect-media-xf (vals (:pages-index data)))
(into collect-media-xf (vals (:components data)))
(into (keys (:media data)))))
(defn- process-file
[conn {:keys [id data age] :as file}]
(let [data (-> (blob/decode data)
(assoc :id id)
(pmg/migrate-data))
used (collect-used-media data)
unused (->> (db/query conn :media-object {:file-id id})
(remove #(contains? used (:id %))))]
(log/infof "processing file: id='%s' age='%s' to-delete=%s" id age (count unused))
;; Mark file as trimmed
(db/update! conn :file
{:has-media-trimmed true}
{:id id})
(doseq [mobj unused]
(log/debugf "schduling object deletion: id='%s' path='%s' delay='%s'"
(:id mobj) (:path mobj) cfg/default-deletion-delay)
(tasks/submit! conn {:name "delete-object"
:delay cfg/default-deletion-delay
:props {:id id :type :media-object}})
;; Mark object as deleted
(db/update! conn :media-object
{:deleted-at (dt/now)}
{:id id}))
nil))
(defn handler
[_task]
(db/with-atomic [conn db/pool]
(loop []
(let [files (retrieve-candidates conn)]
(when (seq files)
(run! (partial process-file conn) files)
(recur))))))
(mtx/instrument-with-summary!
{:var #'handler
:id "tasks__file_media_gc"
:help "Timing of task: file_media_gc"})