mirror of
https://github.com/penpot/penpot.git
synced 2025-06-02 22:01:39 +02:00
✨ Add safety mechanism for direct object deletion
The main objective is prevent deletion of objects that can leave unreachable orphan objects which we are unable to correctly track. Additionally, this commit includes: 1. Properly implement safe cascade deletion of all participating tables on soft deletion in the objects-gc task; 2. Make the file thumbnail related tables also participate in the touch/refcount mechanism applyign to the same safety checks; 3. Add helper for db query lazy iteration using PostgreSQL support for server side cursors; 4. Fix efficiency issues on gc related task using server side cursors instead of custom chunked iteration for processing data. The problem resided when a large chunk of rows that has identical value on the deleted_at column and the chunk size is small (the default); when the custom chunked iteration only reads a first N items and skip the rest of the set to the next run. This has caused many objects to remain pending to be eliminated, taking up space for longer than expected. The server side cursor based iteration does not has this problem and iterates correctly over all objects. 5. Fix refcount issues on font variant deletion RPC methods
This commit is contained in:
parent
e6fb96c4c2
commit
addb392ecc
37 changed files with 1918 additions and 1026 deletions
|
@ -9,8 +9,6 @@
|
|||
(:require
|
||||
[app.common.data :as d]
|
||||
[app.common.data.macros :as dm]
|
||||
[app.common.exceptions :as ex]
|
||||
[app.common.logging :as l]
|
||||
[app.common.spec :as us]
|
||||
[app.common.uuid :as uuid]
|
||||
[app.db :as db]
|
||||
|
@ -228,225 +226,3 @@
|
|||
|
||||
(dm/export impl/resolve-backend)
|
||||
(dm/export impl/calculate-hash)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Garbage Collection: Permanently delete objects
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; A task responsible to permanently delete already marked as deleted
|
||||
;; storage files. The storage objects are practically never marked to
|
||||
;; be deleted directly by the api call. The touched-gc is responsible
|
||||
;; of collecting the usage of the object and mark it as deleted. Only
|
||||
;; the TMP files are are created with expiration date in future.
|
||||
|
||||
(declare sql:retrieve-deleted-objects-chunk)
|
||||
|
||||
(defmethod ig/pre-init-spec ::gc-deleted-task [_]
|
||||
(s/keys :req [::storage ::db/pool]))
|
||||
|
||||
(defmethod ig/prep-key ::gc-deleted-task
|
||||
[_ cfg]
|
||||
(assoc cfg ::min-age (dt/duration {:hours 2})))
|
||||
|
||||
(defmethod ig/init-key ::gc-deleted-task
|
||||
[_ {:keys [::db/pool ::storage ::min-age]}]
|
||||
(letfn [(get-to-delete-chunk [cursor]
|
||||
(let [sql (str "select s.* "
|
||||
" from storage_object as s "
|
||||
" where s.deleted_at is not null "
|
||||
" and s.deleted_at < ? "
|
||||
" order by s.deleted_at desc "
|
||||
" limit 25")
|
||||
rows (db/exec! pool [sql cursor])]
|
||||
[(some-> rows peek :deleted-at)
|
||||
(some->> (seq rows) (d/group-by #(-> % :backend keyword) :id #{}) seq)]))
|
||||
|
||||
(get-to-delete-chunks [min-age]
|
||||
(d/iteration get-to-delete-chunk
|
||||
:initk (dt/minus (dt/now) min-age)
|
||||
:vf second
|
||||
:kf first))
|
||||
|
||||
(delete-in-bulk! [backend-id ids]
|
||||
(try
|
||||
(db/with-atomic [conn pool]
|
||||
(let [sql "delete from storage_object where id = ANY(?)"
|
||||
ids' (db/create-array conn "uuid" ids)
|
||||
|
||||
total (-> (db/exec-one! conn [sql ids'])
|
||||
(db/get-update-count))]
|
||||
|
||||
(-> (impl/resolve-backend storage backend-id)
|
||||
(impl/del-objects-in-bulk ids))
|
||||
|
||||
(doseq [id ids]
|
||||
(l/dbg :hint "gc-deleted: permanently delete storage object" :backend backend-id :id id))
|
||||
|
||||
total))
|
||||
|
||||
(catch Throwable cause
|
||||
(l/err :hint "gc-deleted: unexpected error on bulk deletion"
|
||||
:ids (vec ids)
|
||||
:cause cause)
|
||||
0)))]
|
||||
|
||||
(fn [params]
|
||||
(let [min-age (or (some-> params :min-age dt/duration) min-age)]
|
||||
(loop [total 0
|
||||
chunks (get-to-delete-chunks min-age)]
|
||||
(if-let [[backend-id ids] (first chunks)]
|
||||
(let [deleted (delete-in-bulk! backend-id ids)]
|
||||
(recur (+ total deleted)
|
||||
(rest chunks)))
|
||||
(do
|
||||
(l/inf :hint "gc-deleted: task finished"
|
||||
:min-age (dt/format-duration min-age)
|
||||
:total total)
|
||||
{:deleted total})))))))
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Garbage Collection: Analyze touched objects
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; This task is part of the garbage collection process of storage
|
||||
;; objects and is responsible on analyzing the touched objects and
|
||||
;; mark them for deletion if corresponds.
|
||||
;;
|
||||
;; For example: when file_media_object is deleted, the depending
|
||||
;; storage_object are marked as touched. This means that some files
|
||||
;; that depend on a concrete storage_object are no longer exists and
|
||||
;; maybe this storage_object is no longer necessary and can be
|
||||
;; eligible for elimination. This task periodically analyzes touched
|
||||
;; objects and mark them as freeze (means that has other references
|
||||
;; and the object is still valid) or deleted (no more references to
|
||||
;; this object so is ready to be deleted).
|
||||
|
||||
(declare sql:retrieve-file-media-object-nrefs)
|
||||
(declare sql:retrieve-file-object-thumbnail-nrefs)
|
||||
(declare sql:retrieve-profile-nrefs)
|
||||
(declare sql:retrieve-team-font-variant-nrefs)
|
||||
(declare sql:retrieve-touched-objects-chunk)
|
||||
|
||||
(defmethod ig/pre-init-spec ::gc-touched-task [_]
|
||||
(s/keys :req [::db/pool]))
|
||||
|
||||
(defmethod ig/init-key ::gc-touched-task
|
||||
[_ {:keys [::db/pool]}]
|
||||
(letfn [(get-team-font-variant-nrefs [conn id]
|
||||
(-> (db/exec-one! conn [sql:retrieve-team-font-variant-nrefs id id id id]) :nrefs))
|
||||
|
||||
(get-file-media-object-nrefs [conn id]
|
||||
(-> (db/exec-one! conn [sql:retrieve-file-media-object-nrefs id id]) :nrefs))
|
||||
|
||||
(get-profile-nrefs [conn id]
|
||||
(-> (db/exec-one! conn [sql:retrieve-profile-nrefs id id]) :nrefs))
|
||||
|
||||
(get-file-object-thumbnails [conn id]
|
||||
(-> (db/exec-one! conn [sql:retrieve-file-object-thumbnail-nrefs id]) :nrefs))
|
||||
|
||||
(mark-freeze-in-bulk [conn ids]
|
||||
(db/exec-one! conn ["update storage_object set touched_at=null where id = ANY(?)"
|
||||
(db/create-array conn "uuid" ids)]))
|
||||
|
||||
(mark-delete-in-bulk [conn ids]
|
||||
(db/exec-one! conn ["update storage_object set deleted_at=now(), touched_at=null where id = ANY(?)"
|
||||
(db/create-array conn "uuid" ids)]))
|
||||
|
||||
;; NOTE: A getter that retrieves the key witch will be used
|
||||
;; for group ids; previously we have no value, then we
|
||||
;; introduced the `:reference` prop, and then it is renamed
|
||||
;; to `:bucket` and now is string instead. This is
|
||||
;; implemented in this way for backward comaptibilty.
|
||||
|
||||
;; NOTE: we use the "file-media-object" as default value for
|
||||
;; backward compatibility because when we deploy it we can
|
||||
;; have old backend instances running in the same time as
|
||||
;; the new one and we can still have storage-objects created
|
||||
;; without bucket value. And we know that if it does not
|
||||
;; have value, it means :file-media-object.
|
||||
|
||||
(get-bucket [{:keys [metadata]}]
|
||||
(or (some-> metadata :bucket)
|
||||
(some-> metadata :reference d/name)
|
||||
"file-media-object"))
|
||||
|
||||
(retrieve-touched-chunk [conn cursor]
|
||||
(let [rows (->> (db/exec! conn [sql:retrieve-touched-objects-chunk cursor])
|
||||
(mapv #(d/update-when % :metadata db/decode-transit-pgobject)))]
|
||||
(when (seq rows)
|
||||
[(-> rows peek :created-at)
|
||||
(d/group-by get-bucket :id #{} rows)])))
|
||||
|
||||
(retrieve-touched [conn]
|
||||
(d/iteration (partial retrieve-touched-chunk conn)
|
||||
:initk (dt/now)
|
||||
:vf second
|
||||
:kf first))
|
||||
|
||||
(process-objects! [conn get-fn ids bucket]
|
||||
(loop [to-freeze #{}
|
||||
to-delete #{}
|
||||
ids (seq ids)]
|
||||
(if-let [id (first ids)]
|
||||
(let [nrefs (get-fn conn id)]
|
||||
(if (pos? nrefs)
|
||||
(do
|
||||
(l/debug :hint "gc-touched: processing storage object"
|
||||
:id id :status "freeze"
|
||||
:bucket bucket :refs nrefs)
|
||||
(recur (conj to-freeze id) to-delete (rest ids)))
|
||||
(do
|
||||
(l/debug :hint "gc-touched: processing storage object"
|
||||
:id id :status "delete"
|
||||
:bucket bucket :refs nrefs)
|
||||
(recur to-freeze (conj to-delete id) (rest ids)))))
|
||||
(do
|
||||
(some->> (seq to-freeze) (mark-freeze-in-bulk conn))
|
||||
(some->> (seq to-delete) (mark-delete-in-bulk conn))
|
||||
[(count to-freeze) (count to-delete)]))))]
|
||||
|
||||
(fn [_]
|
||||
(db/with-atomic [conn pool]
|
||||
(loop [to-freeze 0
|
||||
to-delete 0
|
||||
groups (retrieve-touched conn)]
|
||||
(if-let [[bucket ids] (first groups)]
|
||||
(let [[f d] (case bucket
|
||||
"file-media-object" (process-objects! conn get-file-media-object-nrefs ids bucket)
|
||||
"team-font-variant" (process-objects! conn get-team-font-variant-nrefs ids bucket)
|
||||
"file-object-thumbnail" (process-objects! conn get-file-object-thumbnails ids bucket)
|
||||
"profile" (process-objects! conn get-profile-nrefs ids bucket)
|
||||
(ex/raise :type :internal
|
||||
:code :unexpected-unknown-reference
|
||||
:hint (dm/fmt "unknown reference %" bucket)))]
|
||||
(recur (+ to-freeze (long f))
|
||||
(+ to-delete (long d))
|
||||
(rest groups)))
|
||||
(do
|
||||
(l/info :hint "gc-touched: task finished" :to-freeze to-freeze :to-delete to-delete)
|
||||
{:freeze to-freeze :delete to-delete})))))))
|
||||
|
||||
(def sql:retrieve-touched-objects-chunk
|
||||
"SELECT so.*
|
||||
FROM storage_object AS so
|
||||
WHERE so.touched_at IS NOT NULL
|
||||
AND so.created_at < ?
|
||||
ORDER by so.created_at DESC
|
||||
LIMIT 500;")
|
||||
|
||||
(def sql:retrieve-file-media-object-nrefs
|
||||
"select ((select count(*) from file_media_object where media_id = ?) +
|
||||
(select count(*) from file_media_object where thumbnail_id = ?)) as nrefs")
|
||||
|
||||
(def sql:retrieve-file-object-thumbnail-nrefs
|
||||
"select (select count(*) from file_tagged_object_thumbnail where media_id = ?) as nrefs")
|
||||
|
||||
(def sql:retrieve-team-font-variant-nrefs
|
||||
"select ((select count(*) from team_font_variant where woff1_file_id = ?) +
|
||||
(select count(*) from team_font_variant where woff2_file_id = ?) +
|
||||
(select count(*) from team_font_variant where otf_file_id = ?) +
|
||||
(select count(*) from team_font_variant where ttf_file_id = ?)) as nrefs")
|
||||
|
||||
(def sql:retrieve-profile-nrefs
|
||||
"select ((select count(*) from profile where photo_id = ?) +
|
||||
(select count(*) from team where photo_id = ?)) as nrefs")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue