mirror of
https://github.com/penpot/penpot.git
synced 2025-07-15 07:46:13 +02:00
✨ Add safety mechanism for direct object deletion
The main objective is to prevent deletion of objects that can leave unreachable orphan objects which we are unable to correctly track. Additionally, this commit includes: 1. Properly implement safe cascade deletion of all participating tables on soft deletion in the objects-gc task; 2. Make the file thumbnail related tables also participate in the touch/refcount mechanism, applying the same safety checks; 3. Add a helper for lazy iteration over db queries using PostgreSQL support for server side cursors; 4. Fix efficiency issues in the gc related tasks by using server side cursors instead of custom chunked iteration for processing data. The problem appeared when a large chunk of rows had an identical value in the deleted_at column and the chunk size was small (the default): the custom chunked iteration only read the first N items and skipped the rest of the set until the next run. This has caused many objects to remain pending to be eliminated, taking up space for longer than expected. The server side cursor based iteration does not have this problem and iterates correctly over all objects. 5. Fix refcount issues on font variant deletion RPC methods
This commit is contained in:
parent
e6fb96c4c2
commit
addb392ecc
37 changed files with 1918 additions and 1026 deletions
128
backend/src/app/storage/gc_deleted.clj
Normal file
128
backend/src/app/storage/gc_deleted.clj
Normal file
|
@ -0,0 +1,128 @@
|
|||
;; This Source Code Form is subject to the terms of the Mozilla Public
|
||||
;; License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
;; file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
;;
|
||||
;; Copyright (c) KALEIDOS INC
|
||||
|
||||
(ns app.storage.gc-deleted
|
||||
"A task responsible to permanently delete already marked as deleted
|
||||
storage files. The storage objects are practically never marked to
|
||||
be deleted directly by the api call.
|
||||
|
||||
The touched-gc is responsible of collecting the usage of the object
|
||||
and mark it as deleted. Only the TMP files are created with
|
||||
expiration date in future."
|
||||
(:require
|
||||
[app.common.data :as d]
|
||||
[app.common.logging :as l]
|
||||
[app.db :as db]
|
||||
[app.storage :as-alias sto]
|
||||
[app.storage.impl :as impl]
|
||||
[app.util.time :as dt]
|
||||
[clojure.spec.alpha :as s]
|
||||
[integrant.core :as ig]))
|
||||
|
||||
;; Selects the ids of the storage_object rows whose id is in the given
;; uuid array, using FOR UPDATE with SKIP LOCKED.
(def ^:private sql:lock-sobjects
|
||||
"SELECT id FROM storage_object
|
||||
WHERE id = ANY(?::uuid[])
|
||||
FOR UPDATE
|
||||
SKIP LOCKED")
|
||||
|
||||
(defn- lock-ids
  "Run the `sql:lock-sobjects` query for the given ids before deleting
  them, so that we proceed only with the objects that were successfully
  locked. Returns the set of ids present in the query result, or nil
  when that set is empty."
  [conn ids]
  (let [id-array (db/create-array conn "uuid" ids)
        rows     (db/exec! conn [sql:lock-sobjects id-array])
        locked   (into #{} (map :id) rows)]
    (not-empty locked)))
|
||||
|
||||
|
||||
;; Deletes the storage_object rows whose id is in the given uuid array.
(def ^:private sql:delete-sobjects
|
||||
"DELETE FROM storage_object
|
||||
WHERE id = ANY(?::uuid[])")
|
||||
|
||||
(defn- delete-sobjects!
  "Execute `sql:delete-sobjects` for the given ids and return the
  update count extracted from the statement result."
  [conn ids]
  (let [id-array (db/create-array conn "uuid" ids)
        result   (db/exec-one! conn [sql:delete-sobjects id-array])]
    (db/get-update-count result)))
|
||||
|
||||
|
||||
(defn- delete-in-bulk!
  "Delete one group of storage objects belonging to `backend-id`.

  Inside `db/tx-run!` the ids are first locked; only the locked ones
  are removed from the database and then from the resolved backend.
  Returns the update count reported by `delete-sobjects!`, or nil when
  no id could be locked. Any Throwable is logged and not rethrown."
  [cfg backend-id ids]
  (letfn [(delete-locked [{:keys [::db/conn ::sto/storage]}]
            (when-let [locked (lock-ids conn ids)]
              (let [total   (delete-sobjects! conn locked)
                    backend (impl/resolve-backend storage backend-id)]
                (impl/del-objects-in-bulk backend locked)

                (doseq [id locked]
                  (l/dbg :hint "permanently delete storage object"
                         :id (str id)
                         :backend (name backend-id)))

                total)))]

    ;; The deletion runs through its own `db/tx-run!` call and every
    ;; exception is caught here, so a failure while processing one
    ;; chunk does not affect the rest of the chunks.
    (try
      (db/tx-run! cfg delete-locked)
      (catch Throwable cause
        (l/err :hint "unexpected error on bulk deletion"
               :ids ids
               :cause cause)))))
|
||||
|
||||
|
||||
(defn- group-by-backend
  "Group `items` with `d/group-by`, keyed by the `:backend` value
  coerced to a keyword, collecting each item's `:id` starting from an
  empty set."
  [items]
  (let [backend-of (fn [item] (keyword (:backend item)))]
    (d/group-by backend-of :id #{} items)))
|
||||
|
||||
;; Selects every storage_object row whose deleted_at is set and older
;; than now() minus the given interval, oldest deleted_at first.
(def ^:private sql:get-deleted-sobjects
|
||||
"SELECT s.* FROM storage_object AS s
|
||||
WHERE s.deleted_at IS NOT NULL
|
||||
AND s.deleted_at < now() - ?::interval
|
||||
ORDER BY s.deleted_at ASC")
|
||||
|
||||
(defn- get-buckets
  "Return a sequence of grouped entries built from the rows produced by
  `sql:get-deleted-sobjects` for the given `min-age`. Rows are read
  through `db/cursor`, partitioned in batches of up to 25 and each
  batch is grouped with `group-by-backend`."
  [conn min-age]
  (let [threshold (db/interval min-age)
        rows      (db/cursor conn [sql:get-deleted-sobjects threshold])
        xform     (comp (partition-all 25)
                        (mapcat group-by-backend))]
    (sequence xform rows)))
|
||||
|
||||
|
||||
(defn- clean-deleted!
  "Walk over all the groups returned by `get-buckets` and delete each
  one with `delete-in-bulk!`. Returns the sum of the per-group results;
  a nil result counts as 0."
  [{:keys [::db/conn ::min-age] :as cfg}]
  (loop [pending (seq (get-buckets conn min-age))
         total   0]
    (if pending
      (let [[backend-id ids] (first pending)
            deleted          (delete-in-bulk! cfg backend-id ids)]
        (recur (next pending)
               (+ total (or deleted 0))))
      total)))
|
||||
|
||||
|
||||
;; The handler config requires the ::sto/storage and ::db/pool keys.
(defmethod ig/pre-init-spec ::handler
  [_key]
  (s/keys :req [::sto/storage ::db/pool]))
|
||||
|
||||
;; Sets ::min-age on the handler config to a duration of 2 hours.
(defmethod ig/prep-key ::handler
  [_key cfg]
  (let [default-age (dt/duration {:hours 2})]
    (assoc cfg ::min-age default-age)))
|
||||
|
||||
;; Builds the task handler function. On each call it resolves the
;; effective min-age (the `:min-age` of params when present, otherwise
;; the configured ::min-age), runs `clean-deleted!` through
;; `db/tx-run!`, logs the outcome and returns `{:deleted total}`.
(defmethod ig/init-key ::handler
  [_key {default-age ::min-age :as cfg}]
  (fn [params]
    (let [min-age (-> (:min-age params)
                      (or default-age)
                      (dt/duration))
          run     (fn [tx-cfg]
                    (let [total (-> tx-cfg
                                    (assoc ::min-age min-age)
                                    (clean-deleted!))]

                      (l/inf :hint "task finished"
                             :min-age (dt/format-duration min-age)
                             :total total)

                      {:deleted total}))]
      (db/tx-run! cfg run))))
|
||||
|
||||
|
208
backend/src/app/storage/gc_touched.clj
Normal file
208
backend/src/app/storage/gc_touched.clj
Normal file
|
@ -0,0 +1,208 @@
|
|||
;; This Source Code Form is subject to the terms of the Mozilla Public
|
||||
;; License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
;; file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
;;
|
||||
;; Copyright (c) KALEIDOS INC
|
||||
|
||||
(ns app.storage.gc-touched
|
||||
"This task is part of the garbage collection process of storage
|
||||
objects and is responsible on analyzing the touched objects and mark
|
||||
them for deletion if corresponds.
|
||||
|
||||
For example: when file_media_object is deleted, the depending
|
||||
storage_object are marked as touched. This means that some files
|
||||
that depend on a concrete storage_object no longer exist and
|
||||
maybe this storage_object is no longer necessary and can be eligible
|
||||
for elimination. This task periodically analyzes touched objects and
|
||||
mark them as freeze (means that has other references and the object
|
||||
is still valid) or deleted (no more references to this object so is
|
||||
ready to be deleted)."
|
||||
(:require
|
||||
[app.common.data :as d]
|
||||
[app.common.data.macros :as dm]
|
||||
[app.common.exceptions :as ex]
|
||||
[app.common.logging :as l]
|
||||
[app.db :as db]
|
||||
[app.storage :as-alias sto]
|
||||
[app.storage.impl :as impl]
|
||||
[clojure.spec.alpha :as s]
|
||||
[integrant.core :as ig]))
|
||||
|
||||
;; Sums, as `nrefs`, the team_font_variant rows that match the given id
;; in each of the woff1_file_id, woff2_file_id, otf_file_id and
;; ttf_file_id columns (one placeholder per column).
(def ^:private sql:get-team-font-variant-nrefs
|
||||
"SELECT ((SELECT count(*) FROM team_font_variant WHERE woff1_file_id = ?) +
|
||||
(SELECT count(*) FROM team_font_variant WHERE woff2_file_id = ?) +
|
||||
(SELECT count(*) FROM team_font_variant WHERE otf_file_id = ?) +
|
||||
(SELECT count(*) FROM team_font_variant WHERE ttf_file_id = ?)) AS nrefs")
|
||||
|
||||
(defn- get-team-font-variant-nrefs
  "Return the `:nrefs` value obtained from running
  `sql:get-team-font-variant-nrefs` with `id` bound to each of its
  four placeholders."
  [conn id]
  (let [query (into [sql:get-team-font-variant-nrefs] (repeat 4 id))
        row   (db/exec-one! conn query)]
    (get row :nrefs)))
|
||||
|
||||
|
||||
;; Sums, as `nrefs`, the file_media_object rows that match the given id
;; in the media_id column and in the thumbnail_id column.
(def ^:private
|
||||
sql:get-file-media-object-nrefs
|
||||
"SELECT ((SELECT count(*) FROM file_media_object WHERE media_id = ?) +
|
||||
(SELECT count(*) FROM file_media_object WHERE thumbnail_id = ?)) AS nrefs")
|
||||
|
||||
(defn- get-file-media-object-nrefs
  "Return the `:nrefs` value obtained from running
  `sql:get-file-media-object-nrefs` with `id` bound to both
  placeholders."
  [conn id]
  (let [row (db/exec-one! conn [sql:get-file-media-object-nrefs id id])]
    (get row :nrefs)))
|
||||
|
||||
|
||||
;; Sums, as `nrefs`, the profile rows and the team rows whose photo_id
;; matches the given id.
(def ^:private sql:get-profile-nrefs
|
||||
"SELECT ((SELECT count(*) FROM profile WHERE photo_id = ?) +
|
||||
(SELECT count(*) FROM team WHERE photo_id = ?)) AS nrefs")
|
||||
|
||||
(defn- get-profile-nrefs
  "Return the `:nrefs` value obtained from running
  `sql:get-profile-nrefs` (which counts both profile and team photo
  references) with `id` bound to both placeholders."
  [conn id]
  (let [row (db/exec-one! conn [sql:get-profile-nrefs id id])]
    (get row :nrefs)))
|
||||
|
||||
|
||||
;; Counts, as `nrefs`, the file_tagged_object_thumbnail rows whose
;; media_id matches the given id.
(def ^:private
|
||||
sql:get-file-object-thumbnail-nrefs
|
||||
"SELECT (SELECT count(*) FROM file_tagged_object_thumbnail WHERE media_id = ?) AS nrefs")
|
||||
|
||||
(defn- get-file-object-thumbnails
  "Return the `:nrefs` value obtained from running
  `sql:get-file-object-thumbnail-nrefs` for `id`."
  [conn id]
  (let [row (db/exec-one! conn [sql:get-file-object-thumbnail-nrefs id])]
    (get row :nrefs)))
|
||||
|
||||
|
||||
;; Counts, as `nrefs`, the file_thumbnail rows whose media_id matches
;; the given id.
(def ^:private
|
||||
sql:get-file-thumbnail-nrefs
|
||||
"SELECT (SELECT count(*) FROM file_thumbnail WHERE media_id = ?) AS nrefs")
|
||||
|
||||
(defn- get-file-thumbnails
  "Return the `:nrefs` value obtained from running
  `sql:get-file-thumbnail-nrefs` for `id`."
  [conn id]
  (let [row (db/exec-one! conn [sql:get-file-thumbnail-nrefs id])]
    (get row :nrefs)))
|
||||
|
||||
|
||||
;; Clears touched_at (sets it to NULL) on the storage_object rows whose
;; id is in the given uuid array.
(def ^:private sql:mark-freeze-in-bulk
|
||||
"UPDATE storage_object
|
||||
SET touched_at = NULL
|
||||
WHERE id = ANY(?::uuid[])")
|
||||
|
||||
(defn- mark-freeze-in-bulk!
  "Execute `sql:mark-freeze-in-bulk` for the given ids and return the
  result of `db/exec-one!`."
  [conn ids]
  (->> (db/create-array conn "uuid" ids)
       (vector sql:mark-freeze-in-bulk)
       (db/exec-one! conn)))
|
||||
|
||||
|
||||
;; Sets deleted_at to now() and clears touched_at on the storage_object
;; rows whose id is in the given uuid array.
(def ^:private sql:mark-delete-in-bulk
|
||||
"UPDATE storage_object
|
||||
SET deleted_at = now(),
|
||||
touched_at = NULL
|
||||
WHERE id = ANY(?::uuid[])")
|
||||
|
||||
(defn- mark-delete-in-bulk!
  "Execute `sql:mark-delete-in-bulk` for the given ids and return the
  result of `db/exec-one!`."
  [conn ids]
  (->> (db/create-array conn "uuid" ids)
       (vector sql:mark-delete-in-bulk)
       (db/exec-one! conn)))
|
||||
|
||||
;; NOTE: A getter that retrieves the key which will be used for group
|
||||
;; ids; previously we have no value, then we introduced the
|
||||
;; `:reference` prop, and then it is renamed to `:bucket` and now is
|
||||
;; string instead. This is implemented in this way for backward
|
||||
;; compatibility.
|
||||
|
||||
;; NOTE: we use the "file-media-object" as default value for
|
||||
;; backward compatibility because when we deploy it we can
|
||||
;; have old backend instances running in the same time as
|
||||
;; the new one and we can still have storage-objects created
|
||||
;; without bucket value. And we know that if it does not
|
||||
;; have value, it means :file-media-object.
|
||||
|
||||
(defn- lookup-bucket
  "Resolve the grouping key of a storage object row from its metadata:
  the `:bucket` value when present, otherwise the `:reference` value
  passed through `d/name`, otherwise \"file-media-object\"."
  [{:keys [metadata]}]
  (let [bucket    (:bucket metadata)
        reference (:reference metadata)]
    (cond
      bucket            bucket
      (some? reference) (or (d/name reference) "file-media-object")
      :else             "file-media-object")))
|
||||
|
||||
(defn- process-objects!
  "Classify every id of a bucket by calling `get-fn` with the connection
  and the id: a positive count means the object goes to the freeze
  group, anything else sends it to the delete group. Afterwards each
  non-empty group is persisted with its bulk statement.

  Returns a vector `[freeze-count delete-count]`."
  [conn get-fn ids bucket]
  (let [classify  (fn [acc id]
                    (let [nrefs  (get-fn conn id)
                          status (if (pos? nrefs) "freeze" "delete")]
                      (l/debug :hint "processing object"
                               :id (str id)
                               :status status
                               :bucket bucket :refs nrefs)
                      (update acc status conj id)))
        ;; Iteration stops at the first nil/false id, should one appear.
        groups    (reduce classify
                          {"freeze" #{} "delete" #{}}
                          (take-while identity ids))
        to-freeze (get groups "freeze")
        to-delete (get groups "delete")]
    (some->> (seq to-freeze) (mark-freeze-in-bulk! conn))
    (some->> (seq to-delete) (mark-delete-in-bulk! conn))
    [(count to-freeze) (count to-delete)]))
|
||||
|
||||
(defn- process-bucket!
  "Pick the reference-counting function that corresponds to `bucket`
  and run `process-objects!` with it. Raises an `:internal` error with
  code `:unexpected-unknown-reference` for any other bucket value."
  [conn bucket ids]
  (let [get-fn (case bucket
                 "file-media-object"     get-file-media-object-nrefs
                 "team-font-variant"     get-team-font-variant-nrefs
                 "file-object-thumbnail" get-file-object-thumbnails
                 "file-thumbnail"        get-file-thumbnails
                 "profile"               get-profile-nrefs
                 nil)]
    (if (some? get-fn)
      (process-objects! conn get-fn ids bucket)
      (ex/raise :type :internal
                :code :unexpected-unknown-reference
                :hint (dm/fmt "unknown reference %" bucket)))))
|
||||
|
||||
|
||||
;; Selects every storage_object row with a non-null touched_at, oldest
;; touched_at first, using FOR UPDATE with SKIP LOCKED.
(def ^:private
|
||||
sql:get-touched-storage-objects
|
||||
"SELECT so.*
|
||||
FROM storage_object AS so
|
||||
WHERE so.touched_at IS NOT NULL
|
||||
ORDER BY touched_at ASC
|
||||
FOR UPDATE
|
||||
SKIP LOCKED")
|
||||
|
||||
(defn- group-by-bucket
  "Group the given rows with `d/group-by`, keyed by `lookup-bucket`,
  collecting each row's `:id` starting from an empty set."
  [row]
  ;; NOTE: despite the singular name, `row` is the whole batch of rows
  ;; produced by `partition-all` in `get-buckets`.
  (->> row
       (d/group-by lookup-bucket :id #{})))
|
||||
|
||||
(defn- get-buckets
  "Return a sequence of grouped entries built from the rows produced by
  `sql:get-touched-storage-objects`. Rows are read through `db/cursor`,
  decoded with `impl/decode-row`, partitioned in batches of up to 25
  and each batch is grouped with `group-by-bucket`."
  [conn]
  (let [xform (comp (map impl/decode-row)
                    (partition-all 25)
                    (mapcat group-by-bucket))]
    (->> (db/cursor conn sql:get-touched-storage-objects)
         (sequence xform))))
|
||||
|
||||
(defn- process-touched!
  "Process every group returned by `get-buckets` with `process-bucket!`,
  accumulating the freeze and delete counters. Logs the totals and
  returns them as `{:freeze n :delete m}`."
  [{:keys [::db/conn]}]
  (let [step (fn [[freezed deleted] [bucket ids]]
               (let [[nfo ndo] (process-bucket! conn bucket ids)]
                 [(+ freezed nfo)
                  (+ deleted ndo)]))

        [freezed deleted]
        (reduce step [0 0] (get-buckets conn))]

    (l/inf :hint "task finished"
           :to-freeze freezed
           :to-delete deleted)

    {:freeze freezed :delete deleted}))
|
||||
|
||||
;; The handler config requires the ::db/pool key.
(defmethod ig/pre-init-spec ::handler
  [_key]
  (s/keys :req [::db/pool]))
|
||||
|
||||
;; Builds the task handler function; its argument is ignored and each
;; call runs `process-touched!` through `db/tx-run!`.
(defmethod ig/init-key ::handler
  [_key cfg]
  (fn [_params]
    (db/tx-run! cfg process-touched!)))
|
||||
|
|
@ -9,7 +9,7 @@
|
|||
(:require
|
||||
[app.common.data.macros :as dm]
|
||||
[app.common.exceptions :as ex]
|
||||
[app.db :as-alias db]
|
||||
[app.db :as db]
|
||||
[app.storage :as-alias sto]
|
||||
[buddy.core.codecs :as bc]
|
||||
[buddy.core.hash :as bh]
|
||||
|
@ -22,6 +22,13 @@
|
|||
java.nio.file.Path
|
||||
java.util.UUID))
|
||||
|
||||
(defn decode-row
  "Decode the storage-object row fields: when the row has a non-nil
  `:metadata`, replace it with the result of
  `db/decode-transit-pgobject`; otherwise return the row untouched."
  [{:keys [metadata] :as row}]
  (if (nil? metadata)
    row
    (assoc row :metadata (db/decode-transit-pgobject metadata))))
|
||||
|
||||
;; --- API Definition
|
||||
|
||||
;; Three-argument multimethod that dispatches on the `::sto/type` value
;; of its first argument (`cfg`); the other two arguments do not take
;; part in the dispatch.
(defmulti put-object (fn [cfg _ _] (::sto/type cfg)))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue