Add deduplication for file object thumbnails

This commit is contained in:
Andrey Antukh 2023-11-24 10:40:56 +01:00
parent 6d49e1cac5
commit d82ebdc034
7 changed files with 278 additions and 70 deletions

View file

@ -844,7 +844,7 @@
::l/sync? true))))))
(defmethod read-section :v1/sobjects
[{:keys [::sto/storage ::db/conn ::input ::overwrite?]}]
[{:keys [::sto/storage ::db/conn ::input ::overwrite? ::timestamp]}]
(let [storage (media/configure-assets-storage storage)
ids (read-obj! input)
thumb? (into #{} (map :media-id) (:thumbnails @*state*))]
@ -865,15 +865,14 @@
content (-> (sto/content resource size)
(sto/wrap-with-hash hash))
params (assoc mdata ::sto/content content)
params (-> mdata
(assoc ::sto/content content)
(assoc ::sto/deduplicate? true)
(assoc ::sto/touched-at timestamp))
params (if (thumb? id)
(-> params
(assoc ::sto/deduplicate? false)
(assoc :bucket "file-object-thumbnail"))
(-> params
(assoc ::sto/deduplicate? true)
(assoc ::sto/touched-at (dt/now))
(assoc :bucket "file-media-object")))
(assoc params :bucket "file-object-thumbnail")
(assoc params :bucket "file-media-object"))
sobject (sto/put-object! storage params)]
@ -893,7 +892,7 @@
(let [file-id (lookup-index (:file-id item))]
(if (= file-id (:file-id item))
(l/warn :hint "ignoring file media object" :file-id (str (:file-id item)) ::l/sync? true)
(l/warn :hint "ignoring file media object" :file-id (str file-id) ::l/sync? true)
(db/insert! conn :file-media-object
(-> item
(assoc :file-id file-id)
@ -908,8 +907,7 @@
:media-id (str (:media-id item))
:object-id (:object-id item)
::l/sync? true)
(db/insert! conn :file-tagged-object-thumbnail
(update item :media-id lookup-index)
(db/insert! conn :file-tagged-object-thumbnail item
{:on-conflict-do-nothing overwrite?})))))
(defn- lookup-index

View file

@ -249,7 +249,8 @@
(sto/wrap-with-hash hash))
media (sto/put-object! storage
{::sto/content data
::sto/deduplicate? false
::sto/deduplicate? true
::sto/touched-at (dt/now)
:content-type mtype
:bucket "file-object-thumbnail"})]
@ -292,7 +293,7 @@
:object-id object-id}
{::db/for-update? true})]
(sto/del-object! storage media-id)
(sto/touch-object! storage media-id)
(db/delete! conn :file-tagged-object-thumbnail
{:file-id file-id
:object-id object-id})

View file

@ -322,10 +322,11 @@
;; and the object is still valid) or deleted (no more references to
;; this object so is ready to be deleted).
(declare sql:retrieve-touched-objects-chunk)
(declare sql:retrieve-file-media-object-nrefs)
(declare sql:retrieve-team-font-variant-nrefs)
(declare sql:retrieve-file-object-thumbnail-nrefs)
(declare sql:retrieve-profile-nrefs)
(declare sql:retrieve-team-font-variant-nrefs)
(declare sql:retrieve-touched-objects-chunk)
(defmethod ig/pre-init-spec ::gc-touched-task [_]
(s/keys :req [::db/pool]))
@ -341,6 +342,9 @@
(get-profile-nrefs [conn id]
(-> (db/exec-one! conn [sql:retrieve-profile-nrefs id id]) :nrefs))
(get-file-object-thumbnails [conn id]
(-> (db/exec-one! conn [sql:retrieve-file-object-thumbnail-nrefs id]) :nrefs))
(mark-freeze-in-bulk [conn ids]
(db/exec-one! conn ["update storage_object set touched_at=null where id = ANY(?)"
(db/create-array conn "uuid" ids)]))
@ -410,9 +414,10 @@
groups (retrieve-touched conn)]
(if-let [[bucket ids] (first groups)]
(let [[f d] (case bucket
"file-media-object" (process-objects! conn get-file-media-object-nrefs ids bucket)
"team-font-variant" (process-objects! conn get-team-font-variant-nrefs ids bucket)
"profile" (process-objects! conn get-profile-nrefs ids bucket)
"file-media-object" (process-objects! conn get-file-media-object-nrefs ids bucket)
"team-font-variant" (process-objects! conn get-team-font-variant-nrefs ids bucket)
"file-object-thumbnail" (process-objects! conn get-file-object-thumbnails ids bucket)
"profile" (process-objects! conn get-profile-nrefs ids bucket)
(ex/raise :type :internal
:code :unexpected-unknown-reference
:hint (dm/fmt "unknown reference %" bucket)))]
@ -435,6 +440,9 @@
"select ((select count(*) from file_media_object where media_id = ?) +
(select count(*) from file_media_object where thumbnail_id = ?)) as nrefs")
(def sql:retrieve-file-object-thumbnail-nrefs
"select (select count(*) from file_tagged_object_thumbnail where media_id = ?) as nrefs")
(def sql:retrieve-team-font-variant-nrefs
"select ((select count(*) from team_font_variant where woff1_file_id = ?) +
(select count(*) from team_font_variant where woff2_file_id = ?) +

View file

@ -118,15 +118,15 @@
;; have fill-image attribute (which initially
;; designed for :path shapes).
(sequence
(keep :id)
(concat [(:fill-image obj)
(:metadata obj)]
(map :fill-image (:fills obj))
(map :stroke-image (:strokes obj))
(->> (:content obj)
(tree-seq map? :children)
(mapcat :fills)
(map :fill-image)))))))
(keep :id)
(concat [(:fill-image obj)
(:metadata obj)]
(map :fill-image (:fills obj))
(map :stroke-image (:strokes obj))
(->> (:content obj)
(tree-seq map? :children)
(mapcat :fills)
(map :fill-image)))))))
pages (concat
(vals (:pages-index data))
(vals (:components data)))]
@ -142,10 +142,10 @@
(remove #(contains? used (:id %))))]
(doseq [mobj unused]
(l/debug :hint "delete file media object"
:id (:id mobj)
:media-id (:media-id mobj)
:thumbnail-id (:thumbnail-id mobj))
(l/dbg :hint "delete file media object"
:id (:id mobj)
:media-id (:media-id mobj)
:thumbnail-id (:thumbnail-id mobj))
;; NOTE: deleting the file-media-object in the database
;; automatically marks as touched the referenced storage
@ -154,21 +154,23 @@
;; them.
(db/delete! conn :file-media-object {:id (:id mobj)}))))
(defn- clean-file-tagged-object-thumbnails!
(defn- clean-file-object-thumbnails!
[{:keys [::db/conn ::sto/storage]} file-id data]
(let [stored (->> (db/query conn :file_tagged_object_thumbnail
(let [stored (->> (db/query conn :file-tagged-object-thumbnail
{:file-id file-id}
{:columns [:object-id]})
(into #{} (map :object-id)))
using (into #{}
(mapcat
(fn [{:keys [id objects]}]
(->> (ctt/get-frames objects)
(mapcat
#(vector
(thc/fmt-object-id file-id id (:id %) "frame")
(thc/fmt-object-id file-id id (:id %) "component"))))))
(comp
(mapcat (fn [{:keys [id objects]}]
(->> (ctt/get-frames objects)
(map #(assoc % :page-id id)))))
(mapcat (fn [{:keys [id page-id]}]
(list
(thc/fmt-object-id file-id page-id id "frame")
(thc/fmt-object-id file-id page-id id "component")))))
(vals (:pages-index data)))
unused (set/difference stored using)]
@ -179,15 +181,15 @@
" returning media_id")
res (db/exec! conn [sql file-id (db/create-array conn "text" unused)])]
(l/dbg :hint "delete file object thumbnails"
:file-id (str file-id)
:total (count res))
(doseq [media-id (into #{} (keep :media-id) res)]
;; Mark as deleted the storage object related with the
;; photo-id field.
(l/trace :hint "mark storage object as deleted" :id media-id)
(sto/del-object! storage media-id))
(l/debug :hint "delete file object thumbnails"
:file-id file-id
:total (count res))))))
(l/trc :hint "touch file object thumbnail storage object" :id (str media-id))
(sto/touch-object! storage media-id))))))
(defn- clean-file-thumbnails!
[{:keys [::db/conn ::sto/storage]} file-id revn]
@ -197,15 +199,15 @@
res (db/exec! conn [sql file-id revn])]
(when (seq res)
(l/dbg :hint "delete file thumbnails"
:file-id (str file-id)
:total (count res))
(doseq [media-id (into #{} (keep :media-id) res)]
;; Mark as deleted the storage object related with the
;; media-id field.
(l/trace :hint "mark storage object as deleted" :id media-id)
(sto/del-object! storage media-id))
(l/debug :hint "delete file thumbnails"
:file-id file-id
:total (count res)))))
(l/trc :hint "delete file thumbnail storage object" :id (str media-id))
(sto/del-object! storage media-id)))))
(def ^:private
sql:get-files-for-library
@ -250,7 +252,7 @@
(mapv :id))]
(when (seq unused)
(l/debug :hint "clean deleted components" :total (count unused))
(l/dbg :hint "clean deleted components" :total (count unused))
(let [data (reduce ctkl/delete-component data unused)]
(db/update! conn :file
@ -283,12 +285,12 @@
rows (db/exec! conn [sql file-id used])]
(doseq [fragment-id (map :id rows)]
(l/trace :hint "remove unused file data fragment" :id (str fragment-id))
(l/trc :hint "remove unused file data fragment" :id (str fragment-id))
(db/delete! conn :file-data-fragment {:id fragment-id :file-id file-id})))))
(defn- process-file
[{:keys [::db/conn] :as cfg} {:keys [id data revn modified-at features] :as file}]
(l/debug :hint "processing file" :id id :modified-at modified-at)
(l/dbg :hint "processing file" :id id :modified-at modified-at)
(binding [pmap/*load-fn* (partial files/load-pointer conn id)
pmap/*tracked* (atom {})]
@ -297,7 +299,7 @@
(pmg/migrate-data))]
(clean-file-media! conn id data)
(clean-file-tagged-object-thumbnails! cfg id data)
(clean-file-object-thumbnails! cfg id data)
(clean-file-thumbnails! cfg id revn)
(clean-deleted-components! conn id data)