mirror of
https://github.com/penpot/penpot.git
synced 2025-05-07 15:05:59 +02:00
✨ Improve worker logging
This commit is contained in:
parent
1e8226a3fc
commit
a23d1908e9
1 changed files with 79 additions and 75 deletions
|
@ -87,10 +87,10 @@
|
||||||
|
|
||||||
(defmethod ig/init-key ::registry
|
(defmethod ig/init-key ::registry
|
||||||
[_ {:keys [::mtx/metrics ::tasks]}]
|
[_ {:keys [::mtx/metrics ::tasks]}]
|
||||||
(l/info :hint "registry initialized" :tasks (count tasks))
|
(l/inf :hint "registry initialized" :tasks (count tasks))
|
||||||
(reduce-kv (fn [registry k v]
|
(reduce-kv (fn [registry k v]
|
||||||
(let [tname (name k)]
|
(let [tname (name k)]
|
||||||
(l/trace :hint "register task" :name tname)
|
(l/trc :hint "register task" :name tname)
|
||||||
(assoc registry tname (wrap-task-handler metrics tname v))))
|
(assoc registry tname (wrap-task-handler metrics tname v))))
|
||||||
{}
|
{}
|
||||||
tasks))
|
tasks))
|
||||||
|
@ -141,18 +141,18 @@
|
||||||
|
|
||||||
(px/thread
|
(px/thread
|
||||||
{:name "penpot/executors-monitor" :virtual true}
|
{:name "penpot/executors-monitor" :virtual true}
|
||||||
(l/info :hint "monitor: started" :name name)
|
(l/inf :hint "monitor: started" :name name)
|
||||||
(try
|
(try
|
||||||
(loop [steals 0]
|
(loop [steals 0]
|
||||||
(when-not (px/shutdown? executor)
|
(when-not (px/shutdown? executor)
|
||||||
(px/sleep interval)
|
(px/sleep interval)
|
||||||
(recur (long (monitor! executor steals)))))
|
(recur (long (monitor! executor steals)))))
|
||||||
(catch InterruptedException _cause
|
(catch InterruptedException _cause
|
||||||
(l/debug :hint "monitor: interrupted" :name name))
|
(l/trc :hint "monitor: interrupted" :name name))
|
||||||
(catch Throwable cause
|
(catch Throwable cause
|
||||||
(l/error :hint "monitor: unexpected error" :name name :cause cause))
|
(l/err :hint "monitor: unexpected error" :name name :cause cause))
|
||||||
(finally
|
(finally
|
||||||
(l/info :hint "monitor: terminated" :name name))))))
|
(l/inf :hint "monitor: terminated" :name name))))))
|
||||||
|
|
||||||
(defmethod ig/halt-key! ::monitor
|
(defmethod ig/halt-key! ::monitor
|
||||||
[_ thread]
|
[_ thread]
|
||||||
|
@ -207,7 +207,7 @@
|
||||||
(db/create-array conn "uuid" ids)]]
|
(db/create-array conn "uuid" ids)]]
|
||||||
|
|
||||||
(db/exec-one! conn sql)
|
(db/exec-one! conn sql)
|
||||||
(l/dbg :hist "dispatcher: queue tasks"
|
(l/trc :hist "dispatcher: queue tasks"
|
||||||
:queue queue
|
:queue queue
|
||||||
:tasks (count ids)
|
:tasks (count ids)
|
||||||
:queued res)))
|
:queued res)))
|
||||||
|
@ -225,35 +225,35 @@
|
||||||
(cond
|
(cond
|
||||||
(rds/exception? cause)
|
(rds/exception? cause)
|
||||||
(do
|
(do
|
||||||
(l/warn :hint "dispatcher: redis exception (will retry in an instant)" :cause cause)
|
(l/wrn :hint "dispatcher: redis exception (will retry in an instant)" :cause cause)
|
||||||
(px/sleep (::rds/timeout rconn)))
|
(px/sleep (::rds/timeout rconn)))
|
||||||
|
|
||||||
(db/sql-exception? cause)
|
(db/sql-exception? cause)
|
||||||
(do
|
(do
|
||||||
(l/warn :hint "dispatcher: database exception (will retry in an instant)" :cause cause)
|
(l/wrn :hint "dispatcher: database exception (will retry in an instant)" :cause cause)
|
||||||
(px/sleep (::rds/timeout rconn)))
|
(px/sleep (::rds/timeout rconn)))
|
||||||
|
|
||||||
:else
|
:else
|
||||||
(do
|
(do
|
||||||
(l/error :hint "dispatcher: unhandled exception (will retry in an instant)" :cause cause)
|
(l/err :hint "dispatcher: unhandled exception (will retry in an instant)" :cause cause)
|
||||||
(px/sleep (::rds/timeout rconn)))))))
|
(px/sleep (::rds/timeout rconn)))))))
|
||||||
|
|
||||||
(dispatcher []
|
(dispatcher []
|
||||||
(l/info :hint "dispatcher: started")
|
(l/inf :hint "dispatcher: started")
|
||||||
(try
|
(try
|
||||||
(dm/with-open [rconn (rds/connect redis)]
|
(dm/with-open [rconn (rds/connect redis)]
|
||||||
(loop []
|
(loop []
|
||||||
(run-batch! rconn)
|
(run-batch! rconn)
|
||||||
(recur)))
|
(recur)))
|
||||||
(catch InterruptedException _
|
(catch InterruptedException _
|
||||||
(l/trace :hint "dispatcher: interrupted"))
|
(l/trc :hint "dispatcher: interrupted"))
|
||||||
(catch Throwable cause
|
(catch Throwable cause
|
||||||
(l/error :hint "dispatcher: unexpected exception" :cause cause))
|
(l/err :hint "dispatcher: unexpected exception" :cause cause))
|
||||||
(finally
|
(finally
|
||||||
(l/info :hint "dispatcher: terminated"))))]
|
(l/inf :hint "dispatcher: terminated"))))]
|
||||||
|
|
||||||
(if (db/read-only? pool)
|
(if (db/read-only? pool)
|
||||||
(l/warn :hint "dispatcher: not started (db is read-only)")
|
(l/wrn :hint "dispatcher: not started (db is read-only)")
|
||||||
(px/fn->thread dispatcher :name "penpot/worker/dispatcher" :virtual true))))
|
(px/fn->thread dispatcher :name "penpot/worker/dispatcher" :virtual true))))
|
||||||
|
|
||||||
(defmethod ig/halt-key! ::dispatcher
|
(defmethod ig/halt-key! ::dispatcher
|
||||||
|
@ -286,7 +286,7 @@
|
||||||
(let [queue (d/name queue)
|
(let [queue (d/name queue)
|
||||||
cfg (assoc cfg ::queue queue)]
|
cfg (assoc cfg ::queue queue)]
|
||||||
(if (db/read-only? pool)
|
(if (db/read-only? pool)
|
||||||
(l/warn :hint "worker: not started (db is read-only)" :queue queue :parallelism parallelism)
|
(l/wrn :hint "worker: not started (db is read-only)" :queue queue :parallelism parallelism)
|
||||||
(doall
|
(doall
|
||||||
(->> (range parallelism)
|
(->> (range parallelism)
|
||||||
(map #(assoc cfg ::worker-id %))
|
(map #(assoc cfg ::worker-id %))
|
||||||
|
@ -300,7 +300,7 @@
|
||||||
[{:keys [::rds/redis ::worker-id ::queue] :as cfg}]
|
[{:keys [::rds/redis ::worker-id ::queue] :as cfg}]
|
||||||
(px/thread
|
(px/thread
|
||||||
{:name (format "penpot/worker/runner:%s" worker-id)}
|
{:name (format "penpot/worker/runner:%s" worker-id)}
|
||||||
(l/info :hint "worker: started" :worker-id worker-id :queue queue)
|
(l/inf :hint "worker: started" :worker-id worker-id :queue queue)
|
||||||
(try
|
(try
|
||||||
(dm/with-open [rconn (rds/connect redis)]
|
(dm/with-open [rconn (rds/connect redis)]
|
||||||
(let [tenant (cf/get :tenant "main")
|
(let [tenant (cf/get :tenant "main")
|
||||||
|
@ -320,14 +320,14 @@
|
||||||
:worker-id worker-id
|
:worker-id worker-id
|
||||||
:queue queue))
|
:queue queue))
|
||||||
(catch Throwable cause
|
(catch Throwable cause
|
||||||
(l/error :hint "worker: unexpected exception"
|
(l/err :hint "worker: unexpected exception"
|
||||||
:worker-id worker-id
|
:worker-id worker-id
|
||||||
:queue queue
|
:queue queue
|
||||||
:cause cause))
|
:cause cause))
|
||||||
(finally
|
(finally
|
||||||
(l/info :hint "worker: terminated"
|
(l/inf :hint "worker: terminated"
|
||||||
:worker-id worker-id
|
:worker-id worker-id
|
||||||
:queue queue)))))
|
:queue queue)))))
|
||||||
|
|
||||||
(defn- run-worker-loop!
|
(defn- run-worker-loop!
|
||||||
[{:keys [::db/pool ::rds/rconn ::timeout ::queue ::registry ::worker-id]}]
|
[{:keys [::db/pool ::rds/rconn ::timeout ::queue ::registry ::worker-id]}]
|
||||||
|
@ -368,19 +368,19 @@
|
||||||
(let [task-id (t/decode payload)]
|
(let [task-id (t/decode payload)]
|
||||||
(if (uuid? task-id)
|
(if (uuid? task-id)
|
||||||
task-id
|
task-id
|
||||||
(l/error :hint "worker: received unexpected payload (uuid expected)"
|
(l/err :hint "worker: received unexpected payload (uuid expected)"
|
||||||
:payload task-id)))
|
:payload task-id)))
|
||||||
(catch Throwable cause
|
(catch Throwable cause
|
||||||
(l/error :hint "worker: unable to decode payload"
|
(l/err :hint "worker: unable to decode payload"
|
||||||
:payload payload
|
:payload payload
|
||||||
:length (alength payload)
|
:length (alength payload)
|
||||||
:cause cause))))
|
:cause cause))))
|
||||||
|
|
||||||
(handle-task [{:keys [name] :as task}]
|
(handle-task [{:keys [name] :as task}]
|
||||||
(let [task-fn (get registry name)]
|
(let [task-fn (get registry name)]
|
||||||
(if task-fn
|
(if task-fn
|
||||||
(task-fn task)
|
(task-fn task)
|
||||||
(l/warn :hint "no task handler found" :name name))
|
(l/wrn :hint "no task handler found" :name name))
|
||||||
{:status :completed :task task}))
|
{:status :completed :task task}))
|
||||||
|
|
||||||
(handle-task-exception [cause task]
|
(handle-task-exception [cause task]
|
||||||
|
@ -395,9 +395,9 @@
|
||||||
(= ::noop (:strategy edata))
|
(= ::noop (:strategy edata))
|
||||||
(assoc :inc-by 0))
|
(assoc :inc-by 0))
|
||||||
(do
|
(do
|
||||||
(l/error :hint "worker: unhandled exception on task"
|
(l/err :hint "worker: unhandled exception on task"
|
||||||
::l/context (get-error-context cause task)
|
::l/context (get-error-context cause task)
|
||||||
:cause cause)
|
:cause cause)
|
||||||
(if (>= (:retry-num task) (:max-retries task))
|
(if (>= (:retry-num task) (:max-retries task))
|
||||||
{:status :failed :task task :error cause}
|
{:status :failed :task task :error cause}
|
||||||
{:status :retry :task task :error cause})))))
|
{:status :retry :task task :error cause})))))
|
||||||
|
@ -414,26 +414,26 @@
|
||||||
(if (or (db/connection-error? task)
|
(if (or (db/connection-error? task)
|
||||||
(db/serialization-error? task))
|
(db/serialization-error? task))
|
||||||
(do
|
(do
|
||||||
(l/warn :hint "worker: connection error on retrieving task from database (retrying in some instants)"
|
(l/wrn :hint "worker: connection error on retrieving task from database (retrying in some instants)"
|
||||||
:worker-id worker-id
|
:worker-id worker-id
|
||||||
:cause task)
|
:cause task)
|
||||||
(px/sleep (::rds/timeout rconn))
|
(px/sleep (::rds/timeout rconn))
|
||||||
(recur (get-task task-id)))
|
(recur (get-task task-id)))
|
||||||
(do
|
(do
|
||||||
(l/error :hint "worker: unhandled exception on retrieving task from database (retrying in some instants)"
|
(l/err :hint "worker: unhandled exception on retrieving task from database (retrying in some instants)"
|
||||||
:worker-id worker-id
|
:worker-id worker-id
|
||||||
:cause task)
|
:cause task)
|
||||||
(px/sleep (::rds/timeout rconn))
|
(px/sleep (::rds/timeout rconn))
|
||||||
(recur (get-task task-id))))
|
(recur (get-task task-id))))
|
||||||
|
|
||||||
(nil? task)
|
(nil? task)
|
||||||
(l/warn :hint "worker: no task found on the database"
|
(l/wrn :hint "worker: no task found on the database"
|
||||||
:worker-id worker-id
|
:worker-id worker-id
|
||||||
:task-id task-id)
|
:task-id task-id)
|
||||||
|
|
||||||
:else
|
:else
|
||||||
(try
|
(try
|
||||||
(l/dbg :hint "worker: executing task"
|
(l/trc :hint "executing task"
|
||||||
:name (:name task)
|
:name (:name task)
|
||||||
:id (str (:id task))
|
:id (str (:id task))
|
||||||
:queue queue
|
:queue queue
|
||||||
|
@ -459,13 +459,13 @@
|
||||||
(if (or (db/connection-error? cause)
|
(if (or (db/connection-error? cause)
|
||||||
(db/serialization-error? cause))
|
(db/serialization-error? cause))
|
||||||
(do
|
(do
|
||||||
(l/warn :hint "worker: database exeption on processing task result (retrying in some instants)"
|
(l/wrn :hint "worker: database exeption on processing task result (retrying in some instants)"
|
||||||
:cause cause)
|
:cause cause)
|
||||||
(px/sleep (::rds/timeout rconn))
|
(px/sleep (::rds/timeout rconn))
|
||||||
(recur result))
|
(recur result))
|
||||||
(do
|
(do
|
||||||
(l/error :hint "worker: unhandled exception on processing task result (retrying in some instants)"
|
(l/err :hint "worker: unhandled exception on processing task result (retrying in some instants)"
|
||||||
:cause cause)
|
:cause cause)
|
||||||
(px/sleep (::rds/timeout rconn))
|
(px/sleep (::rds/timeout rconn))
|
||||||
(recur result))))))]
|
(recur result))))))]
|
||||||
|
|
||||||
|
@ -481,12 +481,12 @@
|
||||||
(catch Exception cause
|
(catch Exception cause
|
||||||
(if (rds/timeout-exception? cause)
|
(if (rds/timeout-exception? cause)
|
||||||
(do
|
(do
|
||||||
(l/error :hint "worker: redis pop operation timeout, consider increasing redis timeout (will retry in some instants)"
|
(l/err :hint "worker: redis pop operation timeout, consider increasing redis timeout (will retry in some instants)"
|
||||||
:timeout timeout
|
:timeout timeout
|
||||||
:cause cause)
|
:cause cause)
|
||||||
(px/sleep timeout))
|
(px/sleep timeout))
|
||||||
|
|
||||||
(l/error :hint "worker: unhandled exception" :cause cause))))))
|
(l/err :hint "worker: unhandled exception" :cause cause))))))
|
||||||
|
|
||||||
(defn- get-error-context
|
(defn- get-error-context
|
||||||
[_ item]
|
[_ item]
|
||||||
|
@ -517,7 +517,7 @@
|
||||||
(defmethod ig/init-key ::cron
|
(defmethod ig/init-key ::cron
|
||||||
[_ {:keys [::entries ::registry ::db/pool] :as cfg}]
|
[_ {:keys [::entries ::registry ::db/pool] :as cfg}]
|
||||||
(if (db/read-only? pool)
|
(if (db/read-only? pool)
|
||||||
(l/warn :hint "cron: not started (db is read-only)")
|
(l/wrn :hint "cron: not started (db is read-only)")
|
||||||
(let [running (atom #{})
|
(let [running (atom #{})
|
||||||
entries (->> entries
|
entries (->> entries
|
||||||
(filter some?)
|
(filter some?)
|
||||||
|
@ -540,22 +540,22 @@
|
||||||
|
|
||||||
cfg (assoc cfg ::entries entries ::running running)]
|
cfg (assoc cfg ::entries entries ::running running)]
|
||||||
|
|
||||||
(l/info :hint "cron: started" :tasks (count entries))
|
(l/inf :hint "cron: started" :tasks (count entries))
|
||||||
(synchronize-cron-entries! cfg)
|
(synchronize-cron-entries! cfg)
|
||||||
|
|
||||||
(->> (filter some? entries)
|
(->> (filter some? entries)
|
||||||
(run! (partial schedule-cron-task cfg)))
|
(run! (partial schedule-cron-task cfg)))
|
||||||
|
|
||||||
(reify
|
(reify
|
||||||
clojure.lang.IDeref
|
clojure.lang.IDeref
|
||||||
(deref [_] @running)
|
(deref [_] @running)
|
||||||
|
|
||||||
java.lang.AutoCloseable
|
java.lang.AutoCloseable
|
||||||
(close [_]
|
(close [_]
|
||||||
(l/info :hint "cron: terminated")
|
(l/inf :hint "cron: terminated")
|
||||||
(doseq [item @running]
|
(doseq [item @running]
|
||||||
(when-not (.isDone ^Future item)
|
(when-not (.isDone ^Future item)
|
||||||
(.cancel ^Future item true))))))))
|
(.cancel ^Future item true))))))))
|
||||||
|
|
||||||
(defmethod ig/halt-key! ::cron
|
(defmethod ig/halt-key! ::cron
|
||||||
[_ instance]
|
[_ instance]
|
||||||
|
@ -571,7 +571,7 @@
|
||||||
[{:keys [::db/pool ::entries]}]
|
[{:keys [::db/pool ::entries]}]
|
||||||
(db/with-atomic [conn pool]
|
(db/with-atomic [conn pool]
|
||||||
(doseq [{:keys [id cron]} entries]
|
(doseq [{:keys [id cron]} entries]
|
||||||
(l/trace :hint "register cron task" :id id :cron (str cron))
|
(l/trc :hint "register cron task" :id id :cron (str cron))
|
||||||
(db/exec-one! conn [sql:upsert-cron-task id (str cron) (str cron)]))))
|
(db/exec-one! conn [sql:upsert-cron-task id (str cron) (str cron)]))))
|
||||||
|
|
||||||
(defn- lock-scheduled-task!
|
(defn- lock-scheduled-task!
|
||||||
|
@ -589,7 +589,7 @@
|
||||||
(db/exec-one! conn ["SET statement_timeout=0;"])
|
(db/exec-one! conn ["SET statement_timeout=0;"])
|
||||||
(db/exec-one! conn ["SET idle_in_transaction_session_timeout=0;"])
|
(db/exec-one! conn ["SET idle_in_transaction_session_timeout=0;"])
|
||||||
(when (lock-scheduled-task! conn id)
|
(when (lock-scheduled-task! conn id)
|
||||||
(l/trace :hint "cron: execute task" :task-id id)
|
(l/dbg :hint "cron: execute task" :task-id id)
|
||||||
((:fn task) task))
|
((:fn task) task))
|
||||||
(db/rollback! conn))
|
(db/rollback! conn))
|
||||||
|
|
||||||
|
@ -598,9 +598,9 @@
|
||||||
|
|
||||||
(catch Throwable cause
|
(catch Throwable cause
|
||||||
(binding [l/*context* (get-error-context cause task)]
|
(binding [l/*context* (get-error-context cause task)]
|
||||||
(l/error :hint "cron: unhandled exception on running task"
|
(l/err :hint "cron: unhandled exception on running task"
|
||||||
:task-id id
|
:task-id id
|
||||||
:cause cause)))
|
:cause cause)))
|
||||||
(finally
|
(finally
|
||||||
(when-not (px/interrupted? :current)
|
(when-not (px/interrupted? :current)
|
||||||
(schedule-cron-task cfg task))))))
|
(schedule-cron-task cfg task))))))
|
||||||
|
@ -610,12 +610,16 @@
|
||||||
(s/assert dt/cron? cron)
|
(s/assert dt/cron? cron)
|
||||||
(let [now (dt/now)
|
(let [now (dt/now)
|
||||||
next (dt/next-valid-instant-from cron now)]
|
next (dt/next-valid-instant-from cron now)]
|
||||||
(inst-ms (dt/diff now next))))
|
(dt/diff now next)))
|
||||||
|
|
||||||
(defn- schedule-cron-task
|
(defn- schedule-cron-task
|
||||||
[{:keys [::running] :as cfg} {:keys [cron] :as task}]
|
[{:keys [::running] :as cfg} {:keys [cron id] :as task}]
|
||||||
(let [ft (px/schedule! (ms-until-valid cron)
|
(let [ts (ms-until-valid cron)
|
||||||
(partial execute-cron-task cfg task))]
|
ft (px/schedule! ts (partial execute-cron-task cfg task))]
|
||||||
|
|
||||||
|
(l/dbg :hint "cron: schedule task" :task-id id
|
||||||
|
:ts (dt/format-duration ts)
|
||||||
|
:at (dt/format-instant (dt/in-future ts)))
|
||||||
(swap! running #(into #{ft} (filter p/pending?) %))))
|
(swap! running #(into #{ft} (filter p/pending?) %))))
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue