Skip to content

Commit a598877

Browse files
authored
Cron job to cleanup hook_task table (#13080)
Close **Prune hook_task Table (#10741)** Added a cron job to delete webhook deliveries in the hook_task table. It can be turned on/off and the schedule controlled globally via app.ini. The data can be deleted by either the age of the delivery which is the default or by deleting the all but the most recent deliveries _per webhook_. Note: I had previously submitted pr #11416 but I closed it when I realized that I had deleted per repository instead of per webhook. Also, I decided allowing the settings to be overridden via the ui was overkill. Also this version allows the deletion by age which is probably what most people would want.
1 parent 0f726ca commit a598877

File tree

7 files changed

+251
-0
lines changed

7 files changed

+251
-0
lines changed

custom/conf/app.example.ini

+15
Original file line numberDiff line numberDiff line change
@@ -1023,6 +1023,21 @@ SCHEDULE = @every 24h
10231023
; deleted branches than OLDER_THAN ago are subject to deletion
10241024
OLDER_THAN = 24h
10251025

1026+
; Cleanup hook_task table
1027+
[cron.cleanup_hook_task_table]
1028+
; Whether to enable the job
1029+
ENABLED = true
1030+
; Whether to always run at start up time (if ENABLED)
1031+
RUN_AT_START = false
1032+
; Time interval for job to run
1033+
SCHEDULE = @every 24h
1034+
; OlderThan or PerWebhook. How the records are removed, either by age (i.e. how long ago hook_task record was delivered) or by the number to keep per webhook (i.e. keep most recent x deliveries per webhook).
1035+
CLEANUP_TYPE = OlderThan
1036+
; If CLEANUP_TYPE is set to OlderThan, then any delivered hook_task records older than this expression will be deleted.
1037+
OLDER_THAN = 168h
1038+
; If CLEANUP_TYPE is set to PerWebhook, this is number of hook_task records to keep for a webhook (i.e. keep the most recent x deliveries).
1039+
NUMBER_TO_KEEP = 10
1040+
10261041
; Extended cron task - not enabled by default
10271042

10281043
; Delete all unactivated accounts

docs/content/doc/advanced/config-cheat-sheet.en-us.md

+9
Original file line numberDiff line numberDiff line change
@@ -694,6 +694,15 @@ NB: You must `REDIRECT_MACARON_LOG` and have `DISABLE_ROUTER_LOG` set to `false`
694694
- `RUN_AT_START`: **true**: Run repository statistics check at start time.
695695
- `SCHEDULE`: **@every 24h**: Cron syntax for scheduling repository statistics check.
696696

697+
### Cron - Cleanup hook_task Table (`cron.cleanup_hook_task_table`)
698+
699+
- `ENABLED`: **true**: Enable cleanup hook_task job.
700+
- `RUN_AT_START`: **false**: Run cleanup hook_task at start time (if ENABLED).
701+
- `SCHEDULE`: **@every 24h**: Cron syntax for cleaning hook_task table.
702+
- `CLEANUP_TYPE` **OlderThan** OlderThan or PerWebhook Method to cleanup hook_task, either by age (i.e. how long ago hook_task record was delivered) or by the number to keep per webhook (i.e. keep most recent x deliveries per webhook).
703+
- `OLDER_THAN`: **168h**: If CLEANUP_TYPE is set to OlderThan, then any delivered hook_task records older than this expression will be deleted.
704+
- `NUMBER_TO_KEEP`: **10**: If CLEANUP_TYPE is set to PerWebhook, this is number of hook_task records to keep for a webhook (i.e. keep the most recent x deliveries).
705+
697706
#### Cron - Update Migration Poster ID (`cron.update_migration_poster_id`)
698707

699708
- `SCHEDULE`: **@every 24h** : Interval as a duration between each synchronization, it will always attempt synchronization when the instance starts.

models/webhook.go

+87
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
package models
77

88
import (
9+
"context"
910
"encoding/json"
1011
"fmt"
1112
"strings"
@@ -39,6 +40,26 @@ func ToHookContentType(name string) HookContentType {
3940
return hookContentTypes[name]
4041
}
4142

43+
// HookTaskCleanupType is the type of cleanup to perform on hook_task
44+
type HookTaskCleanupType int
45+
46+
const (
47+
// OlderThan hook_task rows will be cleaned up by the age of the row
48+
OlderThan HookTaskCleanupType = iota
49+
// PerWebhook hook_task rows will be cleaned up by leaving the most recent deliveries for each webhook
50+
PerWebhook
51+
)
52+
53+
var hookTaskCleanupTypes = map[string]HookTaskCleanupType{
54+
"OlderThan": OlderThan,
55+
"PerWebhook": PerWebhook,
56+
}
57+
58+
// ToHookTaskCleanupType returns HookTaskCleanupType by given name.
59+
func ToHookTaskCleanupType(name string) HookTaskCleanupType {
60+
return hookTaskCleanupTypes[name]
61+
}
62+
4263
// Name returns the name of a given web hook's content type
4364
func (t HookContentType) Name() string {
4465
switch t {
@@ -738,3 +759,69 @@ func FindRepoUndeliveredHookTasks(repoID int64) ([]*HookTask, error) {
738759
}
739760
return tasks, nil
740761
}
762+
763+
// CleanupHookTaskTable deletes rows from hook_task as needed.
764+
func CleanupHookTaskTable(ctx context.Context, cleanupType HookTaskCleanupType, olderThan time.Duration, numberToKeep int) error {
765+
log.Trace("Doing: CleanupHookTaskTable")
766+
767+
if cleanupType == OlderThan {
768+
deleteOlderThan := time.Now().Add(-olderThan).UnixNano()
769+
deletes, err := x.
770+
Where("is_delivered = ? and delivered < ?", true, deleteOlderThan).
771+
Delete(new(HookTask))
772+
if err != nil {
773+
return err
774+
}
775+
log.Trace("Deleted %d rows from hook_task", deletes)
776+
} else if cleanupType == PerWebhook {
777+
hookIDs := make([]int64, 0, 10)
778+
err := x.Table("webhook").
779+
Where("id > 0").
780+
Cols("id").
781+
Find(&hookIDs)
782+
if err != nil {
783+
return err
784+
}
785+
for _, hookID := range hookIDs {
786+
select {
787+
case <-ctx.Done():
788+
return ErrCancelledf("Before deleting hook_task records for hook id %d", hookID)
789+
default:
790+
}
791+
if err = deleteDeliveredHookTasksByWebhook(hookID, numberToKeep); err != nil {
792+
return err
793+
}
794+
}
795+
}
796+
log.Trace("Finished: CleanupHookTaskTable")
797+
return nil
798+
}
799+
800+
func deleteDeliveredHookTasksByWebhook(hookID int64, numberDeliveriesToKeep int) error {
801+
log.Trace("Deleting hook_task rows for webhook %d, keeping the most recent %d deliveries", hookID, numberDeliveriesToKeep)
802+
var deliveryDates = make([]int64, 0, 10)
803+
err := x.Table("hook_task").
804+
Where("hook_task.hook_id = ? AND hook_task.is_delivered = ? AND hook_task.delivered is not null", hookID, true).
805+
Cols("hook_task.delivered").
806+
Join("INNER", "webhook", "hook_task.hook_id = webhook.id").
807+
OrderBy("hook_task.delivered desc").
808+
Limit(1, int(numberDeliveriesToKeep)).
809+
Find(&deliveryDates)
810+
if err != nil {
811+
return err
812+
}
813+
814+
if len(deliveryDates) > 0 {
815+
deletes, err := x.
816+
Where("hook_id = ? and is_delivered = ? and delivered <= ?", hookID, true, deliveryDates[0]).
817+
Delete(new(HookTask))
818+
if err != nil {
819+
return err
820+
}
821+
log.Trace("Deleted %d hook_task rows for webhook %d", deletes, hookID)
822+
} else {
823+
log.Trace("No hook_task rows to delete for webhook %d", hookID)
824+
}
825+
826+
return nil
827+
}

models/webhook_test.go

+114
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@
55
package models
66

77
import (
8+
"context"
89
"encoding/json"
910
"testing"
11+
"time"
1012

1113
api "code.gitea.io/gitea/modules/structs"
1214

@@ -223,3 +225,115 @@ func TestUpdateHookTask(t *testing.T) {
223225
assert.NoError(t, UpdateHookTask(hook))
224226
AssertExistsAndLoadBean(t, hook)
225227
}
228+
229+
func TestCleanupHookTaskTable_PerWebhook_DeletesDelivered(t *testing.T) {
230+
assert.NoError(t, PrepareTestDatabase())
231+
hookTask := &HookTask{
232+
RepoID: 3,
233+
HookID: 3,
234+
Typ: GITEA,
235+
URL: "http://www.example.com/unit_test",
236+
Payloader: &api.PushPayload{},
237+
IsDelivered: true,
238+
Delivered: time.Now().UnixNano(),
239+
}
240+
AssertNotExistsBean(t, hookTask)
241+
assert.NoError(t, CreateHookTask(hookTask))
242+
AssertExistsAndLoadBean(t, hookTask)
243+
244+
assert.NoError(t, CleanupHookTaskTable(context.Background(), PerWebhook, 168*time.Hour, 0))
245+
AssertNotExistsBean(t, hookTask)
246+
}
247+
248+
func TestCleanupHookTaskTable_PerWebhook_LeavesUndelivered(t *testing.T) {
249+
assert.NoError(t, PrepareTestDatabase())
250+
hookTask := &HookTask{
251+
RepoID: 2,
252+
HookID: 4,
253+
Typ: GITEA,
254+
URL: "http://www.example.com/unit_test",
255+
Payloader: &api.PushPayload{},
256+
IsDelivered: false,
257+
}
258+
AssertNotExistsBean(t, hookTask)
259+
assert.NoError(t, CreateHookTask(hookTask))
260+
AssertExistsAndLoadBean(t, hookTask)
261+
262+
assert.NoError(t, CleanupHookTaskTable(context.Background(), PerWebhook, 168*time.Hour, 0))
263+
AssertExistsAndLoadBean(t, hookTask)
264+
}
265+
266+
func TestCleanupHookTaskTable_PerWebhook_LeavesMostRecentTask(t *testing.T) {
267+
assert.NoError(t, PrepareTestDatabase())
268+
hookTask := &HookTask{
269+
RepoID: 2,
270+
HookID: 4,
271+
Typ: GITEA,
272+
URL: "http://www.example.com/unit_test",
273+
Payloader: &api.PushPayload{},
274+
IsDelivered: true,
275+
Delivered: time.Now().UnixNano(),
276+
}
277+
AssertNotExistsBean(t, hookTask)
278+
assert.NoError(t, CreateHookTask(hookTask))
279+
AssertExistsAndLoadBean(t, hookTask)
280+
281+
assert.NoError(t, CleanupHookTaskTable(context.Background(), PerWebhook, 168*time.Hour, 1))
282+
AssertExistsAndLoadBean(t, hookTask)
283+
}
284+
285+
func TestCleanupHookTaskTable_OlderThan_DeletesDelivered(t *testing.T) {
286+
assert.NoError(t, PrepareTestDatabase())
287+
hookTask := &HookTask{
288+
RepoID: 3,
289+
HookID: 3,
290+
Typ: GITEA,
291+
URL: "http://www.example.com/unit_test",
292+
Payloader: &api.PushPayload{},
293+
IsDelivered: true,
294+
Delivered: time.Now().AddDate(0, 0, -8).UnixNano(),
295+
}
296+
AssertNotExistsBean(t, hookTask)
297+
assert.NoError(t, CreateHookTask(hookTask))
298+
AssertExistsAndLoadBean(t, hookTask)
299+
300+
assert.NoError(t, CleanupHookTaskTable(context.Background(), OlderThan, 168*time.Hour, 0))
301+
AssertNotExistsBean(t, hookTask)
302+
}
303+
304+
func TestCleanupHookTaskTable_OlderThan_LeavesUndelivered(t *testing.T) {
305+
assert.NoError(t, PrepareTestDatabase())
306+
hookTask := &HookTask{
307+
RepoID: 2,
308+
HookID: 4,
309+
Typ: GITEA,
310+
URL: "http://www.example.com/unit_test",
311+
Payloader: &api.PushPayload{},
312+
IsDelivered: false,
313+
}
314+
AssertNotExistsBean(t, hookTask)
315+
assert.NoError(t, CreateHookTask(hookTask))
316+
AssertExistsAndLoadBean(t, hookTask)
317+
318+
assert.NoError(t, CleanupHookTaskTable(context.Background(), OlderThan, 168*time.Hour, 0))
319+
AssertExistsAndLoadBean(t, hookTask)
320+
}
321+
322+
func TestCleanupHookTaskTable_OlderThan_LeavesTaskEarlierThanAgeToDelete(t *testing.T) {
323+
assert.NoError(t, PrepareTestDatabase())
324+
hookTask := &HookTask{
325+
RepoID: 2,
326+
HookID: 4,
327+
Typ: GITEA,
328+
URL: "http://www.example.com/unit_test",
329+
Payloader: &api.PushPayload{},
330+
IsDelivered: true,
331+
Delivered: time.Now().AddDate(0, 0, -6).UnixNano(),
332+
}
333+
AssertNotExistsBean(t, hookTask)
334+
assert.NoError(t, CreateHookTask(hookTask))
335+
AssertExistsAndLoadBean(t, hookTask)
336+
337+
assert.NoError(t, CleanupHookTaskTable(context.Background(), OlderThan, 168*time.Hour, 0))
338+
AssertExistsAndLoadBean(t, hookTask)
339+
}

modules/cron/setting.go

+8
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,14 @@ type UpdateExistingConfig struct {
4040
UpdateExisting bool
4141
}
4242

43+
// CleanupHookTaskConfig represents a cron task with settings to cleanup hook_task
44+
type CleanupHookTaskConfig struct {
45+
BaseConfig
46+
CleanupType string
47+
OlderThan time.Duration
48+
NumberToKeep int
49+
}
50+
4351
// GetSchedule returns the schedule for the base config
4452
func (b *BaseConfig) GetSchedule() string {
4553
return b.Schedule

modules/cron/tasks_basic.go

+17
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,22 @@ func registerUpdateMigrationPosterID() {
109109
})
110110
}
111111

112+
func registerCleanupHookTaskTable() {
113+
RegisterTaskFatal("cleanup_hook_task_table", &CleanupHookTaskConfig{
114+
BaseConfig: BaseConfig{
115+
Enabled: true,
116+
RunAtStart: false,
117+
Schedule: "@every 24h",
118+
},
119+
CleanupType: "OlderThan",
120+
OlderThan: 168 * time.Hour,
121+
NumberToKeep: 10,
122+
}, func(ctx context.Context, _ *models.User, config Config) error {
123+
realConfig := config.(*CleanupHookTaskConfig)
124+
return models.CleanupHookTaskTable(ctx, models.ToHookTaskCleanupType(realConfig.CleanupType), realConfig.OlderThan, realConfig.NumberToKeep)
125+
})
126+
}
127+
112128
func initBasicTasks() {
113129
registerUpdateMirrorTask()
114130
registerRepoHealthCheck()
@@ -119,4 +135,5 @@ func initBasicTasks() {
119135
if !setting.Repository.DisableMigrations {
120136
registerUpdateMigrationPosterID()
121137
}
138+
registerCleanupHookTaskTable()
122139
}

options/locale/locale_en-US.ini

+1
Original file line numberDiff line numberDiff line change
@@ -2065,6 +2065,7 @@ dashboard.resync_all_sshprincipals.desc = (Not needed for the built-in SSH serve
20652065
dashboard.resync_all_hooks = Resynchronize pre-receive, update and post-receive hooks of all repositories.
20662066
dashboard.reinit_missing_repos = Reinitialize all missing Git repositories for which records exist
20672067
dashboard.sync_external_users = Synchronize external user data
2068+
dashboard.cleanup_hook_task_table = Cleanup hook_task table
20682069
dashboard.server_uptime = Server Uptime
20692070
dashboard.current_goroutine = Current Goroutines
20702071
dashboard.current_memory_usage = Current Memory Usage

0 commit comments

Comments
 (0)