feat(retention): added image retention policies (#1866)

feat(metaDB): add more image statistics info

Signed-off-by: Petu Eusebiu <peusebiu@cisco.com>
This commit is contained in:
peusebiu
2023-11-01 18:16:18 +02:00
committed by GitHub
parent a79d79a03a
commit 9074f8483b
71 changed files with 3454 additions and 745 deletions
+29
View File
@@ -0,0 +1,29 @@
package retention
import (
mTypes "zotregistry.io/zot/pkg/meta/types"
"zotregistry.io/zot/pkg/retention/types"
)
// GetCandidates builds one retention candidate for every tag in repoMeta.Tags
// whose manifest digest has an entry in repoMeta.Statistics.
//
// Tags whose digest has no statistics entry are left out of the result; the
// returned slice is never nil.
func GetCandidates(repoMeta mTypes.RepoMeta) []*types.Candidate {
	candidates := make([]*types.Candidate, 0, len(repoMeta.Tags))

	for tag, desc := range repoMeta.Tags {
		// Statistics is keyed by digest, so look the entry up directly
		// instead of scanning every statistics entry for each tag.
		stats, ok := repoMeta.Statistics[desc.Digest]
		if !ok {
			continue
		}

		candidates = append(candidates, &types.Candidate{
			MediaType:     desc.MediaType,
			DigestStr:     desc.Digest,
			Tag:           tag,
			PushTimestamp: stats.PushTimestamp,
			PullTimestamp: stats.LastPullTimestamp,
		})
	}

	return candidates
}
+39
View File
@@ -0,0 +1,39 @@
package retention
import "regexp"
// RegexMatcher matches names against regular expressions and caches every
// successfully compiled expression by its source string, so each pattern is
// compiled at most once per matcher.
//
// The cache is a plain map accessed without locking.
type RegexMatcher struct {
	compiled map[string]*regexp.Regexp
}

// NewRegexMatcher returns a matcher with an empty compilation cache.
func NewRegexMatcher() *RegexMatcher {
	return &RegexMatcher{
		compiled: make(map[string]*regexp.Regexp),
	}
}

// MatchesListOfRegex is used by retention. It returns true if name matches at
// least one of regexes, or if the list of regexes is empty. Patterns that fail
// to compile are skipped.
func (r *RegexMatcher) MatchesListOfRegex(name string, regexes []string) bool {
	if len(regexes) == 0 {
		// empty regexes matches everything in retention logic
		return true
	}

	for _, regex := range regexes {
		// reading from a nil map is fine, so the zero value works here too
		tagReg, cached := r.compiled[regex]
		if !cached {
			compiledReg, err := regexp.Compile(regex)
			if err != nil {
				// all are expected to be compilable because they are checked at startup
				continue
			}

			// allocate the cache lazily so a zero-value RegexMatcher does not
			// panic on the first write
			if r.compiled == nil {
				r.compiled = make(map[string]*regexp.Regexp)
			}

			r.compiled[regex] = compiledReg
			tagReg = compiledReg
		}

		if tagReg.MatchString(name) {
			return true
		}
	}

	return false
}
+272
View File
@@ -0,0 +1,272 @@
package retention
import (
"fmt"
glob "github.com/bmatcuk/doublestar/v4"
ispec "github.com/opencontainers/image-spec/specs-go/v1"
zerr "zotregistry.io/zot/errors"
"zotregistry.io/zot/pkg/api/config"
zcommon "zotregistry.io/zot/pkg/common"
zlog "zotregistry.io/zot/pkg/log"
mTypes "zotregistry.io/zot/pkg/meta/types"
"zotregistry.io/zot/pkg/retention/types"
)
const (
// reasons for gc.
// the tag matched the patterns of a tag policy, but no rule of that policy retained it.
filteredByTagRules = "didn't meet any tag retention rule"
// the tag did not match the patterns of any tag policy.
filteredByTagNames = "didn't meet any tag 'patterns' rules"
// reasons for retention.
// format string for the "keep" reason; %s is filled with the candidate's RetainedBy value.
retainedStrFormat = "retained by %s policy"
)
// candidatesRules pairs the candidates that matched one tag policy with the
// retention rules built from that same policy.
type candidatesRules struct {
// candidates whose tag matched the tag policy
candidates []*types.Candidate
// tag retention rules
rules []types.Rule
}
// policyManager answers retention questions for a repository based on the
// configured image retention policies.
type policyManager struct {
// retention configuration: the list of policies and the dry-run flag
config config.ImageRetention
// matcher used to test tags against the patterns of the tag policies
regex *RegexMatcher
// logger that receives every keep/delete decision
log zlog.Logger
// optional audit logger; when nil, delete decisions are not written to it
auditLog *zlog.Logger
}
// NewPolicyManager returns a policy manager for the given retention
// configuration, with a fresh regex matcher. auditLog may be nil.
func NewPolicyManager(config config.ImageRetention, log zlog.Logger, auditLog *zlog.Logger) policyManager {
	manager := policyManager{regex: NewRegexMatcher()}
	manager.config = config
	manager.log = log
	manager.auditLog = auditLog

	return manager
}
// HasDeleteUntagged reports whether untagged manifests of repo should be deleted.
//
// It returns false when no retention policy matches repo, true when a policy
// matches but leaves DeleteUntagged unset, and the configured value otherwise.
func (p policyManager) HasDeleteUntagged(repo string) bool {
	policy, err := p.getRepoPolicy(repo)
	if err != nil {
		// default when no policy applies to this repo
		return false
	}

	if policy.DeleteUntagged == nil {
		// a matching policy without an explicit setting deletes untagged manifests
		return true
	}

	return *policy.DeleteUntagged
}
// HasDeleteReferrer returns the DeleteReferrers setting of the first policy
// matching repo, or false when no policy matches.
func (p policyManager) HasDeleteReferrer(repo string) bool {
	policy, err := p.getRepoPolicy(repo)
	if err != nil {
		// default
		return false
	}

	return policy.DeleteReferrers
}
// HasTagRetention reports whether the first policy matching repo defines at
// least one KeepTags entry. It returns false when no policy matches.
func (p policyManager) HasTagRetention(repo string) bool {
	policy, err := p.getRepoPolicy(repo)
	if err != nil {
		// default
		return false
	}

	return len(policy.KeepTags) != 0
}
// getRules translates the settings of a tag policy into retention rules.
//
// A count rule is added for each non-zero count and a duration rule for each
// non-nil duration, always in the order: most recently pulled, most recently
// pushed, pulled within, pushed within. The result is empty (not nil) when the
// policy configures none of them.
func (p policyManager) getRules(tagPolicy config.KeepTagsPolicy) []types.Rule {
	rules := []types.Rule{}

	if pulledCount := tagPolicy.MostRecentlyPulledCount; pulledCount != 0 {
		rules = append(rules, NewLatestPull(pulledCount))
	}

	if pushedCount := tagPolicy.MostRecentlyPushedCount; pushedCount != 0 {
		rules = append(rules, NewLatestPush(pushedCount))
	}

	if pulledWithin := tagPolicy.PulledWithin; pulledWithin != nil {
		rules = append(rules, NewDaysPull(*pulledWithin))
	}

	if pushedWithin := tagPolicy.PushedWithin; pushedWithin != nil {
		rules = append(rules, NewDaysPush(*pushedWithin))
	}

	return rules
}
// GetRetainedTags returns the tags of the repository described by repoMeta
// that must be kept.
//
// A tag is retained when:
//   - it is present in index but has no candidate (no statistics were found
//     for it), or
//   - it matches a tag policy that has no rules, or
//   - it matches a tag policy and at least one of that policy's rules keeps it
//     (OR logic between rules).
//
// Every keep decision is logged to the regular logger. Every candidate that is
// not retained is logged as a delete decision, and also written to the audit
// logger when one is configured.
func (p policyManager) GetRetainedTags(repoMeta mTypes.RepoMeta, index ispec.Index) []string {
	repo := repoMeta.Name
	matchedByName := make([]string, 0)
	candidates := GetCandidates(repoMeta)
	retainTags := make([]string, 0)

	// index the candidate tags once so the check below is a lookup instead of
	// a full scan of the candidates for every tag in the index
	candidateTags := make(map[string]struct{}, len(candidates))
	for _, candidate := range candidates {
		candidateTags[candidate.Tag] = struct{}{}
	}

	// we need to make sure tags for which we can not find statistics in repoDB are not removed:
	// tags which are not in the candidates list are kept
	for _, tag := range getIndexTags(index) {
		if _, found := candidateTags[tag]; found {
			continue
		}

		p.log.Info().Str("module", "retention").
			Bool("dry-run", p.config.DryRun).
			Str("repository", repo).
			Str("tag", tag).
			Str("decision", "keep").
			Str("reason", "tag statistics not found").Msg("will keep tag")

		retainTags = append(retainTags, tag)
	}

	// group all tags by tag policy
	grouped := p.groupCandidatesByTagPolicy(repo, candidates)

	for _, group := range grouped {
		// candidates matched by the policy's patterns
		retainCandidates := group.candidates

		for _, retainedByName := range retainCandidates {
			matchedByName = append(matchedByName, retainedByName.Tag)
		}

		// if the policy has rules, only candidates kept by at least one rule are
		// retained (OR logic between rules); otherwise matching the patterns is enough
		if len(group.rules) > 0 {
			rulesCandidates := make([]*types.Candidate, 0)

			for _, rule := range group.rules {
				// every rule is evaluated against the full pattern-matched set
				rulesCandidates = append(rulesCandidates, rule.Perform(retainCandidates)...)
			}

			retainCandidates = rulesCandidates
		}

		for _, retainCandidate := range retainCandidates {
			// there may be duplicates
			if !zcommon.Contains(retainTags, retainCandidate.Tag) {
				// format reason log msg
				reason := fmt.Sprintf(retainedStrFormat, retainCandidate.RetainedBy)

				logAction(repo, "keep", reason, retainCandidate, p.config.DryRun, &p.log)

				retainTags = append(retainTags, retainCandidate.Tag)
			}
		}
	}

	// log tags which will be removed
	for _, candidateInfo := range candidates {
		if !zcommon.Contains(retainTags, candidateInfo.Tag) {
			var reason string
			if zcommon.Contains(matchedByName, candidateInfo.Tag) {
				reason = filteredByTagRules
			} else {
				reason = filteredByTagNames
			}

			logAction(repo, "delete", reason, candidateInfo, p.config.DryRun, &p.log)

			if p.auditLog != nil {
				logAction(repo, "delete", reason, candidateInfo, p.config.DryRun, p.auditLog)
			}
		}
	}

	return retainTags
}
// getRepoPolicy returns the first configured policy having a repository glob
// pattern that matches repo. Patterns for which the glob match reports an
// error are skipped. When nothing matches it returns
// zerr.ErrRetentionPolicyNotFound together with a zero-value policy.
func (p policyManager) getRepoPolicy(repo string) (config.RetentionPolicy, error) {
	for _, policy := range p.config.Policies {
		for _, pattern := range policy.Repositories {
			matched, err := glob.Match(pattern, repo)
			if err != nil || !matched {
				continue
			}

			return policy, nil
		}
	}

	return config.RetentionPolicy{}, zerr.ErrRetentionPolicyNotFound
}
// getTagPolicy returns the first tag policy whose patterns match tag, along
// with its index in tagPolicies. When no policy matches it returns index -1
// and zerr.ErrRetentionPolicyNotFound.
func (p policyManager) getTagPolicy(tag string, tagPolicies []config.KeepTagsPolicy,
) (config.KeepTagsPolicy, int, error) {
	for idx := range tagPolicies {
		if !p.regex.MatchesListOfRegex(tag, tagPolicies[idx].Patterns) {
			continue
		}

		return tagPolicies[idx], idx, nil
	}

	return config.KeepTagsPolicy{}, -1, zerr.ErrRetentionPolicyNotFound
}
// groupCandidatesByTagPolicy groups candidates by the index of the first tag
// policy matching their tag. Candidates whose tag matches no policy are left
// out of the map. Every grouped candidate gets RetainedBy set to "patterns",
// and each group carries the rules built from its tag policy.
func (p policyManager) groupCandidatesByTagPolicy(repo string, candidates []*types.Candidate,
) map[int]candidatesRules {
	grouped := make(map[int]candidatesRules)

	// no need to check for error, at this point we have both repo policy for this repo and non nil tags policy
	repoPolicy, _ := p.getRepoPolicy(repo)

	for _, candidate := range candidates {
		tagPolicy, policyIdx, err := p.getTagPolicy(candidate.Tag, repoPolicy.KeepTags)
		if err != nil {
			// no tag policy found for the current candidate, skip it (will be gc'ed)
			continue
		}

		candidate.RetainedBy = "patterns"

		group, seen := grouped[policyIdx]
		if !seen {
			// first candidate for this policy: build the policy's rules once
			group.rules = p.getRules(tagPolicy)
		}

		group.candidates = append(group.candidates, candidate)
		grouped[policyIdx] = group
	}

	return grouped
}
// logAction writes one info-level "applied policy" entry to log describing the
// decision taken for candidate in repo, together with the reason and whether
// this is a dry run.
func logAction(repo, decision, reason string, candidate *types.Candidate, dryRun bool, log *zlog.Logger) {
	event := log.Info().Str("module", "retention")
	event = event.Bool("dry-run", dryRun).Str("repository", repo)

	// what the decision applies to
	event = event.Str("mediaType", candidate.MediaType).
		Str("digest", candidate.DigestStr).
		Str("tag", candidate.Tag)

	// statistics the decision was based on
	event = event.Str("lastPullTimestamp", candidate.PullTimestamp.String()).
		Str("pushTimestamp", candidate.PushTimestamp.String())

	event.Str("decision", decision).Str("reason", reason).Msg("applied policy")
}
// getIndexTags returns the value of the ispec.AnnotationRefName annotation of
// every manifest descriptor in index that carries it, in index order. The
// result is empty (not nil) when no descriptor has the annotation.
func getIndexTags(index ispec.Index) []string {
	tags := []string{}

	for idx := range index.Manifests {
		if tag, tagged := index.Manifests[idx].Annotations[ispec.AnnotationRefName]; tagged {
			tags = append(tags, tag)
		}
	}

	return tags
}
+140
View File
@@ -0,0 +1,140 @@
package retention
import (
"fmt"
"sort"
"time"
"zotregistry.io/zot/pkg/retention/types"
)
const (
// rule names; each rule's Name() uses its constant as the prefix before ':'.
daysPullName = "pulledWithin"
daysPushName = "pushedWithin"
latestPullName = "mostRecentlyPulledCount"
latestPushName = "mostRecentlyPushedCount"
)
// rules implementation.
// DaysPull is a retention rule keeping candidates that were pulled or pushed
// within a given duration before the moment the rule is performed.
type DaysPull struct {
	duration time.Duration
}

// NewDaysPull returns a DaysPull rule for the given duration.
func NewDaysPull(duration time.Duration) DaysPull {
	var rule DaysPull
	rule.duration = duration

	return rule
}

// Name returns the rule name followed by ':' and the duration. The duration is
// formatted with %d, i.e. as its integer nanosecond count.
func (dp DaysPull) Name() string {
	return fmt.Sprintf("%s:%d", daysPullName, dp.duration)
}

// Perform returns the candidates whose pull or push timestamp is after
// now minus the rule's duration, and sets RetainedBy on each of them to the
// rule's name. The input order is preserved and the result is never nil.
func (dp DaysPull) Perform(candidates []*types.Candidate) []*types.Candidate {
	cutoff := time.Now().Add(-dp.duration)
	kept := []*types.Candidate{}

	for idx := range candidates {
		candidate := candidates[idx]

		// we check pushtimestamp because we don't want to delete tags pushed after the cutoff
		// ie: if the tag doesn't meet PulledWithin: "3days" and the image is 1day old then do not remove!
		if !candidate.PullTimestamp.After(cutoff) && !candidate.PushTimestamp.After(cutoff) {
			continue
		}

		candidate.RetainedBy = dp.Name()
		kept = append(kept, candidate)
	}

	return kept
}
// DaysPush is a retention rule keeping candidates that were pushed within a
// given duration before the moment the rule is performed.
type DaysPush struct {
	duration time.Duration
}

// NewDaysPush returns a DaysPush rule for the given duration.
func NewDaysPush(duration time.Duration) DaysPush {
	var rule DaysPush
	rule.duration = duration

	return rule
}

// Name returns the rule name followed by ':' and the duration. The duration is
// formatted with %d, i.e. as its integer nanosecond count.
func (dp DaysPush) Name() string {
	return fmt.Sprintf("%s:%d", daysPushName, dp.duration)
}

// Perform returns the candidates whose push timestamp is after now minus the
// rule's duration, and sets RetainedBy on each of them to the rule's name.
// The input order is preserved and the result is never nil.
func (dp DaysPush) Perform(candidates []*types.Candidate) []*types.Candidate {
	cutoff := time.Now().Add(-dp.duration)
	kept := []*types.Candidate{}

	for idx := range candidates {
		candidate := candidates[idx]
		if !candidate.PushTimestamp.After(cutoff) {
			continue
		}

		candidate.RetainedBy = dp.Name()
		kept = append(kept, candidate)
	}

	return kept
}
// latestPull is a retention rule keeping the count most recently pulled candidates.
type latestPull struct {
	count int
}

// NewLatestPull returns a latestPull rule keeping at most count candidates.
func NewLatestPull(count int) latestPull {
	var rule latestPull
	rule.count = count

	return rule
}

// Name returns the rule name followed by ':' and the configured count.
func (lp latestPull) Name() string {
	return fmt.Sprintf("%s:%d", latestPullName, lp.count)
}

// Perform sorts candidates in place by pull timestamp, most recent first, and
// returns the leading count elements (all of them when there are fewer than
// count) with RetainedBy set to the rule's name. The returned slice shares its
// backing array with the input.
//
// NOTE(review): a negative count would make the slice expression panic;
// presumably the configuration is validated elsewhere — confirm.
func (lp latestPull) Perform(candidates []*types.Candidate) []*types.Candidate {
	sort.Slice(candidates, func(i, j int) bool {
		return candidates[i].PullTimestamp.After(candidates[j].PullTimestamp)
	})

	// take top count candidates
	upper := len(candidates)
	if lp.count <= upper {
		upper = lp.count
	}

	top := candidates[:upper]
	ruleName := lp.Name()

	for idx := range top {
		top[idx].RetainedBy = ruleName
	}

	return top
}
// latestPush is a retention rule keeping the count most recently pushed candidates.
type latestPush struct {
	count int
}

// NewLatestPush returns a latestPush rule keeping at most count candidates.
func NewLatestPush(count int) latestPush {
	var rule latestPush
	rule.count = count

	return rule
}

// Name returns the rule name followed by ':' and the configured count.
func (lp latestPush) Name() string {
	return fmt.Sprintf("%s:%d", latestPushName, lp.count)
}

// Perform sorts candidates in place by push timestamp, most recent first, and
// returns the leading count elements (all of them when there are fewer than
// count) with RetainedBy set to the rule's name. The returned slice shares its
// backing array with the input.
//
// NOTE(review): a negative count would make the slice expression panic;
// presumably the configuration is validated elsewhere — confirm.
func (lp latestPush) Perform(candidates []*types.Candidate) []*types.Candidate {
	sort.Slice(candidates, func(i, j int) bool {
		return candidates[i].PushTimestamp.After(candidates[j].PushTimestamp)
	})

	// take top count candidates
	upper := len(candidates)
	if lp.count <= upper {
		upper = lp.count
	}

	top := candidates[:upper]
	ruleName := lp.Name()

	for idx := range top {
		top[idx].RetainedBy = ruleName
	}

	return top
}
+30
View File
@@ -0,0 +1,30 @@
package types
import (
"time"
ispec "github.com/opencontainers/image-spec/specs-go/v1"
mTypes "zotregistry.io/zot/pkg/meta/types"
)
// Candidate describes one tagged manifest that is evaluated by the retention rules.
type Candidate struct {
// digest of the tagged manifest
DigestStr string
// media type of the tagged manifest
MediaType string
// tag pointing at the manifest
Tag string
// when the manifest was pushed
PushTimestamp time.Time
// when the manifest was last pulled
PullTimestamp time.Time
// name of the pattern match or rule that retained this candidate; used in the "keep" log reason
RetainedBy string
}
// PolicyManager exposes the retention decisions for a repository.
type PolicyManager interface {
// HasDeleteReferrer reports whether referrers should be deleted for repo.
HasDeleteReferrer(repo string) bool
// HasDeleteUntagged reports whether untagged manifests should be deleted for repo.
HasDeleteUntagged(repo string) bool
// HasTagRetention reports whether repo has any tag retention policy configured.
HasTagRetention(repo string) bool
// GetRetainedTags returns the tags of the repository that must be kept.
GetRetainedTags(repoMeta mTypes.RepoMeta, index ispec.Index) []string
}
// Rule is a single retention rule applied to a set of candidates.
type Rule interface {
// Name returns the name identifying the rule.
Name() string
// Perform returns the subset of candidates retained by the rule.
Perform(candidates []*Candidate) []*Candidate
}