cache clean implemented

This commit is contained in:
2026-05-29 03:47:10 -06:00
parent 248c1370d7
commit cca084d696
2 changed files with 250 additions and 0 deletions
+115
View File
@@ -0,0 +1,115 @@
package cache
import (
"log/slog"
"path/filepath"
"slices"
"strings"
"syscall"
"time"
"github.com/dustin/go-humanize"
)
// fileEntry is one cached file as seen by the cache cleaner.
type fileEntry struct {
	// path locates the file relative to the cache root.
	path string
	// size is the file size in bytes.
	size int64
	// atime is the file's last access time; eviction order is based on it.
	atime time.Time
}
// Clean evicts files from the cache in two passes.
//
// First, every file whose access time is older than cfg.MaxCacheAge is
// removed. If the cache is then still at or above cfg.MaxCacheSize, the least
// recently accessed files are removed until the cache has shrunk to three
// quarters of cfg.MaxCacheSize.
//
// A file that cannot be removed is logged and skipped; it keeps counting
// towards the cache size. An error is returned only when the cache contents
// cannot be listed.
func (c *Cache) Clean() error {
	// Size eviction overshoots down to 3/4 of the maximum so that the cache
	// does not sit right at the limit after a clean.
	sizeThreshold := c.cfg.MaxCacheSize * 3 / 4

	cacheSize, cachedFiles, err := c.getCachedFiles()
	if err != nil {
		return err
	}

	// cachedFiles is sorted oldest first, so the aged-out files form a prefix.
	evictBefore := time.Now().Add(-c.cfg.MaxCacheAge)
	aged := 0
	for aged < len(cachedFiles) && cachedFiles[aged].atime.Before(evictBefore) {
		aged++
	}

	removed, freed := 0, int64(0)
	for _, f := range cachedFiles[:aged] {
		if err := c.cr.Remove(f.path); err != nil {
			slog.Error("failed to remove file", "file", f.path, "err", err)
			continue
		}
		cacheSize -= f.size
		freed += f.size
		removed++
	}
	// Drop every aged-out candidate, including any that failed to be removed,
	// so the size pass below never retries them.
	cachedFiles = cachedFiles[aged:]
	slog.Info("evicted aged out pkgs", "num", removed, "size", humanize.Bytes(uint64(freed)))

	// if cache size < max size we're done
	if cacheSize < c.cfg.MaxCacheSize {
		return nil
	}

	// Remove the oldest remaining files until the cache is at or below the
	// threshold. Ranging over the slice guarantees progress even when a
	// removal fails.
	removed, freed = 0, 0
	for _, f := range cachedFiles {
		if cacheSize <= sizeThreshold {
			break
		}
		if err := c.cr.Remove(f.path); err != nil {
			slog.Error("failed to remove file", "file", f.path, "err", err)
			continue
		}
		cacheSize -= f.size
		freed += f.size
		removed++
	}
	slog.Info("evicted pkgs to reduce cache size", "num", removed, "size", humanize.Bytes(uint64(freed)))
	return nil
}
// getCachedFiles lists the repoArch directory of every mirrored repo under the
// cache root. It returns the combined size in bytes of the files found and one
// fileEntry per file, sorted by access time with the oldest first. Entry paths
// are relative to the cache root. Entries whose names end in ".gz" or ".db"
// are skipped.
func (c *Cache) getCachedFiles() (int64, []fileEntry, error) {
	cacheSize := int64(0)
	cachedFiles := []fileEntry{}

	for _, repo := range c.cfg.mirroredRepos {
		// The directory that is listed and the directory used to build entry
		// paths must be the same one, so it is computed once.
		// NOTE(review): this replaces a literal "os/x86_64" with repoArch;
		// confirm the two are meant to be identical.
		dir := filepath.Join(repo, repoArch)

		d, err := c.cr.Open(dir)
		if err != nil {
			return 0, nil, err
		}
		entries, err := d.ReadDir(-1)
		// Close before looking at the ReadDir error so the handle is released
		// on every path.
		closeErr := d.Close()
		if err != nil {
			return 0, nil, err
		}
		if closeErr != nil {
			return 0, nil, closeErr
		}

		for _, e := range entries {
			if strings.HasSuffix(e.Name(), ".gz") || strings.HasSuffix(e.Name(), ".db") {
				continue
			}
			info, err := e.Info()
			if err != nil {
				return 0, nil, err
			}

			// Prefer the access time from the raw stat data; fall back to the
			// modification time if the platform does not expose it this way.
			atime := info.ModTime()
			if st, ok := info.Sys().(*syscall.Stat_t); ok {
				atime = time.Unix(int64(st.Atim.Sec), int64(st.Atim.Nsec))
			}

			size := info.Size()
			cachedFiles = append(cachedFiles, fileEntry{
				path:  filepath.Join(dir, e.Name()),
				size:  size,
				atime: atime,
			})
			cacheSize += size
		}
	}

	slices.SortFunc(cachedFiles, func(a, b fileEntry) int {
		return a.atime.Compare(b.atime)
	})
	return cacheSize, cachedFiles, nil
}
+135
View File
@@ -0,0 +1,135 @@
package cache
import (
"bytes"
"fmt"
"os"
"path/filepath"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func newTestCachedPkgs(t *testing.T, prefix, name string, num, size int64, age time.Time, cr *os.Root) {
t.Helper()
for range num {
diffStamp := time.Now().UnixNano()
tmpFileName := fmt.Sprintf("%s%s-pkg-%d.pkg.tar.zst", prefix, name, diffStamp)
fileSize := size / num
contents := bytes.Repeat([]byte("a"), int(fileSize))
fmt.Printf("creating file: %s\n", tmpFileName)
if err := cr.WriteFile(tmpFileName, []byte(contents), 0644); err != nil {
t.Fatalf("failed to create testPkgs: %v", err)
}
if err := cr.Chtimes(tmpFileName, age, age); err != nil {
t.Fatalf("failed to set time stamp: %v", err)
}
}
}
// TestGetCachedPkgs checks that getCachedFiles reports exactly the package
// files written to the cache, with the right total size, and leaves out the
// repo database archive.
func TestGetCachedPkgs(t *testing.T) {
	t.Run("get only files ending in .pkg.tar.zst", func(t *testing.T) {
		c := newTestCache(t, []string{"http://example.com"})

		// populate cache and store names of expected files
		prefix := filepath.Join("core", repoArch)
		contents := []byte("testfile")
		var expectedFiles []string
		expectedSize := int64(0)
		for i := range 5 {
			testFilePath := filepath.Join(prefix, fmt.Sprintf("pkg-%d.pkg.tar.zst", i))
			require.NoError(t, c.cr.WriteFile(testFilePath, contents, 0644), "failed to create testfiles")
			expectedFiles = append(expectedFiles, testFilePath)
			expectedSize += int64(len(contents))
		}

		// a database archive that must not show up in the listing
		dbPath := filepath.Join(prefix, "core.db.tar.gz")
		require.NoError(t, c.cr.WriteFile(dbPath, contents, 0644), "failed to create testfile")

		size, files, err := c.getCachedFiles()
		require.NoError(t, err)
		assert.Equal(t, expectedSize, size)

		// Compare the full set of paths so that a missing package file fails
		// the test just like an unexpected extra one does.
		gotFiles := make([]string, 0, len(files))
		for _, f := range files {
			gotFiles = append(gotFiles, f.path)
		}
		assert.ElementsMatch(t, expectedFiles, gotFiles)
	})
}
func TestClean(t *testing.T) {
t.Run("remove based on age", func(t *testing.T) {
c := newTestCache(t, []string{"http://example.com"})
//populate cacheA
numFiles := int64(5)
prefix := filepath.Join("core", repoArch)
evictTime := time.Now().Add(-c.cfg.MaxCacheAge * 2)
newTestCachedPkgs(t, prefix, "/evict", numFiles, int64(10), evictTime, c.cr)
newTestCachedPkgs(t, prefix, "/keep", numFiles, c.cfg.MaxCacheSize/2, time.Now(), c.cr)
err := c.Clean()
require.NoError(t, err)
size, files, err := c.getCachedFiles()
if err != nil {
t.Fatal("failed to get cached files")
}
assert.LessOrEqual(t, size, c.cfg.MaxCacheSize)
assert.Len(t, files, int(numFiles))
for _, f := range files {
fmt.Printf("checking file %s\n", f.path)
assert.Contains(t, f.path, "keep")
assert.NotContains(t, f.path, "evict")
}
})
t.Run("evict based on size", func(t *testing.T) {
c := newTestCache(t, []string{"http://example.com"})
//populate cache
numFiles := int64(5)
prefix := filepath.Join("core", repoArch)
evictTime := time.Now().Add(-time.Millisecond * 100)
newTestCachedPkgs(t, prefix, "/evict", numFiles, c.cfg.MaxCacheSize, evictTime, c.cr)
newTestCachedPkgs(t, prefix, "/keep", numFiles, c.cfg.MaxCacheSize*3/4, time.Now(), c.cr)
err := c.Clean()
require.NoError(t, err)
size, files, err := c.getCachedFiles()
if err != nil {
t.Fatal("failed to get cached files")
}
assert.LessOrEqual(t, size, c.cfg.MaxCacheSize)
assert.Len(t, files, int(numFiles))
for _, f := range files {
fmt.Printf("checking file %s\n", f.path)
assert.Contains(t, f.path, "keep")
assert.NotContains(t, f.path, "evict")
}
})
t.Run("evict based on age and size", func(t *testing.T) {
c := newTestCache(t, []string{"http://example.com"})
//populate cache
numFiles := int64(5)
prefix := filepath.Join("core", repoArch)
evictTime := time.Now().Add(-c.cfg.MaxCacheAge * 2)
newTestCachedPkgs(t, prefix, "/evict-age", numFiles, c.cfg.MaxCacheSize, evictTime, c.cr)
newTestCachedPkgs(t, prefix, "/evict-size", numFiles, c.cfg.MaxCacheSize, time.Now(), c.cr)
newTestCachedPkgs(t, prefix, "/keep", numFiles, c.cfg.MaxCacheSize*3/4, time.Now(), c.cr)
err := c.Clean()
require.NoError(t, err)
size, files, err := c.getCachedFiles()
if err != nil {
t.Fatal("failed to get cached files")
}
assert.LessOrEqual(t, size, c.cfg.MaxCacheSize)
assert.Len(t, files, int(numFiles))
for _, f := range files {
fmt.Printf("checking file %s\n", f.path)
assert.Contains(t, f.path, "keep")
assert.NotContains(t, f.path, "evict")
}
})
}