diff --git a/internal/cache/clean.go b/internal/cache/clean.go
new file mode 100644
index 0000000..ba35935
--- /dev/null
+++ b/internal/cache/clean.go
@@ -0,0 +1,141 @@
+package cache
+
+import (
+	"fmt"
+	"log/slog"
+	"path/filepath"
+	"slices"
+	"strings"
+	"syscall"
+	"time"
+
+	"github.com/dustin/go-humanize"
+)
+
+// fileEntry describes one cached file that is a candidate for eviction.
+type fileEntry struct {
+	path  string    // path relative to the cache root
+	size  int64     // size in bytes
+	atime time.Time // last access time; eviction goes oldest first
+}
+
+// Clean evicts files from the cache in two passes.
+//
+// Files last accessed more than cfg.MaxCacheAge ago are always removed. If the
+// cache is still at or above cfg.MaxCacheSize afterwards, the least recently
+// accessed files are removed until the cache is no larger than 3/4 of
+// cfg.MaxCacheSize.
+//
+// A file that cannot be removed is logged and skipped; it keeps counting
+// towards the cache size. An error is returned only when the cache contents
+// cannot be listed.
+func (c *Cache) Clean() error {
+	// Target size for the size pass: 3/4 of the configured maximum.
+	sizeThreshold := c.cfg.MaxCacheSize * 3 / 4
+
+	cacheSize, cachedFiles, err := c.getCachedFiles()
+	if err != nil {
+		return err
+	}
+
+	// Pass 1: remove everything older than the maximum age. cachedFiles is
+	// sorted oldest first, so the first young-enough file ends the pass.
+	evictBefore := time.Now().Add(-c.cfg.MaxCacheAge)
+	num, freed := 0, int64(0)
+	// remaining collects, oldest first, every file still on disk after this
+	// pass: files whose removal failed plus all files that are young enough.
+	remaining := make([]fileEntry, 0, len(cachedFiles))
+	for i, f := range cachedFiles {
+		if !f.atime.Before(evictBefore) {
+			remaining = append(remaining, cachedFiles[i:]...)
+			break
+		}
+		if err := c.cr.Remove(f.path); err != nil {
+			slog.Error("failed to remove file", "file", f.path, "err", err)
+			remaining = append(remaining, f)
+			continue
+		}
+		cacheSize -= f.size
+		freed += f.size
+		num++
+	}
+	slog.Info("evicted aged out pkgs", "num", num, "size", humanize.Bytes(uint64(freed)))
+
+	// Below the maximum size: nothing more to do.
+	if cacheSize < c.cfg.MaxCacheSize {
+		return nil
+	}
+
+	// Pass 2: remove the oldest remaining files until the threshold is met.
+	// Ranging over the slice guarantees progress even when a removal fails.
+	num, freed = 0, 0
+	for _, f := range remaining {
+		if cacheSize <= sizeThreshold {
+			break
+		}
+		if err := c.cr.Remove(f.path); err != nil {
+			slog.Error("failed to remove file", "file", f.path, "err", err)
+			continue
+		}
+		cacheSize -= f.size
+		freed += f.size
+		num++
+	}
+	slog.Info("evicted pkgs to reduce cache size", "num", num, "size", humanize.Bytes(uint64(freed)))
+
+	return nil
+}
+
+// getCachedFiles lists the evictable files of every mirrored repo.
+//
+// It returns the summed size of those files and the files themselves, sorted
+// by access time, oldest first. Each fileEntry path is relative to the cache
+// root. Directories and files ending in ".gz" or ".db" are left out.
+func (c *Cache) getCachedFiles() (int64, []fileEntry, error) {
+	var (
+		cacheSize   int64
+		cachedFiles []fileEntry
+	)
+	for _, repo := range c.cfg.mirroredRepos {
+		relPath := filepath.Join(repo, repoArch)
+		dir, err := c.cr.Open(relPath)
+		if err != nil {
+			return 0, nil, fmt.Errorf("opening cache dir %q: %w", relPath, err)
+		}
+		entries, err := dir.ReadDir(-1)
+		// Close before acting on the ReadDir error so the handle never leaks.
+		closeErr := dir.Close()
+		if err != nil {
+			return 0, nil, fmt.Errorf("reading cache dir %q: %w", relPath, err)
+		}
+		if closeErr != nil {
+			return 0, nil, fmt.Errorf("closing cache dir %q: %w", relPath, closeErr)
+		}
+		for _, e := range entries {
+			name := e.Name()
+			if e.IsDir() || strings.HasSuffix(name, ".gz") || strings.HasSuffix(name, ".db") {
+				continue
+			}
+			info, err := e.Info()
+			if err != nil {
+				return 0, nil, fmt.Errorf("stat %q: %w", filepath.Join(relPath, name), err)
+			}
+			// Prefer the access time; fall back to the modification time when
+			// the platform-specific stat data is not available.
+			atime := info.ModTime()
+			if st, ok := info.Sys().(*syscall.Stat_t); ok {
+				atime = time.Unix(int64(st.Atim.Sec), int64(st.Atim.Nsec))
+			}
+			cachedFiles = append(cachedFiles, fileEntry{
+				path:  filepath.Join(relPath, name),
+				size:  info.Size(),
+				atime: atime,
+			})
+			cacheSize += info.Size()
+		}
+	}
+	slices.SortFunc(cachedFiles, func(a, b fileEntry) int {
+		return a.atime.Compare(b.atime)
+	})
+	return cacheSize, cachedFiles, nil
+}
diff --git a/internal/cache/clean_test.go b/internal/cache/clean_test.go
new file mode 100644
index 0000000..4279a33
--- /dev/null
+++ b/internal/cache/clean_test.go
@@ -0,0 +1,132 @@
+package cache
+
+import (
+	"bytes"
+	"fmt"
+	"os"
+	"path/filepath"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// newTestCachedPkgs creates num package files in cr, named
+// "<prefix><name>-pkg-<i>.pkg.tar.zst" and size/num bytes long each, and sets
+// their access and modification times to age.
+func newTestCachedPkgs(t *testing.T, prefix, name string, num, size int64, age time.Time, cr *os.Root) {
+	t.Helper()
+	if num <= 0 {
+		return
+	}
+	contents := bytes.Repeat([]byte("a"), int(size/num))
+	for i := range num {
+		// The loop index keeps names unique; a timestamp can repeat on a
+		// coarse clock, which would silently overwrite an earlier file.
+		fileName := fmt.Sprintf("%s%s-pkg-%d.pkg.tar.zst", prefix, name, i)
+		t.Logf("creating file: %s", fileName)
+		if err := cr.WriteFile(fileName, contents, 0644); err != nil {
+			t.Fatalf("failed to create testPkgs: %v", err)
+		}
+		if err := cr.Chtimes(fileName, age, age); err != nil {
+			t.Fatalf("failed to set time stamp: %v", err)
+		}
+	}
+}
+
+// assertOnlyKept checks that size is within maxSize and that files holds
+// exactly want entries, all of them "keep" files.
+func assertOnlyKept(t *testing.T, files []fileEntry, size, maxSize int64, want int) {
+	t.Helper()
+	assert.LessOrEqual(t, size, maxSize)
+	assert.Len(t, files, want)
+	for _, f := range files {
+		t.Logf("checking file %s", f.path)
+		assert.Contains(t, f.path, "keep")
+		assert.NotContains(t, f.path, "evict")
+	}
+}
+
+func TestGetCachedPkgs(t *testing.T) {
+	t.Run("get only files ending in .pkg.tar.zst", func(t *testing.T) {
+		c := newTestCache(t, []string{"http://example.com"})
+		// populate cache and store names of expected files
+		prefix := filepath.Join("core", repoArch)
+		contents := "testfile"
+		var expectedFiles []string
+		expectedSize := int64(0)
+		for i := range 5 {
+			testFilePath := filepath.Join(prefix, fmt.Sprintf("pkg-%d.pkg.tar.zst", i))
+			expectedFiles = append(expectedFiles, testFilePath)
+			expectedSize += int64(len(contents))
+			if err := c.cr.WriteFile(testFilePath, []byte(contents), 0644); err != nil {
+				t.Fatalf("failed to create testfiles: %v", err)
+			}
+		}
+		// add a repo database file, which must not be reported
+		dbPath := filepath.Join(prefix, "core.db.tar.gz")
+		if err := c.cr.WriteFile(dbPath, []byte("testfile"), 0644); err != nil {
+			t.Fatalf("failed to create testfile: %v", err)
+		}
+		size, files, err := c.getCachedFiles()
+		require.NoError(t, err)
+		assert.Equal(t, expectedSize, size)
+		assert.Len(t, files, len(expectedFiles))
+		for _, f := range files {
+			assert.Contains(t, expectedFiles, f.path)
+		}
+	})
+}
+
+func TestClean(t *testing.T) {
+	t.Run("remove based on age", func(t *testing.T) {
+		c := newTestCache(t, []string{"http://example.com"})
+		// populate cache
+		numFiles := int64(5)
+		prefix := filepath.Join("core", repoArch)
+		evictTime := time.Now().Add(-c.cfg.MaxCacheAge * 2)
+		newTestCachedPkgs(t, prefix, "/evict", numFiles, int64(10), evictTime, c.cr)
+		newTestCachedPkgs(t, prefix, "/keep", numFiles, c.cfg.MaxCacheSize/2, time.Now(), c.cr)
+
+		require.NoError(t, c.Clean())
+		size, files, err := c.getCachedFiles()
+		require.NoError(t, err, "failed to get cached files")
+		assertOnlyKept(t, files, size, c.cfg.MaxCacheSize, int(numFiles))
+	})
+
+	t.Run("evict based on size", func(t *testing.T) {
+		c := newTestCache(t, []string{"http://example.com"})
+		// populate cache
+		numFiles := int64(5)
+		prefix := filepath.Join("core", repoArch)
+		evictTime := time.Now().Add(-time.Millisecond * 100)
+		newTestCachedPkgs(t, prefix, "/evict", numFiles, c.cfg.MaxCacheSize, evictTime, c.cr)
+		newTestCachedPkgs(t, prefix, "/keep", numFiles, c.cfg.MaxCacheSize*3/4, time.Now(), c.cr)
+
+		require.NoError(t, c.Clean())
+		size, files, err := c.getCachedFiles()
+		require.NoError(t, err, "failed to get cached files")
+		assertOnlyKept(t, files, size, c.cfg.MaxCacheSize, int(numFiles))
+	})
+
+	t.Run("evict based on age and size", func(t *testing.T) {
+		c := newTestCache(t, []string{"http://example.com"})
+		// populate cache
+		numFiles := int64(5)
+		prefix := filepath.Join("core", repoArch)
+		agedOut := time.Now().Add(-c.cfg.MaxCacheAge * 2)
+		// Older than the "keep" files but within the maximum age, so these
+		// sort first in the size pass without depending on timestamp
+		// resolution.
+		oversize := time.Now().Add(-c.cfg.MaxCacheAge / 2)
+		newTestCachedPkgs(t, prefix, "/evict-age", numFiles, c.cfg.MaxCacheSize, agedOut, c.cr)
+		newTestCachedPkgs(t, prefix, "/evict-size", numFiles, c.cfg.MaxCacheSize, oversize, c.cr)
+		newTestCachedPkgs(t, prefix, "/keep", numFiles, c.cfg.MaxCacheSize*3/4, time.Now(), c.cr)
+
+		require.NoError(t, c.Clean())
+		size, files, err := c.getCachedFiles()
+		require.NoError(t, err, "failed to get cached files")
+		assertOnlyKept(t, files, size, c.cfg.MaxCacheSize, int(numFiles))
+	})
+}