dedupe: Add dedupe largest functionality - fixes #2269

s3-about
Richard Yang 2018-04-21 22:57:08 +01:00 committed by Nick Craig-Wood
parent da4a5e1fb3
commit a81ec00a8c
3 changed files with 34 additions and 0 deletions

View File

@ -90,6 +90,7 @@ Dedupe can be run non interactively using the ` + "`" + `--dedupe-mode` + "`" +
* ` + "`" + `--dedupe-mode first` + "`" + ` - removes identical files then keeps the first one.
* ` + "`" + `--dedupe-mode newest` + "`" + ` - removes identical files then keeps the newest one.
* ` + "`" + `--dedupe-mode oldest` + "`" + ` - removes identical files then keeps the oldest one.
* ` + "`" + `--dedupe-mode largest` + "`" + ` - removes identical files then keeps the largest one.
* ` + "`" + `--dedupe-mode rename` + "`" + ` - removes identical files then renames the rest to be different.
For example to rename all the identically named photos in your Google Photos directory, do

View File

@ -119,6 +119,7 @@ const (
DeduplicateNewest // choose the newest object
DeduplicateOldest // choose the oldest object
DeduplicateRename // rename the objects
DeduplicateLargest // choose the largest object
)
func (x DeduplicateMode) String() string {
@ -135,6 +136,8 @@ func (x DeduplicateMode) String() string {
return "oldest"
case DeduplicateRename:
return "rename"
case DeduplicateLargest:
return "largest"
}
return "unknown"
}
@ -154,6 +157,8 @@ func (x *DeduplicateMode) Set(s string) error {
*x = DeduplicateOldest
case "rename":
*x = DeduplicateRename
case "largest":
*x = DeduplicateLargest
default:
return errors.Errorf("Unknown mode for dedupe %q.", s)
}
@ -260,6 +265,7 @@ func Deduplicate(f fs.Fs, mode DeduplicateMode) error {
if err != nil {
return err
}
for remote, objs := range files {
if len(objs) > 1 {
fs.Logf(remote, "Found %d duplicates - deleting identical copies", len(objs))
@ -281,6 +287,17 @@ func Deduplicate(f fs.Fs, mode DeduplicateMode) error {
dedupeDeleteAllButOne(0, remote, objs)
case DeduplicateRename:
dedupeRename(remote, objs)
case DeduplicateLargest:
size, largest, largestIndex := int64(0), int64(-1), -1
for i, obj := range objs {
size = obj.Size()
if size > largest {
largest, largestIndex = size, i
}
}
if largestIndex > -1 {
dedupeDeleteAllButOne(largestIndex, remote, objs)
}
case DeduplicateSkip:
// skip
default:

View File

@ -131,6 +131,22 @@ func TestDeduplicateOldest(t *testing.T) {
fstest.CheckItems(t, r.Fremote, file1)
}
// TestDeduplicateLargest runs Deduplicate in DeduplicateLargest mode against
// three objects written under the same name and checks that the one with the
// longest content is what the remote holds afterwards.
func TestDeduplicateLargest(t *testing.T) {
	run := fstest.NewRun(t)
	defer run.Finalise()
	skipIfCantDedupe(t, run.Fremote)

	// All three are written as "one"; the contents are 11, 15 and 19 bytes
	// long respectively, so the last one written is the largest.
	small := run.WriteUncheckedObject("one", "This is one", t1)
	medium := run.WriteUncheckedObject("one", "This is one too", t2)
	large := run.WriteUncheckedObject("one", "This is another one", t3)
	run.CheckWithDuplicates(t, small, medium, large)

	require.NoError(t, operations.Deduplicate(run.Fremote, operations.DeduplicateLargest))

	// Only the largest object is expected on the remote after deduplication.
	fstest.CheckItems(t, run.Fremote, large)
}
func TestDeduplicateRename(t *testing.T) {
r := fstest.NewRun(t)
defer r.Finalise()