// Package main implements a small command-line tool that works on file hashes.
//
// With one directory argument (or none, meaning ".") it walks that directory
// and records, for every file, a full SHA-1 and a sampled "quick" hash in a
// freshly created SQLite database. With two arguments it walks the second
// directory and prints every file whose quick hash is not in the database.
package main

import (
	"crypto/sha1"
	"database/sql"
	"encoding/binary"
	"encoding/hex"
	"errors"
	"flag"
	"fmt"
	"hash"
	"io"
	"log"
	"os"
	"path/filepath"
	"reflect"
	"strconv"
	"strings"
	"time"

	_ "github.com/mattn/go-sqlite3"
)

const (
	// sampleSize is the number of bytes read at each of the three sample
	// positions (start, middle, end) of a large file.
	sampleSize = 16 * 1024

	// sampleThreshold is the largest file size that is still hashed in full
	// for the quick hash; strictly larger files are sampled instead.
	sampleThreshold = 48 * 1024

	// sizeFieldLen is the minimum number of bytes of the varint-encoded file
	// size that is appended to every quick hash (zero padded).
	sizeFieldLen = 8
)

const (
	createTableSQL = `
create table hashes (
	hash_sha1      text     not null primary key,
	hash_quick     text     not null,
	filename       text     not null,
	filesize_bytes integer  not null,
	path           text     not null,
	changedate     datetime not null
);`

	insertHashSQL = "INSERT INTO hashes (hash_sha1, hash_quick, filename, filesize_bytes, path, changedate) " +
		"VALUES (?, ?, ?, ?, ?, ?);"

	selectPathSQL = "SELECT path FROM hashes WHERE hash_quick = ?;"

	// uniqueSHA1Violation is the fragment of the driver's error text that
	// identifies a primary-key collision on hash_sha1, i.e. a duplicate file.
	uniqueSHA1Violation = "UNIQUE constraint failed: hashes.hash_sha1"
)

var (
	// dbFilePath is the database location given with -f.
	dbFilePath string
	// verbose enables per-file output while the database is built (-v).
	verbose bool
)

// normalizePath returns p as an absolute, cleaned path so that different
// spellings of the same location ("x", "./x", "a/../x") compare equal. If the
// working directory cannot be determined it falls back to a cleaned p.
func normalizePath(p string) string {
	if abs, err := filepath.Abs(p); err == nil {
		return abs
	}
	return filepath.Clean(p)
}

// checkBlacklist reports whether pathToCheck may be processed. It returns
// false when the path names the running executable (os.Args[0]) or the hash
// database (dbFilePath), and true for every other path.
func checkBlacklist(pathToCheck string) bool {
	candidate := normalizePath(pathToCheck)
	for _, excluded := range []string{os.Args[0], dbFilePath} {
		if candidate == normalizePath(excluded) {
			return false
		}
	}
	return true
}

// copyHash returns an independent copy of src that carries the same internal
// state, so both hashes can be advanced separately afterwards. The copy is a
// shallow, reflection-based struct copy; it is only independent for hash
// implementations whose state is held in value fields (no shared pointers or
// slices). It returns nil when src is nil.
func copyHash(src hash.Hash) hash.Hash {
	if src == nil {
		return nil
	}
	val := reflect.ValueOf(src)
	if val.Kind() == reflect.Ptr {
		if val.IsNil() {
			return nil
		}
		val = val.Elem()
	}
	clone := reflect.New(val.Type()).Elem()
	clone.Set(val)
	return clone.Addr().Interface().(hash.Hash)
}

// hashFileVeryQuickly finishes a quick hash in *targetHash.
//
// When fileSize is larger than sampleThreshold the hash is reset and fed three
// windows of sampleSize bytes: the start of the file, a window centred on the
// middle, and the end. Otherwise the hash is left as the caller prepared it
// (callers feed it the whole file beforehand). In both cases the file size is
// appended as a varint, zero padded to sizeFieldLen bytes.
//
// The current offset of file does not matter; the function seeks explicitly.
// It returns an error if targetHash is unusable or a sample cannot be read in
// full.
func hashFileVeryQuickly(file *os.File, fileSize int64, targetHash *hash.Hash) error {
	if targetHash == nil || *targetHash == nil {
		return errors.New("quick hash: no target hash given")
	}
	h := *targetHash

	if fileSize > int64(sampleThreshold) {
		h.Reset()

		samples := []struct {
			offset int64
			whence int
		}{
			{0, io.SeekStart}, // start of file
			{fileSize/2 - int64(sampleSize)/2, io.SeekStart}, // middle of file
			{-int64(sampleSize), io.SeekEnd},                 // end of file
		}

		buffer := make([]byte, sampleSize)
		for _, s := range samples {
			if _, err := file.Seek(s.offset, s.whence); err != nil {
				return fmt.Errorf("quick hash: seeking to sample: %w", err)
			}
			// ReadFull guards against short reads, which would otherwise
			// leave bytes of the previous sample in the buffer.
			if _, err := io.ReadFull(file, buffer); err != nil {
				return fmt.Errorf("quick hash: reading sample: %w", err)
			}
			h.Write(buffer) // hash.Hash.Write never returns an error
		}
	}

	// The buffer is large enough for any int64, so PutVarint cannot panic; at
	// least sizeFieldLen bytes are written to keep the historical encoding.
	var sizeBytes [binary.MaxVarintLen64]byte
	n := binary.PutVarint(sizeBytes[:], fileSize)
	if n < sizeFieldLen {
		n = sizeFieldLen
	}
	h.Write(sizeBytes[:n])
	return nil
}

// hashFileWithBothHashes hashes the file at path and returns its quick hash
// and its full SHA-1, in that order, both hex encoded. fileSize is the size
// used for the quick hash. Any I/O error terminates the program.
func hashFileWithBothHashes(path string, fileSize int64) (string, string) {
	f, err := os.Open(path)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	full := sha1.New()
	if _, err := io.Copy(full, f); err != nil {
		log.Fatal(err)
	}

	// The quick hash starts from the full-content state; for large files
	// hashFileVeryQuickly resets it and samples the file instead.
	quick := copyHash(full)
	if err := hashFileVeryQuickly(f, fileSize, &quick); err != nil {
		log.Fatalf("%s: %v", path, err)
	}
	return hex.EncodeToString(quick.Sum(nil)), hex.EncodeToString(full.Sum(nil))
}

// hashFileWithQuickHash returns the hex-encoded quick hash of the file at
// path, computed the same way as the quick hash of hashFileWithBothHashes but
// without reading large files completely. Any I/O error terminates the
// program.
func hashFileWithQuickHash(path string, fileSize int64) string {
	f, err := os.Open(path)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	quick := sha1.New()
	// Files that are not sampled (size <= sampleThreshold) are hashed in full.
	if fileSize <= int64(sampleThreshold) {
		if _, err := io.Copy(quick, f); err != nil {
			log.Fatal(err)
		}
	}
	if err := hashFileVeryQuickly(f, fileSize, &quick); err != nil {
		log.Fatalf("%s: %v", path, err)
	}
	return hex.EncodeToString(quick.Sum(nil))
}

// initialiseDatabase deletes any existing file at dbFilePath and creates a new
// SQLite database there containing an empty hashes table. It returns an error
// if the old file cannot be removed or the database cannot be set up.
func initialiseDatabase(dbFilePath string) error {
	if err := os.Remove(dbFilePath); err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("removing old database %q: %w", dbFilePath, err)
	}

	db, err := sql.Open("sqlite3", dbFilePath)
	if err != nil {
		return fmt.Errorf("opening database %q: %w", dbFilePath, err)
	}
	defer db.Close()

	if _, err := db.Exec(createTableSQL); err != nil {
		return fmt.Errorf("creating hashes table in %q: %w", dbFilePath, err)
	}
	return nil
}

// createHashDatabase rebuilds the database at dbFilePath from the files below
// searchDir. Every file that passes checkBlacklist is hashed and inserted;
// a file whose SHA-1 is already present is reported as a duplicate on standard
// output. Other insert failures are logged and the walk continues. Failing to
// set up the database or to walk the directory terminates the program.
func createHashDatabase(searchDir, dbFilePath string) {
	if err := initialiseDatabase(dbFilePath); err != nil {
		log.Fatal(err)
	}

	db, err := sql.Open("sqlite3", dbFilePath)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	err = filepath.Walk(searchDir, func(path string, info os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if info.IsDir() || !checkBlacklist(path) {
			return nil
		}

		quickSum, hashSum := hashFileWithBothHashes(path, info.Size())
		changed := info.ModTime().Format(time.RFC3339)

		if verbose {
			fmt.Printf("Name: %s\n", info.Name())
			fmt.Printf("SHA1: %s\n", hashSum)
			fmt.Printf("qSHA1: %s\n", quickSum)
			fmt.Printf("Size: %d\n", info.Size())
			fmt.Printf("Time: %s\n\n", changed)
		}

		// Values are bound as parameters so that quotes in file names cannot
		// break or alter the statement. The size is bound as decimal text, as
		// before; the column's integer affinity stores it as an integer.
		_, err := db.Exec(insertHashSQL,
			hashSum,
			quickSum,
			info.Name(),
			strconv.FormatInt(info.Size(), 10),
			path,
			changed,
		)
		switch {
		case err == nil:
		case strings.Contains(err.Error(), uniqueSHA1Violation):
			fmt.Printf("duplicate found:\n%s\n\n", path)
		default:
			log.Printf("%q: %s (path %q)\n", err, insertHashSQL, path)
		}
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
}

// copyFilesToHashedTarget walks src and prints the path of every file whose
// quick hash is not found in the database at dbFilePath. Files whose quick
// hash is already recorded are skipped silently; lookup failures are logged
// and the walk continues. The dest parameter is currently not used. It returns
// an error if the database cannot be opened or the walk fails.
func copyFilesToHashedTarget(src, dest, dbFilePath string) error {
	db, err := sql.Open("sqlite3", dbFilePath)
	if err != nil {
		return fmt.Errorf("opening database %q: %w", dbFilePath, err)
	}
	defer db.Close()

	return filepath.Walk(src, func(path string, info os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if info.IsDir() || !checkBlacklist(path) {
			return nil
		}

		quickSum := hashFileWithQuickHash(path, info.Size())

		var targetPath string
		switch err := db.QueryRow(selectPathSQL, quickSum).Scan(&targetPath); {
		case err == nil:
			// Already present in the database: nothing to report.
		case errors.Is(err, sql.ErrNoRows):
			// No duplicate found: output the source file.
			fmt.Println(path)
		default:
			log.Printf("%q: %s (path %q)\n", err, selectPathSQL, path)
		}
		return nil
	})
}

// main parses the command line and dispatches to one of the two modes:
// "[dir]" builds the database from dir (default "."), and "dir srcdir" lists
// the files below srcdir that are not yet in the database.
func main() {
	flag.StringVar(&dbFilePath, "f", "sneakerhash.db", "path to hash database file")
	flag.BoolVar(&verbose, "v", false, "verbosely list files and their hashes")
	flag.Parse()

	destDir, srcDir := ".", ""
	if flag.NArg() > 0 {
		destDir = flag.Arg(0)
	}
	if flag.NArg() > 1 {
		srcDir = flag.Arg(1)
	}

	if srcDir == "" {
		createHashDatabase(destDir, dbFilePath)
		return
	}
	if err := copyFilesToHashedTarget(srcDir, destDir, dbFilePath); err != nil {
		log.Fatal(err)
	}
}