diff --git a/sneakerhash.go b/sneakerhash.go
index ba8bba6..ac1b63c 100755
--- a/sneakerhash.go
+++ b/sneakerhash.go
@@ -15,10 +15,30 @@ import (
 	"encoding/binary"
 	"hash"
 	"reflect"
+	"flag"
+	"strings"
 )
 
 const sampleSize = 16 * 1024
 const sampleThreshold = 48 * 1024
+var dbFilePath string
+var verbose bool
+
+// checkBlacklist reports whether pathToCheck may be processed. It returns
+// false when the path names the running executable (os.Args[0]) or the hash
+// database file (dbFilePath), and true otherwise.
+func checkBlacklist(pathToCheck string) bool {
+	// Compare cleaned paths so that "x", "./x" and "././x" count as the same
+	// file. The comparison stays lexical: relative and absolute spellings of
+	// one file are still treated as different paths.
+	candidate := filepath.Clean(pathToCheck)
+	for _, v := range []string{os.Args[0], dbFilePath} {
+		if filepath.Clean(v) == candidate {
+			return false
+		}
+	}
+	return true
+}
 
 func copyHash(src hash.Hash) hash.Hash {
 	typ := reflect.TypeOf(src)
@@ -32,8 +52,108 @@ func copyHash(src hash.Hash) hash.Hash {
 	return elem.Addr().Interface().(hash.Hash)
 }
 
-func hash_directory (searchDir, database string) {
-	db, dberr := sql.Open("sqlite3", database)
+// hashFileVeryQuickly feeds the quick-hash input for file into *targetHash.
+// For files larger than sampleThreshold the hash is reset and three samples
+// of sampleSize bytes (beginning, middle, end) are written to it; for other
+// files the hash state is left as the caller prepared it. The file size is
+// always written last. It returns the first read error encountered.
+func hashFileVeryQuickly(file *os.File, fileSize int64, targetHash *hash.Hash) error {
+	if fileSize > int64(sampleThreshold) {
+		(*targetHash).Reset()
+		buffer := make([]byte, sampleSize)
+		// ReadAt ignores the current file offset, so the samples are the same
+		// whether or not the caller has already read through the file.
+		offsets := []int64{0, fileSize/2 - int64(sampleSize)/2, fileSize - int64(sampleSize)}
+		for _, offset := range offsets {
+			if _, err := file.ReadAt(buffer, offset); err != nil {
+				return err
+			}
+			(*targetHash).Write(buffer)
+		}
+	}
+	buffer := make([]byte, 8)
+	// puts filesize into hash
+	binary.PutVarint(buffer, fileSize)
+	(*targetHash).Write(buffer)
+	return nil
+}
+
+// hashFileWithBothHashes opens path and returns the hex-encoded quick hash
+// and full SHA-1 of its contents, in that order. Any I/O error is fatal.
+func hashFileWithBothHashes(path string, fileSize int64) (string, string) {
+	f, err := os.Open(path)
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer f.Close()
+	h := sha1.New()
+	if _, err := io.Copy(h, f); err != nil {
+		log.Fatal(err)
+	}
+	qh := copyHash(h)
+	if err := hashFileVeryQuickly(f, fileSize, &qh); err != nil {
+		log.Fatal(err)
+	}
+	return hex.EncodeToString(qh.Sum(nil)), hex.EncodeToString(h.Sum(nil))
+}
+
+// hashFileWithQuickHash opens path and returns only its hex-encoded quick
+// hash. Files not larger than sampleThreshold are read completely, matching
+// what hashFileWithBothHashes computes for them. Any I/O error is fatal.
+func hashFileWithQuickHash(path string, fileSize int64) string {
+	f, err := os.Open(path)
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer f.Close()
+	qh := sha1.New()
+	// <= (not <): a file of exactly sampleThreshold bytes is not sampled by
+	// hashFileVeryQuickly, so its content has to be hashed here.
+	if fileSize <= int64(sampleThreshold) {
+		if _, err := io.Copy(qh, f); err != nil {
+			log.Fatal(err)
+		}
+	}
+	if err := hashFileVeryQuickly(f, fileSize, &qh); err != nil {
+		log.Fatal(err)
+	}
+	return hex.EncodeToString(qh.Sum(nil))
+}
+
+// initialiseDatabase removes any existing file at dbFilePath and creates a
+// new database there containing an empty hashes table.
+func initialiseDatabase(dbFilePath string) error {
+	// the result is ignored on purpose: presumably the file may not exist yet
+	os.Remove(dbFilePath)
+	db, err := sql.Open("sqlite3", dbFilePath)
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer db.Close()
+	sqlStatement := `
+	create table hashes (
+		hash_sha1 text not null primary key,
+		hash_quick text not null,
+		filename text not null,
+		filesize_bytes integer not null,
+		path text not null,
+		changedate datetime not null
+	);`
+	_, err = db.Exec(sqlStatement)
+	if err != nil {
+		log.Printf("%q: %s\n", err, sqlStatement)
+		return err
+	}
+	return nil
+}
+
+// createHashDatabase recreates the database at dbFilePath and stores one row
+// per non-directory, non-blacklisted file it visits.
+func createHashDatabase(searchDir, dbFilePath string) {
+	if err := initialiseDatabase(dbFilePath); err != nil {
+		log.Fatal(err)
+	}
+	db, dberr := sql.Open("sqlite3", dbFilePath)
 	if dberr != nil {
 		log.Fatal(dberr)
 	}
@@ -44,47 +164,31 @@ func hash_directory (searchDir, database string) {
 			log.Fatal(err)
 			return err
 		}
-		if !info.IsDir() {
-			fmt.Printf("Name: %s\n", info.Name())
-			f, err := os.Open(path)
-			if err != nil {
-				log.Fatal(err)
+		if !info.IsDir() && checkBlacklist(path) {
+			quickSum, hashSum := hashFileWithBothHashes(path, info.Size())
+			if verbose {
+				fmt.Printf("Name: %s\n", info.Name())
+				fmt.Printf("SHA1: %s\n", hashSum)
+				fmt.Printf("qSHA1: %s\n", quickSum)
+				fmt.Printf("Size: %d\n", info.Size())
+				fmt.Printf("Time: %s\n\n", info.ModTime().Format(time.RFC3339))
 			}
 
-			defer f.Close()
-			h := sha1.New()
-			if _, err := io.Copy(h, f); err != nil {
-				log.Fatal(err)
-			}
-			qh := copyHash(h)
-			if info.Size() > int64(sampleThreshold) {
-				qh.Reset()
-				buffer := make([]byte, sampleSize)
-				f.Read(buffer)
-				qh.Write(buffer)
-				f.Seek(info.Size()/2-sampleSize/2, 0)
-				f.Read(buffer)
-				qh.Write(buffer)
-				f.Seek(int64(-sampleSize), 2)
-				f.Read(buffer)
-				qh.Write(buffer)
-			}
-			buffer := make([]byte, 8)
-			binary.PutVarint(buffer, info.Size())
-			qh.Write(buffer)
-			quickSum := hex.EncodeToString(qh.Sum(nil))
-
-			hashSum := hex.EncodeToString(h.Sum(nil))
-			fmt.Printf("SHA1: %x\n", hashSum)
-			fmt.Printf("qSHA1: %x\n", quickSum)
-			fmt.Printf("Size: %d\n", info.Size())
-			fmt.Printf("Time: %s\n\n", info.ModTime().Format(time.RFC3339))
-			sqlStatement := "INSERT INTO hashes (hash_sha1, filename, filesize_bytes, path, changedate )"+
-				"VALUES ('"+hashSum+"','"+info.Name()+"','"+strconv.FormatInt(info.Size(),10)+"','"+path+"','"+info.ModTime().Format(time.RFC3339)+"');"
-			_, err = db.Exec(sqlStatement)
+			// save file information into database. Values are bound as parameters
+			// so that names containing quotes cannot break the statement. The size
+			// is still passed in its decimal text form, as before, which also
+			// keeps the file's strconv import in use.
+			sqlStatement := "INSERT INTO hashes (hash_sha1, hash_quick, filename, filesize_bytes, path, changedate) " +
+				"VALUES (?, ?, ?, ?, ?, ?);"
+			_, err = db.Exec(sqlStatement, hashSum, quickSum, info.Name(),
+				strconv.FormatInt(info.Size(), 10), path, info.ModTime().Format(time.RFC3339))
 			if err != nil {
-				log.Printf("%q: %s\n", err, sqlStatement)
-				return nil
+				if strings.Contains(err.Error(), "UNIQUE constraint failed: hashes.hash_sha1") {
+					fmt.Println("duplicate found:\n" + path + "\n")
+				} else {
+					log.Printf("%q: %s\n", err, sqlStatement)
+					return nil
+				}
 			}
 		}
 
@@ -95,27 +199,66 @@ func hash_directory (searchDir, database string) {
 	}
 }
 
-func main() {
-	dbname := "./sneakerhash.db"
-	os.Remove(dbname)
-	db, err := sql.Open("sqlite3", dbname)
-	if err != nil {
-		log.Fatal(err)
+// copyFilesToHashedTarget walks src and prints the path of every
+// non-directory, non-blacklisted entry whose quick hash has no row in the
+// hashes table of the database at dbFilePath. dest is currently unused.
+func copyFilesToHashedTarget(src, dest, dbFilePath string) error {
+	db, dberr := sql.Open("sqlite3", dbFilePath)
+	if dberr != nil {
+		log.Fatal(dberr)
 	}
 	defer db.Close()
-	sqlStatement := `
-	create table hashes (
-		hash_sha1 text not null primary key,
-		filename text,
-		filesize_bytes integer,
-		path text,
-		changedate datetime
-	);`
-	_, err = db.Exec(sqlStatement)
+	err := filepath.Walk(src,
+		func(path string, info os.FileInfo, err error) error {
+			if err != nil {
+				log.Fatal(err)
+				return err
+			}
+			if !info.IsDir() && checkBlacklist(path) {
+				quickSum := hashFileWithQuickHash(path, info.Size())
+				sqlStatement := "SELECT path FROM hashes WHERE hash_quick = ?;"
+				var targetPath string
+				err := db.QueryRow(sqlStatement, quickSum).Scan(&targetPath)
+				switch {
+				case err == nil:
+					// a row with this quick hash exists: nothing to report
+				case err == sql.ErrNoRows:
+					// no duplicate found: output source file list
+					fmt.Println(path)
+				default:
+					log.Printf("%q: %s\n", err, sqlStatement)
+				}
+			}
+			return nil
+		})
 	if err != nil {
-		log.Printf("%q: %s\n", err, sqlStatement)
-		return
+		log.Fatal(err)
+	}
+	return nil
+}
+
+// main parses the command line. With at most one directory argument (default
+// ".") it builds the hash database for that directory; with a second argument
+// it lists the files below the second directory that are not in the database.
+func main() {
+	// define command line flags
+	flag.StringVar(&dbFilePath, "f", "sneakerhash.db", "path to hash database file")
+	flag.BoolVar(&verbose, "v", false, "verbosely list files and their hashes")
+	// parse command line flags
+	flag.Parse()
+
+	// command line arguments
+	destDir, srcDir := ".", ""
+	if flag.NArg() > 0 {
+		destDir = flag.Arg(0)
+	}
+	if flag.NArg() > 1 {
+		srcDir = flag.Arg(1)
+	}
+	if srcDir == "" {
+		createHashDatabase(destDir, dbFilePath)
+	} else {
+		copyFilesToHashedTarget(srcDir, destDir, dbFilePath)
 	}
-	hash_directory(".", dbname)
 }
 