You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

234 lines
5.6 KiB

package main
import (
"database/sql"
_ "github.com/mattn/go-sqlite3"
"log"
"io"
"fmt"
"os"
"crypto/sha1"
"time"
"path/filepath"
"strconv"
"encoding/hex"
"encoding/binary"
"hash"
"reflect"
"flag"
"strings"
)
// Parameters of the sampled "quick" hash.
const (
	// sampleSize is the length in bytes of each sample buffer that
	// hashFileVeryQuickly reads from a file.
	sampleSize = 16 * 1024

	// sampleThreshold is the file size in bytes above which
	// hashFileVeryQuickly hashes samples of the file.
	sampleThreshold = 48 * 1024
)

// Settings filled in from the command-line flags registered in main.
var (
	// dbFilePath is the value of the -f flag: the path to the hash database file.
	dbFilePath string

	// verbose is the value of the -v flag: print every file with its hashes.
	verbose bool
)
// checkBlacklist reports whether pathToCheck may be processed.
//
// It returns false when pathToCheck refers to one of the excluded files,
// which are the program itself (os.Args[0]) and the hash database
// (dbFilePath), and true for every other path.
//
// Both sides of the comparison are converted to absolute, cleaned paths first,
// so different spellings of the same location ("db", "./db", "dir/../db",
// "/cwd/db") are all recognised, regardless of which directory is being walked.
//
// NOTE(review): os.Args[0] is resolved relative to the working directory; when
// the program was started through a PATH lookup this may not be the real
// location of the executable.
func checkBlacklist(pathToCheck string) bool {
	// normalize yields a comparable form of p; if the working directory
	// cannot be determined it falls back to the cleaned relative path.
	normalize := func(p string) string {
		abs, err := filepath.Abs(p)
		if err != nil {
			return filepath.Clean(p)
		}
		return abs
	}

	candidate := normalize(pathToCheck)
	for _, excluded := range []string{os.Args[0], dbFilePath} {
		if normalize(excluded) == candidate {
			return false
		}
	}
	return true
}
// copyHash returns a new hash.Hash whose state is a copy of src's current
// state, made via reflection.
//
// If src holds a pointer, the value it points to is copied; otherwise the
// value itself is copied. The copy is a plain value assignment (shallow), and
// the result is always a pointer to the freshly allocated copy.
func copyHash(src hash.Hash) hash.Hash {
	state := reflect.ValueOf(src)
	if state.Kind() == reflect.Ptr {
		// Work on the pointed-to value so the data is duplicated, not the pointer.
		state = state.Elem()
	}

	clone := reflect.New(state.Type())
	clone.Elem().Set(state)
	return clone.Interface().(hash.Hash)
}
// hashFileVeryQuickly feeds the "quick hash" input for file into *targetHash.
//
// For files larger than sampleThreshold the hash is reset and three samples of
// sampleSize bytes each are written to it: from the beginning, the middle and
// the end of the file. For smaller files the hash is left as the caller
// prepared it. In both cases the file size is appended last, as a varint
// padded with zero bytes to at least 8 bytes.
//
// The samples are read with ReadAt at explicit offsets, so the result does not
// depend on the file's current read offset, and the offset is left untouched.
//
// Parameters:
//   - file: the opened file to sample.
//   - fileSize: the size of file in bytes; it selects the strategy and the
//     sample offsets.
//   - targetHash: the hash that receives the data.
//
// It returns an error when a sample cannot be read completely, for example
// because the file is shorter than fileSize claims.
func hashFileVeryQuickly(file *os.File, fileSize int64, targetHash *hash.Hash) error {
	h := *targetHash

	if fileSize > int64(sampleThreshold) {
		h.Reset()

		sample := make([]byte, sampleSize)
		offsets := [...]int64{
			0,                                // beginning of file
			fileSize/2 - int64(sampleSize)/2, // middle of file
			fileSize - int64(sampleSize),     // end of file
		}
		for _, off := range offsets {
			// ReadAt reports a non-nil error whenever it fills less than the
			// whole buffer, so a short sample is never hashed silently.
			if n, err := file.ReadAt(sample, off); n < len(sample) {
				return fmt.Errorf("sampling %s at offset %d: %w", file.Name(), off, err)
			}
			h.Write(sample)
		}
	}

	// Mix the file size into the hash. The buffer is large enough for any
	// int64 varint; at least 8 bytes are always written so that the encoding
	// of sizes fitting into 8 bytes keeps its fixed, zero-padded width.
	var sizeBuf [binary.MaxVarintLen64]byte
	n := binary.PutVarint(sizeBuf[:], fileSize)
	if n < 8 {
		n = 8
	}
	h.Write(sizeBuf[:n])
	return nil
}
// hashFileWithBothHashes computes the quick hash and the full SHA-1 hash of
// the file at path in one go.
//
// fileSize is the size of the file in bytes and is passed on to
// hashFileVeryQuickly. The return values are, in this order, the hex-encoded
// quick hash and the hex-encoded SHA-1 of the complete file content.
//
// Any I/O error terminates the program via log.Fatal.
func hashFileWithBothHashes(path string, fileSize int64) (string, string) {
	f, err := os.Open(path)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	h := sha1.New()
	if _, err := io.Copy(h, f); err != nil {
		log.Fatal(err)
	}

	// The quick hash starts from a copy of the full-content state, which
	// hashFileVeryQuickly is handed to build on.
	qh := copyHash(h)

	// io.Copy left the read offset at the end of the file. Rewind so that
	// sampling starts at the beginning, exactly as it does for the freshly
	// opened file in hashFileWithQuickHash; otherwise the two quick hashes of
	// the same file would differ.
	if _, err := f.Seek(0, io.SeekStart); err != nil {
		log.Fatal(err)
	}
	if err := hashFileVeryQuickly(f, fileSize, &qh); err != nil {
		log.Fatal(err)
	}
	return hex.EncodeToString(qh.Sum(nil)), hex.EncodeToString(h.Sum(nil))
}
// hashFileWithQuickHash computes only the quick hash of the file at path and
// returns it hex-encoded.
//
// fileSize is the size of the file in bytes. Files of up to sampleThreshold
// bytes are hashed completely; hashFileVeryQuickly then adds the file size
// (and, for larger files, the samples). The bound is inclusive because
// hashFileVeryQuickly only samples files strictly larger than
// sampleThreshold: a file of exactly that size must be read in full here,
// matching what hashFileWithBothHashes produces for it.
//
// Any I/O error terminates the program via log.Fatal.
func hashFileWithQuickHash(path string, fileSize int64) string {
	f, err := os.Open(path)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	qh := sha1.New()
	if fileSize <= int64(sampleThreshold) {
		if _, err := io.Copy(qh, f); err != nil {
			log.Fatal(err)
		}
	}
	if err := hashFileVeryQuickly(f, fileSize, &qh); err != nil {
		log.Fatal(err)
	}
	return hex.EncodeToString(qh.Sum(nil))
}
// initialiseDatabase creates a fresh hash database at dbFilePath.
//
// Any existing file at that path is deleted first, then the "hashes" table is
// created. hash_sha1 is the primary key of that table.
//
// It returns an error (after logging it) when the old file cannot be removed
// or the table cannot be created. A failure to open the database handle
// terminates the program via log.Fatal.
func initialiseDatabase(dbFilePath string) error {
	// A missing file is the normal first-run case; anything else (for example
	// a permission problem) would leave a stale database in place.
	if err := os.Remove(dbFilePath); err != nil && !os.IsNotExist(err) {
		log.Printf("removing old database %q: %s\n", dbFilePath, err)
		return err
	}

	db, err := sql.Open("sqlite3", dbFilePath)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	const sqlStatement = `
create table hashes (
hash_sha1 text not null primary key,
hash_quick text not null,
filename text not null,
filesize_bytes integer not null,
path text not null,
changedate datetime not null
);`
	if _, err := db.Exec(sqlStatement); err != nil {
		log.Printf("%q: %s\n", err, sqlStatement)
		return err
	}
	return nil
}
// createHashDatabase builds a new hash database at dbFilePath from all files
// found below searchDir.
//
// The database is re-created from scratch. Every regular entry that passes
// checkBlacklist is hashed with hashFileWithBothHashes and inserted into the
// "hashes" table. Because hash_sha1 is the primary key, a file whose content
// is already recorded violates the uniqueness constraint; such files are
// reported on standard output as duplicates. Other insert errors are logged
// and the walk continues.
//
// Values are passed as bound parameters, so file names and paths containing
// quotes or other SQL syntax are stored verbatim.
//
// Failing to initialise or open the database, or an error while walking
// searchDir, terminates the program via log.Fatal.
func createHashDatabase(searchDir, dbFilePath string) {
	if err := initialiseDatabase(dbFilePath); err != nil {
		log.Fatal(err)
	}

	db, err := sql.Open("sqlite3", dbFilePath)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	const insertSQL = "INSERT INTO hashes (hash_sha1, hash_quick, filename, filesize_bytes, path, changedate) " +
		"VALUES (?, ?, ?, ?, ?, ?);"

	err = filepath.Walk(searchDir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			// Abort the walk; the error is reported once, below.
			return err
		}
		if info.IsDir() || !checkBlacklist(path) {
			return nil
		}

		quickSum, hashSum := hashFileWithBothHashes(path, info.Size())
		modTime := info.ModTime().Format(time.RFC3339)
		if verbose {
			fmt.Printf("Name: %s\n", info.Name())
			fmt.Printf("SHA1: %s\n", hashSum)
			fmt.Printf("qSHA1: %s\n", quickSum)
			fmt.Printf("Size: %d\n", info.Size())
			fmt.Printf("Time: %s\n\n", modTime)
		}

		// Save the file information. The size is bound as decimal text; the
		// column is declared "integer", so SQLite converts it on insert.
		_, err = db.Exec(insertSQL,
			hashSum,
			quickSum,
			info.Name(),
			strconv.FormatInt(info.Size(), 10),
			path,
			modTime,
		)
		switch {
		case err == nil:
			// stored
		case strings.Contains(err.Error(), "UNIQUE constraint failed: hashes.hash_sha1"):
			fmt.Printf("duplicate found:\n%s\n\n", path)
		default:
			log.Printf("%q: %s\n", err, insertSQL)
		}
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
}
// copyFilesToHashedTarget walks src and prints the path of every file whose
// quick hash has no entry in the hash database at dbFilePath, i.e. the files
// that are not yet known to the database.
//
// Files for which a matching row exists are skipped silently. Lookup errors
// other than "no rows" are logged and the walk continues.
//
// Parameters:
//   - src: the directory tree to scan.
//   - dest: currently unused by this function.
//   - dbFilePath: the path of the SQLite hash database to query.
//
// Failing to open the database, or an error while walking src, terminates the
// program via log.Fatal; otherwise the function returns nil.
func copyFilesToHashedTarget(src, dest, dbFilePath string) error {
	db, err := sql.Open("sqlite3", dbFilePath)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	const lookupSQL = "SELECT path FROM hashes WHERE hash_quick = ?;"

	err = filepath.Walk(src, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			// Abort the walk; the error is reported once, below.
			return err
		}
		if info.IsDir() || !checkBlacklist(path) {
			return nil
		}

		quickSum := hashFileWithQuickHash(path, info.Size())
		var targetPath string
		switch err := db.QueryRow(lookupSQL, quickSum).Scan(&targetPath); err {
		case nil:
			// A file with this quick hash is already recorded: nothing to report.
		case sql.ErrNoRows:
			// no duplicate found: output source file list
			fmt.Println(path)
		default:
			log.Printf("%q: %s\n", err, lookupSQL)
		}
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
	return nil
}
// main parses the command line and selects the mode of operation.
//
// Flags:
//
//	-f  path to hash database file (default "sneakerhash.db")
//	-v  verbosely list files and their hashes
//
// Positional arguments: the first is the destination directory (default "."),
// the optional second is the source directory. Without a source directory the
// hash database is built from the destination directory; with one, the source
// directory is checked against the existing database.
func main() {
	flag.StringVar(&dbFilePath, "f", "sneakerhash.db", "path to hash database file")
	flag.BoolVar(&verbose, "v", false, "verbosely list files and their hashes")
	flag.Parse()

	// Positional arguments beyond the second are ignored.
	positional := flag.Args()
	destDir, srcDir := ".", ""
	switch {
	case len(positional) > 1:
		destDir, srcDir = positional[0], positional[1]
	case len(positional) == 1:
		destDir = positional[0]
	}

	if srcDir != "" {
		copyFilesToHashedTarget(srcDir, destDir, dbFilePath)
		return
	}
	createHashDatabase(destDir, dbFilePath)
}