|
|
|
@ -15,10 +15,28 @@ import (
|
|
|
|
|
"encoding/binary"
|
|
|
|
|
"hash"
|
|
|
|
|
"reflect"
|
|
|
|
|
"flag"
|
|
|
|
|
"strings"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// Sampling parameters of the quick hash.
const (
	// sampleSize is the number of bytes read for each of the three samples
	// (start, middle, end) taken from a file that is not hashed in full.
	sampleSize = 16 * 1024

	// sampleThreshold is the file size in bytes above which the quick hash
	// samples the file instead of covering its whole content.
	sampleThreshold = 48 * 1024
)

// Settings filled in from the command line flags registered in main.
var (
	// dbFilePath is the path of the hash database file (flag -f).
	dbFilePath string

	// verbose enables listing every file together with its hashes (flag -v).
	verbose bool
)
|
|
|
|
|
|
|
|
|
|
func checkBlacklist(pathToCheck string) bool {
|
|
|
|
|
var blackList []string
|
|
|
|
|
blackList = append(blackList, os.Args[0])
|
|
|
|
|
blackList = append(blackList, "./"+dbFilePath)
|
|
|
|
|
|
|
|
|
|
for _, v := range blackList {
|
|
|
|
|
// TODO: ENTER CORRECT PATH (HINT: IT IS NOT .) ==========================================
|
|
|
|
|
if v == "./"+pathToCheck {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func copyHash(src hash.Hash) hash.Hash {
|
|
|
|
|
typ := reflect.TypeOf(src)
|
|
|
|
@ -32,8 +50,88 @@ func copyHash(src hash.Hash) hash.Hash {
|
|
|
|
|
return elem.Addr().Interface().(hash.Hash)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func hash_directory (searchDir, database string) {
|
|
|
|
|
db, dberr := sql.Open("sqlite3", database)
|
|
|
|
|
func hashFileVeryQuickly (file *os.File, fileSize int64, targetHash *hash.Hash) error {
|
|
|
|
|
if fileSize > int64(sampleThreshold) {
|
|
|
|
|
(*targetHash).Reset()
|
|
|
|
|
// samples beginning from file
|
|
|
|
|
buffer := make([]byte, int64(sampleSize))
|
|
|
|
|
file.Read(buffer)
|
|
|
|
|
(*targetHash).Write(buffer)
|
|
|
|
|
// samples middle of file
|
|
|
|
|
file.Seek(fileSize/2-int64(sampleSize)/2, 0)
|
|
|
|
|
file.Read(buffer)
|
|
|
|
|
(*targetHash).Write(buffer)
|
|
|
|
|
// samples end of file
|
|
|
|
|
file.Seek(int64(-sampleSize), 2)
|
|
|
|
|
file.Read(buffer)
|
|
|
|
|
(*targetHash).Write(buffer)
|
|
|
|
|
}
|
|
|
|
|
buffer := make([]byte, 8)
|
|
|
|
|
// puts filesize into hash
|
|
|
|
|
binary.PutVarint(buffer, fileSize)
|
|
|
|
|
(*targetHash).Write(buffer)
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func hashFileWithBothHashes (path string, fileSize int64) (string, string) {
|
|
|
|
|
f, err := os.Open(path)
|
|
|
|
|
if err != nil {
|
|
|
|
|
log.Fatal(err)
|
|
|
|
|
}
|
|
|
|
|
defer f.Close()
|
|
|
|
|
h := sha1.New()
|
|
|
|
|
if _, err := io.Copy(h, f); err != nil {
|
|
|
|
|
log.Fatal(err)
|
|
|
|
|
}
|
|
|
|
|
qh := copyHash(h)
|
|
|
|
|
hashFileVeryQuickly(f, fileSize, &qh)
|
|
|
|
|
return hex.EncodeToString(qh.Sum(nil)), hex.EncodeToString(h.Sum(nil))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func hashFileWithQuickHash (path string, fileSize int64) string {
|
|
|
|
|
f, err := os.Open(path)
|
|
|
|
|
if err != nil {
|
|
|
|
|
log.Fatal(err)
|
|
|
|
|
}
|
|
|
|
|
defer f.Close()
|
|
|
|
|
qh := sha1.New()
|
|
|
|
|
if fileSize < int64(sampleThreshold) {
|
|
|
|
|
if _, err := io.Copy(qh, f); err != nil {
|
|
|
|
|
log.Fatal(err)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
hashFileVeryQuickly(f, fileSize, &qh)
|
|
|
|
|
return hex.EncodeToString(qh.Sum(nil))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// initialiseDatabase creates a fresh hash database at dbFilePath.
//
// Any existing file at that path is removed first (the result of the removal
// is deliberately not checked), then the database is opened with the
// "sqlite3" driver and the empty hashes table is created. A failure to open
// the database terminates the program; a failure to create the table is
// logged together with the statement and returned.
func initialiseDatabase(dbFilePath string) error {
	const createTable = `
	create table hashes (
		hash_sha1 text not null primary key,
		hash_quick text not null,
		filename text not null,
		filesize_bytes integer not null,
		path text not null,
		changedate datetime not null
	);`

	os.Remove(dbFilePath)

	conn, openErr := sql.Open("sqlite3", dbFilePath)
	if openErr != nil {
		log.Fatal(openErr)
	}
	defer conn.Close()

	if _, execErr := conn.Exec(createTable); execErr != nil {
		log.Printf("%q: %s\n", execErr, createTable)
		return execErr
	}
	return nil
}
|
|
|
|
|
|
|
|
|
|
func createHashDatabase (searchDir, dbFilePath string) {
|
|
|
|
|
initialiseDatabase(dbFilePath)
|
|
|
|
|
db, dberr := sql.Open("sqlite3", dbFilePath)
|
|
|
|
|
if dberr != nil {
|
|
|
|
|
log.Fatal(dberr)
|
|
|
|
|
}
|
|
|
|
@ -44,47 +142,27 @@ func hash_directory (searchDir, database string) {
|
|
|
|
|
log.Fatal(err)
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
if !info.IsDir() {
|
|
|
|
|
fmt.Printf("Name: %s\n", info.Name())
|
|
|
|
|
f, err := os.Open(path)
|
|
|
|
|
if err != nil {
|
|
|
|
|
log.Fatal(err)
|
|
|
|
|
}
|
|
|
|
|
defer f.Close()
|
|
|
|
|
h := sha1.New()
|
|
|
|
|
if _, err := io.Copy(h, f); err != nil {
|
|
|
|
|
log.Fatal(err)
|
|
|
|
|
}
|
|
|
|
|
qh := copyHash(h)
|
|
|
|
|
if info.Size() > int64(sampleThreshold) {
|
|
|
|
|
qh.Reset()
|
|
|
|
|
buffer := make([]byte, sampleSize)
|
|
|
|
|
f.Read(buffer)
|
|
|
|
|
qh.Write(buffer)
|
|
|
|
|
f.Seek(info.Size()/2-sampleSize/2, 0)
|
|
|
|
|
f.Read(buffer)
|
|
|
|
|
qh.Write(buffer)
|
|
|
|
|
f.Seek(int64(-sampleSize), 2)
|
|
|
|
|
f.Read(buffer)
|
|
|
|
|
qh.Write(buffer)
|
|
|
|
|
if !info.IsDir() && checkBlacklist(path) {
|
|
|
|
|
quickSum, hashSum := hashFileWithBothHashes(path, info.Size())
|
|
|
|
|
if verbose {
|
|
|
|
|
fmt.Printf("Name: %s\n", info.Name())
|
|
|
|
|
fmt.Printf("SHA1: %s\n", hashSum)
|
|
|
|
|
fmt.Printf("qSHA1: %s\n", quickSum)
|
|
|
|
|
fmt.Printf("Size: %d\n", info.Size())
|
|
|
|
|
fmt.Printf("Time: %s\n\n", info.ModTime().Format(time.RFC3339))
|
|
|
|
|
}
|
|
|
|
|
buffer := make([]byte, 8)
|
|
|
|
|
binary.PutVarint(buffer, info.Size())
|
|
|
|
|
qh.Write(buffer)
|
|
|
|
|
quickSum := hex.EncodeToString(qh.Sum(nil))
|
|
|
|
|
|
|
|
|
|
hashSum := hex.EncodeToString(h.Sum(nil))
|
|
|
|
|
fmt.Printf("SHA1: %x\n", hashSum)
|
|
|
|
|
fmt.Printf("qSHA1: %x\n", quickSum)
|
|
|
|
|
fmt.Printf("Size: %d\n", info.Size())
|
|
|
|
|
fmt.Printf("Time: %s\n\n", info.ModTime().Format(time.RFC3339))
|
|
|
|
|
sqlStatement := "INSERT INTO hashes (hash_sha1, filename, filesize_bytes, path, changedate )"+
|
|
|
|
|
"VALUES ('"+hashSum+"','"+info.Name()+"','"+strconv.FormatInt(info.Size(),10)+"','"+path+"','"+info.ModTime().Format(time.RFC3339)+"');"
|
|
|
|
|
// save file information into database
|
|
|
|
|
sqlStatement := "INSERT INTO hashes (hash_sha1, hash_quick, filename, filesize_bytes, path, changedate )"+
|
|
|
|
|
"VALUES ('"+hashSum+"','"+quickSum+"','"+info.Name()+"','"+strconv.FormatInt(info.Size(),10)+"','"+path+"','"+info.ModTime().Format(time.RFC3339)+"');"
|
|
|
|
|
|
|
|
|
|
_, err = db.Exec(sqlStatement)
|
|
|
|
|
if err != nil {
|
|
|
|
|
log.Printf("%q: %s\n", err, sqlStatement)
|
|
|
|
|
return nil
|
|
|
|
|
if strings.Contains(err.Error(), "UNIQUE constraint failed: hashes.hash_sha1") {
|
|
|
|
|
fmt.Println("duplicate found:\n"+path+"\n")
|
|
|
|
|
} else {
|
|
|
|
|
log.Printf("%q: %s\n", err, sqlStatement)
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
@ -95,27 +173,62 @@ func hash_directory (searchDir, database string) {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func main() {
|
|
|
|
|
dbname := "./sneakerhash.db"
|
|
|
|
|
os.Remove(dbname)
|
|
|
|
|
db, err := sql.Open("sqlite3", dbname)
|
|
|
|
|
if err != nil {
|
|
|
|
|
log.Fatal(err)
|
|
|
|
|
func copyFilesToHashedTarget(src, dest, dbFilePath string) error {
|
|
|
|
|
db, dberr := sql.Open("sqlite3", dbFilePath)
|
|
|
|
|
if dberr != nil {
|
|
|
|
|
log.Fatal(dberr)
|
|
|
|
|
}
|
|
|
|
|
defer db.Close()
|
|
|
|
|
sqlStatement := `
|
|
|
|
|
create table hashes (
|
|
|
|
|
hash_sha1 text not null primary key,
|
|
|
|
|
filename text,
|
|
|
|
|
filesize_bytes integer,
|
|
|
|
|
path text,
|
|
|
|
|
changedate datetime
|
|
|
|
|
);`
|
|
|
|
|
_, err = db.Exec(sqlStatement)
|
|
|
|
|
err := filepath.Walk(src,
|
|
|
|
|
func(path string, info os.FileInfo, err error) error {
|
|
|
|
|
if err != nil {
|
|
|
|
|
log.Fatal(err)
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
if !info.IsDir() && checkBlacklist(path) {
|
|
|
|
|
quickSum := hashFileWithQuickHash(path, info.Size())
|
|
|
|
|
sqlStatement := "SELECT path FROM hashes WHERE hash_quick = '"+quickSum+"';"
|
|
|
|
|
var targetPath string;
|
|
|
|
|
err := db.QueryRow(sqlStatement).Scan(&targetPath)
|
|
|
|
|
if err == nil {
|
|
|
|
|
} else if err == sql.ErrNoRows {
|
|
|
|
|
// no duplicate found: output source file list
|
|
|
|
|
fmt.Println(path)
|
|
|
|
|
} else {
|
|
|
|
|
log.Printf("%q: %s\n", err, sqlStatement)
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return nil
|
|
|
|
|
})
|
|
|
|
|
if err != nil {
|
|
|
|
|
log.Printf("%q: %s\n", err, sqlStatement)
|
|
|
|
|
return
|
|
|
|
|
log.Fatal(err)
|
|
|
|
|
}
|
|
|
|
|
return nil;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func main() {
|
|
|
|
|
// define command line flags
|
|
|
|
|
flag.StringVar(&dbFilePath, "f", "sneakerhash.db", "path to hash database file")
|
|
|
|
|
flag.BoolVar(&verbose, "v", false, "verbosely list files and their hashes")
|
|
|
|
|
// parse command line flags
|
|
|
|
|
flag.Parse()
|
|
|
|
|
|
|
|
|
|
// command line arguments
|
|
|
|
|
var destDir, srcDir string
|
|
|
|
|
srcDir = ""
|
|
|
|
|
if flag.NArg() > 0 {
|
|
|
|
|
destDir = flag.Arg(0)
|
|
|
|
|
if flag.NArg() > 1 {
|
|
|
|
|
srcDir = flag.Arg(1)
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
destDir = "."
|
|
|
|
|
}
|
|
|
|
|
if srcDir == "" {
|
|
|
|
|
createHashDatabase(destDir, dbFilePath)
|
|
|
|
|
} else {
|
|
|
|
|
copyFilesToHashedTarget(srcDir, destDir, dbFilePath)
|
|
|
|
|
}
|
|
|
|
|
hash_directory(".", dbname)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|