add file list function, blacklist

master
Crack Duck 6 years ago
parent f32558514a
commit c8b2748c63

@@ -15,10 +15,28 @@ import (
 	"encoding/binary"
 	"hash"
 	"reflect"
+	"flag"
+	"strings"
 )
 
 const sampleSize = 16 * 1024
 const sampleThreshold = 48 * 1024
 
+var dbFilePath string;
+var verbose bool
+
+func checkBlacklist(pathToCheck string) bool {
+	var blackList []string
+	blackList = append(blackList, os.Args[0])
+	blackList = append(blackList, "./"+dbFilePath)
+	for _, v := range blackList {
+		// TODO: ENTER CORRECT PATH (HINT: IT IS NOT .) ==========================================
+		if v == "./"+pathToCheck {
+			return false
+		}
+	}
+	return true
+}
+
 func copyHash(src hash.Hash) hash.Hash {
 	typ := reflect.TypeOf(src)
@@ -32,20 +50,30 @@ func copyHash(src hash.Hash) hash.Hash {
 	return elem.Addr().Interface().(hash.Hash)
 }
 
-func hash_directory (searchDir, database string) {
-	db, dberr := sql.Open("sqlite3", database)
-	if dberr != nil {
-		log.Fatal(dberr)
-	}
-	defer db.Close()
-	err := filepath.Walk(searchDir,
-		func(path string, info os.FileInfo, err error) error {
-			if err != nil {
-				log.Fatal(err)
-				return err
-			}
-			if !info.IsDir() {
-				fmt.Printf("Name: %s\n", info.Name())
+func hashFileVeryQuickly (file *os.File, fileSize int64, targetHash *hash.Hash) error {
+	if fileSize > int64(sampleThreshold) {
+		(*targetHash).Reset()
+		// samples beginning from file
+		buffer := make([]byte, int64(sampleSize))
+		file.Read(buffer)
+		(*targetHash).Write(buffer)
+		// samples middle of file
+		file.Seek(fileSize/2-int64(sampleSize)/2, 0)
+		file.Read(buffer)
+		(*targetHash).Write(buffer)
+		// samples end of file
+		file.Seek(int64(-sampleSize), 2)
+		file.Read(buffer)
+		(*targetHash).Write(buffer)
+	}
+	buffer := make([]byte, 8)
+	// puts filesize into hash
+	binary.PutVarint(buffer, fileSize)
+	(*targetHash).Write(buffer)
+	return nil
+}
+
+func hashFileWithBothHashes (path string, fileSize int64) (string, string) {
 	f, err := os.Open(path)
 	if err != nil {
 		log.Fatal(err)
@@ -56,36 +84,86 @@ func hash_directory (searchDir, database string) {
 		log.Fatal(err)
 	}
 	qh := copyHash(h)
-	if info.Size() > int64(sampleThreshold) {
-		qh.Reset()
-		buffer := make([]byte, sampleSize)
-		f.Read(buffer)
-		qh.Write(buffer)
-		f.Seek(info.Size()/2-sampleSize/2, 0)
-		f.Read(buffer)
-		qh.Write(buffer)
-		f.Seek(int64(-sampleSize), 2)
-		f.Read(buffer)
-		qh.Write(buffer)
-	}
-	buffer := make([]byte, 8)
-	binary.PutVarint(buffer, info.Size())
-	qh.Write(buffer)
-	quickSum := hex.EncodeToString(qh.Sum(nil))
-	hashSum := hex.EncodeToString(h.Sum(nil))
-	fmt.Printf("SHA1: %x\n", hashSum)
-	fmt.Printf("qSHA1: %x\n", quickSum)
+	hashFileVeryQuickly(f, fileSize, &qh)
+	return hex.EncodeToString(qh.Sum(nil)), hex.EncodeToString(h.Sum(nil))
+}
+
+func hashFileWithQuickHash (path string, fileSize int64) string {
+	f, err := os.Open(path)
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer f.Close()
+	qh := sha1.New()
+	if fileSize < int64(sampleThreshold) {
+		if _, err := io.Copy(qh, f); err != nil {
+			log.Fatal(err)
+		}
+	}
+	hashFileVeryQuickly(f, fileSize, &qh)
+	return hex.EncodeToString(qh.Sum(nil))
+}
+
+func initialiseDatabase(dbFilePath string) error {
+	os.Remove(dbFilePath)
+	db, err := sql.Open("sqlite3", dbFilePath)
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer db.Close()
+	sqlStatement := `
+	create table hashes (
+		hash_sha1 text not null primary key,
+		hash_quick text not null,
+		filename text not null,
+		filesize_bytes integer not null,
+		path text not null,
+		changedate datetime not null
+	);`
+	_, err = db.Exec(sqlStatement)
+	if err != nil {
+		log.Printf("%q: %s\n", err, sqlStatement)
+		return err
+	} else {
+		return nil
+	}
+}
+
+func createHashDatabase (searchDir, dbFilePath string) {
+	initialiseDatabase(dbFilePath)
+	db, dberr := sql.Open("sqlite3", dbFilePath)
+	if dberr != nil {
+		log.Fatal(dberr)
+	}
+	defer db.Close()
+	err := filepath.Walk(searchDir,
+		func(path string, info os.FileInfo, err error) error {
+			if err != nil {
+				log.Fatal(err)
+				return err
+			}
+			if !info.IsDir() && checkBlacklist(path) {
+				quickSum, hashSum := hashFileWithBothHashes(path, info.Size())
+				if verbose {
+					fmt.Printf("Name: %s\n", info.Name())
+					fmt.Printf("SHA1: %s\n", hashSum)
+					fmt.Printf("qSHA1: %s\n", quickSum)
 					fmt.Printf("Size: %d\n", info.Size())
 					fmt.Printf("Time: %s\n\n", info.ModTime().Format(time.RFC3339))
-				sqlStatement := "INSERT INTO hashes (hash_sha1, filename, filesize_bytes, path, changedate )"+
-					"VALUES ('"+hashSum+"','"+info.Name()+"','"+strconv.FormatInt(info.Size(),10)+"','"+path+"','"+info.ModTime().Format(time.RFC3339)+"');"
+				}
+				// save file information into database
+				sqlStatement := "INSERT INTO hashes (hash_sha1, hash_quick, filename, filesize_bytes, path, changedate )"+
+					"VALUES ('"+hashSum+"','"+quickSum+"','"+info.Name()+"','"+strconv.FormatInt(info.Size(),10)+"','"+path+"','"+info.ModTime().Format(time.RFC3339)+"');"
 				_, err = db.Exec(sqlStatement)
 				if err != nil {
+					if strings.Contains(err.Error(), "UNIQUE constraint failed: hashes.hash_sha1") {
+						fmt.Println("duplicate found:\n"+path+"\n")
+					} else {
 					log.Printf("%q: %s\n", err, sqlStatement)
 					return nil
 				}
+				}
 			}
 			return nil
@@ -95,27 +173,62 @@ func hash_directory (searchDir, database string) {
 	}
 }
 
-func main() {
-	dbname := "./sneakerhash.db"
-	os.Remove(dbname)
-	db, err := sql.Open("sqlite3", dbname)
-	if err != nil {
-		log.Fatal(err)
-	}
+func copyFilesToHashedTarget(src, dest, dbFilePath string) error {
+	db, dberr := sql.Open("sqlite3", dbFilePath)
+	if dberr != nil {
+		log.Fatal(dberr)
+	}
 	defer db.Close()
-	sqlStatement := `
-	create table hashes (
-		hash_sha1 text not null primary key,
-		filename text,
-		filesize_bytes integer,
-		path text,
-		changedate datetime
-	);`
-	_, err = db.Exec(sqlStatement)
-	if err != nil {
-		log.Printf("%q: %s\n", err, sqlStatement)
-		return
-	}
-	hash_directory(".", dbname)
+	err := filepath.Walk(src,
+		func(path string, info os.FileInfo, err error) error {
+			if err != nil {
+				log.Fatal(err)
+				return err
+			}
+			if !info.IsDir() && checkBlacklist(path) {
+				quickSum := hashFileWithQuickHash(path, info.Size())
+				sqlStatement := "SELECT path FROM hashes WHERE hash_quick = '"+quickSum+"';"
+				var targetPath string;
+				err := db.QueryRow(sqlStatement).Scan(&targetPath)
+				if err == nil {
+				} else if err == sql.ErrNoRows {
+					// no duplicate found: output source file list
+					fmt.Println(path)
+				} else {
+					log.Printf("%q: %s\n", err, sqlStatement)
+					return nil
+				}
+			}
+			return nil
+		})
+	if err != nil {
+		log.Fatal(err)
+	}
+	return nil;
+}
+
+func main() {
+	// define command line flags
+	flag.StringVar(&dbFilePath, "f", "sneakerhash.db", "path to hash database file")
+	flag.BoolVar(&verbose, "v", false, "verbosely list files and their hashes")
+	// parse command line flags
+	flag.Parse()
+	// command line arguments
+	var destDir, srcDir string
+	srcDir = ""
+	if flag.NArg() > 0 {
+		destDir = flag.Arg(0)
+		if flag.NArg() > 1 {
+			srcDir = flag.Arg(1)
+		}
+	} else {
+		destDir = "."
+	}
+	if srcDir == "" {
+		createHashDatabase(destDir, dbFilePath)
+	} else {
+		copyFilesToHashedTarget(srcDir, destDir, dbFilePath)
+	}
 }
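
For reference (not part of the commit): a minimal standalone sketch of the sampled-hash scheme that hashFileVeryQuickly and hashFileWithQuickHash implement together, assuming the same constants. Files at or below sampleThreshold are hashed in full; larger files contribute only their first, middle and last sampleSize bytes, and the varint-encoded file size is mixed into the digest in both cases. The name quickSum is illustrative, and the sketch uses os.File.ReadAt and a MaxVarintLen64-sized buffer where the commit uses Seek/Read and an 8-byte buffer.

package main

import (
	"crypto/sha1"
	"encoding/binary"
	"encoding/hex"
	"fmt"
	"io"
	"log"
	"os"
)

const sampleSize = 16 * 1024
const sampleThreshold = 48 * 1024

// quickSum: small files are hashed in full, larger files only by their
// first, middle and last sampleSize bytes, plus the varint-encoded size.
func quickSum(path string) (string, error) {
	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()

	info, err := f.Stat()
	if err != nil {
		return "", err
	}

	h := sha1.New()
	if info.Size() <= sampleThreshold {
		// below the threshold the whole file goes into the hash
		if _, err := io.Copy(h, f); err != nil {
			return "", err
		}
	} else {
		buf := make([]byte, sampleSize)
		// sample the beginning, middle and end of the file
		offsets := []int64{0, info.Size()/2 - sampleSize/2, info.Size() - sampleSize}
		for _, off := range offsets {
			if _, err := f.ReadAt(buf, off); err != nil && err != io.EOF {
				return "", err
			}
			h.Write(buf)
		}
	}

	// mix the file size into the digest so files with identical samples
	// but different lengths still get different sums
	sizeBuf := make([]byte, binary.MaxVarintLen64)
	binary.PutVarint(sizeBuf, info.Size())
	h.Write(sizeBuf)

	return hex.EncodeToString(h.Sum(nil)), nil
}

func main() {
	if len(os.Args) < 2 {
		log.Fatal("usage: quicksum <file>")
	}
	sum, err := quickSum(os.Args[1])
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(sum)
}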
