add file list function, blacklist

master
Crack Duck 6 years ago
parent f32558514a
commit c8b2748c63

@ -15,10 +15,28 @@ import (
"encoding/binary" "encoding/binary"
"hash" "hash"
"reflect" "reflect"
"flag"
"strings"
) )
const sampleSize = 16 * 1024 const sampleSize = 16 * 1024
const sampleThreshold = 48 * 1024 const sampleThreshold = 48 * 1024
var dbFilePath string;
var verbose bool
// checkBlacklist reports whether pathToCheck may be processed. It returns
// false when pathToCheck refers to the hash database file (the package-level
// dbFilePath) or to the running executable, and true otherwise.
//
// Paths are compared as cleaned absolute paths, so different spellings of the
// same location ("db", "./db", "sub/../db", or an absolute path) are all
// recognised. Symbolic links are not resolved.
func checkBlacklist(pathToCheck string) bool {
	target := absoluteOrClean(pathToCheck)

	blackList := []string{dbFilePath}
	// os.Args[0] only names a file relative to the working directory when it
	// carries a directory component; a bare command name was looked up
	// elsewhere and must not shadow an unrelated file of the same name.
	if len(os.Args) > 0 {
		if arg0 := os.Args[0]; filepath.Base(arg0) != arg0 {
			blackList = append(blackList, arg0)
		}
	}
	if exe, err := os.Executable(); err == nil {
		blackList = append(blackList, exe)
	}

	for _, entry := range blackList {
		if entry == "" {
			// An empty entry would resolve to the working directory itself.
			continue
		}
		if absoluteOrClean(entry) == target {
			return false
		}
	}
	return true
}

// absoluteOrClean returns the cleaned absolute form of p, falling back to a
// purely lexical clean when the working directory cannot be determined.
func absoluteOrClean(p string) string {
	abs, err := filepath.Abs(p)
	if err != nil {
		return filepath.Clean(p)
	}
	return abs
}
func copyHash(src hash.Hash) hash.Hash { func copyHash(src hash.Hash) hash.Hash {
typ := reflect.TypeOf(src) typ := reflect.TypeOf(src)
@ -32,8 +50,88 @@ func copyHash(src hash.Hash) hash.Hash {
return elem.Addr().Interface().(hash.Hash) return elem.Addr().Interface().(hash.Hash)
} }
func hash_directory (searchDir, database string) { func hashFileVeryQuickly (file *os.File, fileSize int64, targetHash *hash.Hash) error {
db, dberr := sql.Open("sqlite3", database) if fileSize > int64(sampleThreshold) {
(*targetHash).Reset()
// samples beginning from file
buffer := make([]byte, int64(sampleSize))
file.Read(buffer)
(*targetHash).Write(buffer)
// samples middle of file
file.Seek(fileSize/2-int64(sampleSize)/2, 0)
file.Read(buffer)
(*targetHash).Write(buffer)
// samples end of file
file.Seek(int64(-sampleSize), 2)
file.Read(buffer)
(*targetHash).Write(buffer)
}
buffer := make([]byte, 8)
// puts filesize into hash
binary.PutVarint(buffer, fileSize)
(*targetHash).Write(buffer)
return nil
}
// hashFileWithBothHashes opens the file at path and returns two hex-encoded
// SHA-1 digests: the quick hash first, the full-content hash second.
//
// The full hash covers every byte of the file. The quick hash starts as a
// copy of the full hash state and is finished by hashFileVeryQuickly, which
// for files larger than sampleThreshold resets it and hashes samples of the
// file instead, and in all cases appends the encoded fileSize.
//
// fileSize is the size of the file in bytes as reported by the caller.
// Failing to open, read or seek the file terminates the program through
// log.Fatal.
func hashFileWithBothHashes(path string, fileSize int64) (string, string) {
	f, err := os.Open(path)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	h := sha1.New()
	if _, err := io.Copy(h, f); err != nil {
		log.Fatal(err)
	}

	// io.Copy leaves the file offset at EOF, and hashFileVeryQuickly takes its
	// first sample from the current offset. Rewind so that sample really is
	// the beginning of the file rather than an empty read.
	if _, err := f.Seek(0, io.SeekStart); err != nil {
		log.Fatal(err)
	}

	qh := copyHash(h)
	if err := hashFileVeryQuickly(f, fileSize, &qh); err != nil {
		log.Fatal(err)
	}
	return hex.EncodeToString(qh.Sum(nil)), hex.EncodeToString(h.Sum(nil))
}
// hashFileWithQuickHash opens the file at path and returns its hex-encoded
// quick hash without necessarily reading the whole file.
//
// Files of at most sampleThreshold bytes are hashed in full. Larger files are
// left unread here, so hashFileVeryQuickly starts sampling at offset zero. In
// both cases hashFileVeryQuickly appends the encoded fileSize to the hash.
//
// fileSize is the size of the file in bytes as reported by the caller.
// Failing to open or read the file terminates the program through log.Fatal.
func hashFileWithQuickHash(path string, fileSize int64) string {
	f, err := os.Open(path)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	qh := sha1.New()
	// hashFileVeryQuickly samples only when fileSize > sampleThreshold, so the
	// full-content branch must include the boundary itself. With a strict "<"
	// a file of exactly sampleThreshold bytes would contribute no content.
	if fileSize <= int64(sampleThreshold) {
		if _, err := io.Copy(qh, f); err != nil {
			log.Fatal(err)
		}
	}
	if err := hashFileVeryQuickly(f, fileSize, &qh); err != nil {
		log.Fatal(err)
	}
	return hex.EncodeToString(qh.Sum(nil))
}
// initialiseDatabase creates a fresh hash database at dbFilePath containing a
// single empty "hashes" table.
//
// Any file already at dbFilePath is removed first; the result of that removal
// is not checked. A failure to open the database terminates the program via
// log.Fatal. A failure to create the table is logged and returned; on success
// the function returns nil.
func initialiseDatabase(dbFilePath string) error {
	// Start from scratch; the outcome of the removal is deliberately ignored.
	os.Remove(dbFilePath)

	handle, openErr := sql.Open("sqlite3", dbFilePath)
	if openErr != nil {
		log.Fatal(openErr)
	}
	defer handle.Close()

	// hash_sha1 is the primary key, so a second row with the same hash is
	// rejected by the database.
	const schema = `
create table hashes (
hash_sha1 text not null primary key,
hash_quick text not null,
filename text not null,
filesize_bytes integer not null,
path text not null,
changedate datetime not null
);`

	if _, execErr := handle.Exec(schema); execErr != nil {
		log.Printf("%q: %s\n", execErr, schema)
		return execErr
	}
	return nil
}
func createHashDatabase (searchDir, dbFilePath string) {
initialiseDatabase(dbFilePath)
db, dberr := sql.Open("sqlite3", dbFilePath)
if dberr != nil { if dberr != nil {
log.Fatal(dberr) log.Fatal(dberr)
} }
@ -44,47 +142,27 @@ func hash_directory (searchDir, database string) {
log.Fatal(err) log.Fatal(err)
return err return err
} }
if !info.IsDir() { if !info.IsDir() && checkBlacklist(path) {
fmt.Printf("Name: %s\n", info.Name()) quickSum, hashSum := hashFileWithBothHashes(path, info.Size())
f, err := os.Open(path) if verbose {
if err != nil { fmt.Printf("Name: %s\n", info.Name())
log.Fatal(err) fmt.Printf("SHA1: %s\n", hashSum)
fmt.Printf("qSHA1: %s\n", quickSum)
fmt.Printf("Size: %d\n", info.Size())
fmt.Printf("Time: %s\n\n", info.ModTime().Format(time.RFC3339))
} }
defer f.Close() // save file information into database
h := sha1.New() sqlStatement := "INSERT INTO hashes (hash_sha1, hash_quick, filename, filesize_bytes, path, changedate )"+
if _, err := io.Copy(h, f); err != nil { "VALUES ('"+hashSum+"','"+quickSum+"','"+info.Name()+"','"+strconv.FormatInt(info.Size(),10)+"','"+path+"','"+info.ModTime().Format(time.RFC3339)+"');"
log.Fatal(err)
}
qh := copyHash(h)
if info.Size() > int64(sampleThreshold) {
qh.Reset()
buffer := make([]byte, sampleSize)
f.Read(buffer)
qh.Write(buffer)
f.Seek(info.Size()/2-sampleSize/2, 0)
f.Read(buffer)
qh.Write(buffer)
f.Seek(int64(-sampleSize), 2)
f.Read(buffer)
qh.Write(buffer)
}
buffer := make([]byte, 8)
binary.PutVarint(buffer, info.Size())
qh.Write(buffer)
quickSum := hex.EncodeToString(qh.Sum(nil))
hashSum := hex.EncodeToString(h.Sum(nil))
fmt.Printf("SHA1: %x\n", hashSum)
fmt.Printf("qSHA1: %x\n", quickSum)
fmt.Printf("Size: %d\n", info.Size())
fmt.Printf("Time: %s\n\n", info.ModTime().Format(time.RFC3339))
sqlStatement := "INSERT INTO hashes (hash_sha1, filename, filesize_bytes, path, changedate )"+
"VALUES ('"+hashSum+"','"+info.Name()+"','"+strconv.FormatInt(info.Size(),10)+"','"+path+"','"+info.ModTime().Format(time.RFC3339)+"');"
_, err = db.Exec(sqlStatement) _, err = db.Exec(sqlStatement)
if err != nil { if err != nil {
log.Printf("%q: %s\n", err, sqlStatement) if strings.Contains(err.Error(), "UNIQUE constraint failed: hashes.hash_sha1") {
return nil fmt.Println("duplicate found:\n"+path+"\n")
} else {
log.Printf("%q: %s\n", err, sqlStatement)
return nil
}
} }
} }
@ -95,27 +173,62 @@ func hash_directory (searchDir, database string) {
} }
} }
func main() { func copyFilesToHashedTarget(src, dest, dbFilePath string) error {
dbname := "./sneakerhash.db" db, dberr := sql.Open("sqlite3", dbFilePath)
os.Remove(dbname) if dberr != nil {
db, err := sql.Open("sqlite3", dbname) log.Fatal(dberr)
if err != nil {
log.Fatal(err)
} }
defer db.Close() defer db.Close()
sqlStatement := ` err := filepath.Walk(src,
create table hashes ( func(path string, info os.FileInfo, err error) error {
hash_sha1 text not null primary key, if err != nil {
filename text, log.Fatal(err)
filesize_bytes integer, return err
path text, }
changedate datetime if !info.IsDir() && checkBlacklist(path) {
);` quickSum := hashFileWithQuickHash(path, info.Size())
_, err = db.Exec(sqlStatement) sqlStatement := "SELECT path FROM hashes WHERE hash_quick = '"+quickSum+"';"
var targetPath string;
err := db.QueryRow(sqlStatement).Scan(&targetPath)
if err == nil {
} else if err == sql.ErrNoRows {
// no duplicate found: output source file list
fmt.Println(path)
} else {
log.Printf("%q: %s\n", err, sqlStatement)
return nil
}
}
return nil
})
if err != nil { if err != nil {
log.Printf("%q: %s\n", err, sqlStatement) log.Fatal(err)
return }
return nil;
}
func main() {
// define command line flags
flag.StringVar(&dbFilePath, "f", "sneakerhash.db", "path to hash database file")
flag.BoolVar(&verbose, "v", false, "verbosely list files and their hashes")
// parse command line flags
flag.Parse()
// command line arguments
var destDir, srcDir string
srcDir = ""
if flag.NArg() > 0 {
destDir = flag.Arg(0)
if flag.NArg() > 1 {
srcDir = flag.Arg(1)
}
} else {
destDir = "."
}
if srcDir == "" {
createHashDatabase(destDir, dbFilePath)
} else {
copyFilesToHashedTarget(srcDir, destDir, dbFilePath)
} }
hash_directory(".", dbname)
} }

Loading…
Cancel
Save