From f87e1a13d2155984fc9693d7b12a02421331ec10 Mon Sep 17 00:00:00 2001
From: Daniel Gnoutcheff
Date: Fri, 29 Sep 2017 13:37:31 -0400
Subject: suspicious: represent filenames as strings, not bytes

A summary file with base64-encoded filenames is a pain to work with.
---
 suspicious | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/suspicious b/suspicious
index 8140254..3a4d056 100755
--- a/suspicious
+++ b/suspicious
@@ -95,7 +95,7 @@ def sortscore(score, reverse=True):
 
 def printscore(report):
     for filename, filescore in report:
-        print(fsdecode_display(filename) + ':' + str(filescore))
+        print(filename + ':' + str(filescore))
 
 def scorewords(report):
     for file in report.keys():
@@ -142,7 +142,7 @@ def summary(report):
     filescore = scorefile(report)
     text = ""
     for filename, filescore in sortscore(filescore):
-        text += fsdecode_display(filename) + '(' + str(filescore) + '):'
+        text += filename + '(' + str(filescore) + '):'
         for word, wordfreq in report[filename].items():
             if wordfreq > 0:
                 text += word + '(' + str(wordfreq) + ');'
@@ -316,21 +316,19 @@ if len(args) > 0:
     for a in args:
         filelist.extend(avfs.find(os.fsencode(a), prunedirs))
 
-skipfileexts_bytes = [os.fsencode(e) for e in options.skipfileextensions]
-
 start = datetime.datetime.now()
-for file in filelist:
-    file_displayname = fsdecode_display(file)
-    if skipfile(file, skipfileexts_bytes):
+for filename_bytes in filelist:
+    file = fsdecode_display(filename_bytes)
+    if skipfile(file, options.skipfileextensions):
         skipped += 1
         continue
     try:
-        filecontents = decode_file(file)
+        filecontents = decode_file(filename_bytes)
     except OSError:
-        print("failed to open: " + file_displayname)
+        print("failed to open: " + file)
         continue
     if filecontents is None:
-        print("possible binary: " + file_displayname)
+        print("possible binary: " + file)
         continue
     opened +=1
     now = datetime.datetime.now()
@@ -339,8 +337,8 @@ for file in filelist:
         est = ((now - start) / (opened + skipped)) * len(filelist)
         est_hr, est_rem = divmod(est.total_seconds(), 3600)
         est_min, est_sec = divmod(est_rem, 60)
-        if len(file_displayname)> 52:
-            prog_file = file_displayname.split('/')[0] + "/.../" + file_displayname.split('/')[-1]
+        if len(file)> 52:
+            prog_file = file.split('/')[0] + "/.../" + file.split('/')[-1]
             if len(prog_file) > 52:
                 prog_file = prog_file[0:52]
         else:
@@ -351,7 +349,7 @@ for file in filelist:
         print(progresstext, end='', file=sys.stderr)
         sys.stdout.flush()
     datasize += len(filecontents)
-    filenamescore = scoretext(wordlist, file_displayname, options.maxwholewordlength)
+    filenamescore = scoretext(wordlist, file, options.maxwholewordlength)
     filecontentsscore = scoretext(wordlist, filecontents, options.maxwholewordlength)
     report[file] = {}
     for k in filecontentsscore.keys():
-- 
cgit v1.2.1