From 7fc88c0c901b68b6e223d6f1383bc1221eae8488 Mon Sep 17 00:00:00 2001 From: Daniel Gnoutcheff Date: Mon, 25 Jul 2016 18:24:16 -0400 Subject: Don't break on strange filenames --- suspicious | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) (limited to 'suspicious') diff --git a/suspicious b/suspicious index 178ae4b..72dd9b0 100755 --- a/suspicious +++ b/suspicious @@ -43,6 +43,11 @@ opened = 0 datasize = 0 progresstext = "" + + +fsencoding = sys.getfilesystemencoding() +def fsdecode_display(bytestring): + return str(bytestring, encoding=fsencoding, errors='replace') def sortscore(score, reverse=True): sortedscore = sorted(score.items(), key=lambda score: score[1], reverse=reverse) @@ -54,8 +59,8 @@ def sortscore(score, reverse=True): return returnscore def printscore(report): - for i in report: - print(i[0] + ':' + str(i[1])) + for filename, filescore in report: + print(fsdecode_display(filename) + ':' + str(filescore)) def scorewords(report): for file in report.keys(): @@ -101,11 +106,11 @@ def scorefile(report): def summary(report): filescore = scorefile(report) text = "" - for file in sortscore(filescore): - text += file[0] + '(' + str(file[1]) + '):' - for word in report[file[0]].keys(): - if report[file[0]][word] > 0: - text += word + '(' + str(report[file[0]][word]) + ');' + for filename, filescore in sortscore(filescore): + text += fsdecode_display(filename) + '(' + str(filescore) + '):' + for word, wordfreq in report[filename].items(): + if wordfreq > 0: + text += word + '(' + str(wordfreq) + ');' text += '\n' return text @@ -285,20 +290,23 @@ if options.displaysummary and options.summaryfile: exit() #Run a search if not displaying a existing report -prunedirs = {'CVS', '.git', '.bzr', '.hg', '.svn'} +prunedirs = {b'CVS', b'.git', b'.bzr', b'.hg', b'.svn'} if len(args) > 0: for a in args: - filelist.extend(avfs.find(a, prunedirs)) + filelist.extend(avfs.find(os.fsencode(a), prunedirs)) + +skipfileexts_bytes = [os.fsencode(e) for e in options.skipfileextensions] start = datetime.datetime.now() for file in filelist: - if skipfile(file, options.skipfileextensions): + file_displayname = fsdecode_display(file) + if skipfile(file, skipfileexts_bytes): skipped += 1 continue try: f = avfs.open(file) except: - print("failed to open: " + file) + print("failed to open: " + file_displayname) continue opened +=1 now = datetime.datetime.now() @@ -307,8 +315,8 @@ for file in filelist: est = ((now - start) / (opened + skipped)) * len(filelist) est_hr, est_rem = divmod(est.total_seconds(), 3600) est_min, est_sec = divmod(est_rem, 60) - if len(file)> 52: - prog_file = file.split('/')[0] + "/.../" + file.split('/')[-1] + if len(file_displayname)> 52: + prog_file = file_displayname.split('/')[0] + "/.../" + file_displayname.split('/')[-1] if len(prog_file) > 52: prog_file = prog_file[0:52] else: @@ -321,13 +329,13 @@ for file in filelist: try: filecontents = f.read() except UnicodeDecodeError: - print("possible binary: " + file) + print("possible binary: " + file_displayname) continue except OSError: - print("read error: " + file) + print("read error: " + file_displayname) continue datasize += len(filecontents) - filenamescore = scoretext(wordlist, file, options.maxwholewordlength) + filenamescore = scoretext(wordlist, file_displayname, options.maxwholewordlength) filecontentsscore = scoretext(wordlist, filecontents, options.maxwholewordlength) report[file] = {} for k in filecontentsscore.keys(): -- cgit v1.2.3