summary | refs | log | tree | commit | diff
path: root/suspicious
diff options
context:
space:
mode:
author Daniel Gnoutcheff <gnoutchd@softwarefreedom.org> 2016-07-25 18:24:16 -0400
committer Daniel Gnoutcheff <gnoutchd@softwarefreedom.org> 2016-07-25 18:24:16 -0400
commit 7fc88c0c901b68b6e223d6f1383bc1221eae8488 (patch)
tree b0ee28937e4d5be1938bd62afb3ae0e6424a12ea /suspicious
parent c51dcc263928f1b0610a74fc4f12a7ea60c59f6f (diff)
Don't break on strange filenames
Diffstat (limited to 'suspicious')
-rwxr-xr-x suspicious 40
1 files changed, 24 insertions, 16 deletions
diff --git a/suspicious b/suspicious
index 178ae4b..72dd9b0 100755
--- a/suspicious
+++ b/suspicious
@@ -43,6 +43,11 @@ opened = 0
datasize = 0
progresstext = ""
+
+
+fsencoding = sys.getfilesystemencoding()
+def fsdecode_display(bytestring):
+ return str(bytestring, encoding=fsencoding, errors='replace')
def sortscore(score, reverse=True):
sortedscore = sorted(score.items(), key=lambda score: score[1], reverse=reverse)
@@ -54,8 +59,8 @@ def sortscore(score, reverse=True):
return returnscore
def printscore(report):
- for i in report:
- print(i[0] + ':' + str(i[1]))
+ for filename, filescore in report:
+ print(fsdecode_display(filename) + ':' + str(filescore))
def scorewords(report):
for file in report.keys():
@@ -101,11 +106,11 @@ def scorefile(report):
def summary(report):
filescore = scorefile(report)
text = ""
- for file in sortscore(filescore):
- text += file[0] + '(' + str(file[1]) + '):'
- for word in report[file[0]].keys():
- if report[file[0]][word] > 0:
- text += word + '(' + str(report[file[0]][word]) + ');'
+ for filename, filescore in sortscore(filescore):
+ text += fsdecode_display(filename) + '(' + str(filescore) + '):'
+ for word, wordfreq in report[filename].items():
+ if wordfreq > 0:
+ text += word + '(' + str(wordfreq) + ');'
text += '\n'
return text
@@ -285,20 +290,23 @@ if options.displaysummary and options.summaryfile:
exit()
#Run a search if not displaying a existing report
-prunedirs = {'CVS', '.git', '.bzr', '.hg', '.svn'}
+prunedirs = {b'CVS', b'.git', b'.bzr', b'.hg', b'.svn'}
if len(args) > 0:
for a in args:
- filelist.extend(avfs.find(a, prunedirs))
+ filelist.extend(avfs.find(os.fsencode(a), prunedirs))
+
+skipfileexts_bytes = [os.fsencode(e) for e in options.skipfileextensions]
start = datetime.datetime.now()
for file in filelist:
- if skipfile(file, options.skipfileextensions):
+ file_displayname = fsdecode_display(file)
+ if skipfile(file, skipfileexts_bytes):
skipped += 1
continue
try:
f = avfs.open(file)
except:
- print("failed to open: " + file)
+ print("failed to open: " + file_displayname)
continue
opened +=1
now = datetime.datetime.now()
@@ -307,8 +315,8 @@ for file in filelist:
est = ((now - start) / (opened + skipped)) * len(filelist)
est_hr, est_rem = divmod(est.total_seconds(), 3600)
est_min, est_sec = divmod(est_rem, 60)
- if len(file)> 52:
- prog_file = file.split('/')[0] + "/.../" + file.split('/')[-1]
+ if len(file_displayname)> 52:
+ prog_file = file_displayname.split('/')[0] + "/.../" + file_displayname.split('/')[-1]
if len(prog_file) > 52:
prog_file = prog_file[0:52]
else:
@@ -321,13 +329,13 @@ for file in filelist:
try:
filecontents = f.read()
except UnicodeDecodeError:
- print("possible binary: " + file)
+ print("possible binary: " + file_displayname)
continue
except OSError:
- print("read error: " + file)
+ print("read error: " + file_displayname)
continue
datasize += len(filecontents)
- filenamescore = scoretext(wordlist, file, options.maxwholewordlength)
+ filenamescore = scoretext(wordlist, file_displayname, options.maxwholewordlength)
filecontentsscore = scoretext(wordlist, filecontents, options.maxwholewordlength)
report[file] = {}
for k in filecontentsscore.keys():