summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Daniel Gnoutcheff <gnoutchd@softwarefreedom.org> 2016-07-25 18:24:16 -0400
committer Daniel Gnoutcheff <gnoutchd@softwarefreedom.org> 2016-07-25 18:24:16 -0400
commit 7fc88c0c901b68b6e223d6f1383bc1221eae8488 (patch)
tree b0ee28937e4d5be1938bd62afb3ae0e6424a12ea
parent c51dcc263928f1b0610a74fc4f12a7ea60c59f6f (diff)
Don't break on strange filenames
-rw-r--r-- avfs.py 78
-rwxr-xr-x suspicious 40
2 files changed, 63 insertions, 55 deletions
diff --git a/avfs.py b/avfs.py
index 5e877ef..5ab7bea 100644
--- a/avfs.py
+++ b/avfs.py
@@ -9,7 +9,7 @@ import os.path
import subprocess
import stat
-avfs_sys_mount = os.environ['HOME'] + '/.avfs'
+avfs_sys_mount = os.fsencode(os.environ['HOME'] + '/.avfs')
avfs_started = False
def sys_name(fpath):
@@ -35,47 +35,47 @@ def open(fpath, *pargs, **kwargs):
# extensions we want to add.
avfscmds = {
- ('.gz', '#ugz'),
- ('.tgz', '#ugz#utar'),
- ('.tar.bz2', '#ubz2#utar'),
- ('.bz2', '#ubz2'),
- ('.bz', '#ubz2'),
- ('.tbz2', '#ubz2#utar'),
- ('.tbz', '#ubz2#utar'),
- ('.Z', '#uz'),
- ('.tpz', '#uz#utar'),
- ('.tz', '#uz#utar'),
- ('.taz', '#uz#utar'),
- ('.a', '#uar'),
- ('.deb', '#uar'),
- ('.tar', '#utar'),
- ('.gem', '#utar'), # Add upstream
- ('.rar', '#urar'),
- ('.sfx', '#urar'),
- ('.zip', '#uzip'),
- ('.jar', '#uzip'),
- ('.ear', '#uzip'),
- ('.war', '#uzip'),
- ('.nupkg', '#uzip'), # Add upstream
- ('.whl', '#uzip'), # Add upstream
- ('.7z', '#u7z'),
- ('.zoo', '#uzoo'),
- ('.lha', '#ulha'),
- ('.lhz', '#ulha'),
- ('.arj', '#uarj'),
- ('.cpio', '#ucpio'),
- ('.rpm', '#rpm'),
- ('.tar.xz', '#uxze#utar'),
- ('.txz', '#uxze#utar'),
- ('.xz', '#uxze'),
- ('.lzma', '#uxze'),
+ (b'.gz', b'#ugz'),
+ (b'.tgz', b'#ugz#utar'),
+ (b'.tar.bz2', b'#ubz2#utar'),
+ (b'.bz2', b'#ubz2'),
+ (b'.bz', b'#ubz2'),
+ (b'.tbz2', b'#ubz2#utar'),
+ (b'.tbz', b'#ubz2#utar'),
+ (b'.Z', b'#uz'),
+ (b'.tpz', b'#uz#utar'),
+ (b'.tz', b'#uz#utar'),
+ (b'.taz', b'#uz#utar'),
+ (b'.a', b'#uar'),
+ (b'.deb', b'#uar'),
+ (b'.tar', b'#utar'),
+ (b'.gem', b'#utar'), # Add upstream
+ (b'.rar', b'#urar'),
+ (b'.sfx', b'#urar'),
+ (b'.zip', b'#uzip'),
+ (b'.jar', b'#uzip'),
+ (b'.ear', b'#uzip'),
+ (b'.war', b'#uzip'),
+ (b'.nupkg', b'#uzip'), # Add upstream
+ (b'.whl', b'#uzip'), # Add upstream
+ (b'.7z', b'#u7z'),
+ (b'.zoo', b'#uzoo'),
+ (b'.lha', b'#ulha'),
+ (b'.lhz', b'#ulha'),
+ (b'.arj', b'#uarj'),
+ (b'.cpio', b'#ucpio'),
+ (b'.rpm', b'#rpm'),
+ (b'.tar.xz', b'#uxze#utar'),
+ (b'.txz', b'#uxze#utar'),
+ (b'.xz', b'#uxze'),
+ (b'.lzma', b'#uxze'),
}
def guesscmd(filename):
for ext, cmd in avfscmds:
if filename.endswith(ext):
return cmd + guesscmd(filename[:-len(ext)])
- return ''
+ return b''
def get_lstat_mode(filename):
"""
@@ -115,11 +115,11 @@ def find(path, excludes):
if stat.S_ISDIR(mode):
for entry in os.listdir(sys_path):
- yield from find(path + '/' + entry, excludes)
+ yield from find(path + b'/' + entry, excludes)
elif stat.S_ISREG(mode):
yield path
if __name__ == "__main__":
import sys
- for f in find(sys.argv[1], {'.git'}):
- print(f)
+ for f in find(os.fsencode(sys.argv[1]), {b'.git'}):
+ print(f.decode('utf-8', 'replace'))
diff --git a/suspicious b/suspicious
index 178ae4b..72dd9b0 100755
--- a/suspicious
+++ b/suspicious
@@ -43,6 +43,11 @@ opened = 0
datasize = 0
progresstext = ""
+
+
+fsencoding = sys.getfilesystemencoding()
+def fsdecode_display(bytestring):
+ return str(bytestring, encoding=fsencoding, errors='replace')
def sortscore(score, reverse=True):
sortedscore = sorted(score.items(), key=lambda score: score[1], reverse=reverse)
@@ -54,8 +59,8 @@ def sortscore(score, reverse=True):
return returnscore
def printscore(report):
- for i in report:
- print(i[0] + ':' + str(i[1]))
+ for filename, filescore in report:
+ print(fsdecode_display(filename) + ':' + str(filescore))
def scorewords(report):
for file in report.keys():
@@ -101,11 +106,11 @@ def scorefile(report):
def summary(report):
filescore = scorefile(report)
text = ""
- for file in sortscore(filescore):
- text += file[0] + '(' + str(file[1]) + '):'
- for word in report[file[0]].keys():
- if report[file[0]][word] > 0:
- text += word + '(' + str(report[file[0]][word]) + ');'
+ for filename, filescore in sortscore(filescore):
+ text += fsdecode_display(filename) + '(' + str(filescore) + '):'
+ for word, wordfreq in report[filename].items():
+ if wordfreq > 0:
+ text += word + '(' + str(wordfreq) + ');'
text += '\n'
return text
@@ -285,20 +290,23 @@ if options.displaysummary and options.summaryfile:
exit()
#Run a search if not displaying a existing report
-prunedirs = {'CVS', '.git', '.bzr', '.hg', '.svn'}
+prunedirs = {b'CVS', b'.git', b'.bzr', b'.hg', b'.svn'}
if len(args) > 0:
for a in args:
- filelist.extend(avfs.find(a, prunedirs))
+ filelist.extend(avfs.find(os.fsencode(a), prunedirs))
+
+skipfileexts_bytes = [os.fsencode(e) for e in options.skipfileextensions]
start = datetime.datetime.now()
for file in filelist:
- if skipfile(file, options.skipfileextensions):
+ file_displayname = fsdecode_display(file)
+ if skipfile(file, skipfileexts_bytes):
skipped += 1
continue
try:
f = avfs.open(file)
except:
- print("failed to open: " + file)
+ print("failed to open: " + file_displayname)
continue
opened +=1
now = datetime.datetime.now()
@@ -307,8 +315,8 @@ for file in filelist:
est = ((now - start) / (opened + skipped)) * len(filelist)
est_hr, est_rem = divmod(est.total_seconds(), 3600)
est_min, est_sec = divmod(est_rem, 60)
- if len(file)> 52:
- prog_file = file.split('/')[0] + "/.../" + file.split('/')[-1]
+ if len(file_displayname)> 52:
+ prog_file = file_displayname.split('/')[0] + "/.../" + file_displayname.split('/')[-1]
if len(prog_file) > 52:
prog_file = prog_file[0:52]
else:
@@ -321,13 +329,13 @@ for file in filelist:
try:
filecontents = f.read()
except UnicodeDecodeError:
- print("possible binary: " + file)
+ print("possible binary: " + file_displayname)
continue
except OSError:
- print("read error: " + file)
+ print("read error: " + file_displayname)
continue
datasize += len(filecontents)
- filenamescore = scoretext(wordlist, file, options.maxwholewordlength)
+ filenamescore = scoretext(wordlist, file_displayname, options.maxwholewordlength)
filecontentsscore = scoretext(wordlist, filecontents, options.maxwholewordlength)
report[file] = {}
for k in filecontentsscore.keys():