diff options
Diffstat (limited to 'suspicious')
| -rwxr-xr-x | suspicious | 232 | 
1 files changed, 125 insertions, 107 deletions
@@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3  # Copyright 2014, 2016 Software Freedom Law Center (www.softwarefreedom.org)  # @@ -25,11 +25,109 @@  #need to discount found words if they are substrings of common strings in text  from optparse import OptionParser -import os, os.path +import os +import os.path  import re  import sys  import datetime  import string +import subprocess + +# AVFS stuff ------------------------------------------------------------------ + +avfs_sys_mount = os.environ['HOME'] + '/.avfs'	 +avfs_started = False +def avfs_sys_name(fpath): +	if not avfs_started: +		subprocess.check_call(['mountavfs']) +	return avfs_sys_mount + os.path.abspath(fpath) + +def avfs_exists(fpath): +	return os.path.exists(avfs_sys_name(fpath)) + +def avfs_isdir(fpath): +	return os.path.isdir(avfs_sys_name(fpath)) + +def avfs_open(fpath, *pargs, **kwargs): +	return open(avfs_sys_name(fpath), *pargs, **kwargs) + +# AVFS has its own automatic view selection using file extensions, but it +# includes plugins (like #patch) that will lead us into an infinite loop +# if we try to do a directory traversal.  Also, there are a few +# extensions we want to add. + +avfscmds = { +	('.gz', '#ugz'), +	('.tgz', '#ugz#utar'), +	('.tar.bz2', '#ubz2#utar'), +	('.bz2', '#ubz2'), +	('.bz', '#ubz2'), +	('.tbz2', '#ubz2#utar'), +	('.tbz', '#ubz2#utar'), +	('.Z', '#uz'), +	('.tpz', '#uz#utar'), +	('.tz', '#uz#utar'), +	('.taz', '#uz#utar'), +	('.a', '#uar'), +	('.deb', '#uar'), +	('.tar', '#utar'), +	('.gem', '#utar'),    # Add upstream +	('.rar', '#urar'), +	('.sfx', '#urar'), +	('.zip', '#uzip'), +	('.jar', '#uzip'), +	('.ear', '#uzip'), +	('.war', '#uzip'), +	('.nupkg', '#uzip'),  # Add upstream +	('.whl', '#uzip'),    # Add upstream +	('.7z', '#u7z'), +	('.zoo', '#uzoo'), +	('.lha', '#ulha'), +	('.lhz', '#ulha'), +	('.arj', '#uarj'), +	('.cpio', '#ucpio'), +	('.rpm', '#rpm'), +	('.tar.xz', '#uxze#utar'), +	('.txz', '#uxze#utar'), +	('.xz', '#uxze'), +	('.lzma', '#uxze'), +} + +def avfs_guesscmd(filename): +	for ext, cmd in avfscmds: +		if filename.endswith(ext): +			return cmd + avfs_guesscmd(filename[:-len(ext)]) +	return '' + +def avfs_find(rootdir, prunedirs): +	""" +	Recursively list all files under rootdir, including files in archives +	supported by AVFS. +	""" + +	sys_rootdir = avfs_sys_name(rootdir) + +	for name in os.listdir(sys_rootdir): +		path = rootdir + '/' + name +		sys_path = sys_rootdir + '/' + name + +		if os.path.isdir(sys_path): +			if name not in prunedirs: +				yield from avfs_find(path, prunedirs) +		else: +			cmd = avfs_guesscmd(name) +			filtered_path = path + cmd +			sys_filtered_path = sys_path + cmd + +			if cmd and os.path.exists(sys_filtered_path): +				if os.path.isdir(sys_filtered_path): +					yield from avfs_find(filtered_path, prunedirs) +				else: +					yield filtered_path +			else: +				yield path + +# -----------------------------------------------------------------------------  report = {}  wordscore = {} @@ -52,7 +150,7 @@ def sortscore(score, reverse=True):  def printscore(report):  	for i in report: -		print i[0] + ':' + str(i[1]) +		print(i[0] + ':' + str(i[1]))  def scorewords(report):  	for file in report.keys(): @@ -66,7 +164,7 @@ def scorewords(report):  def weightreport(report, commonwords):  	notsuspiciousfiles = [] -        weightedout = 0 +	weightedout = 0  	for file in report:  		suspicious = False  		filescore = 0 @@ -81,7 +179,7 @@ def weightreport(report, commonwords):  	for file in notsuspiciousfiles:  		report.pop(file) -                weightedout +=1 +		weightedout +=1  	return report, weightedout @@ -153,87 +251,6 @@ def scoretext(wordlist, text, maxwholewordlen = -1):  			score[word] = len(wholeword(wordreg,text))  	return score - -# AVFS stuff ------------------------------------------------------------------ - -# AVFS has its own automatic view selection using file extensions, but it -# includes plugins (like #patch) that will lead us into an infinite loop -# if we try to do a directory traversal.  Also, there are a few -# extensions we want to add. - -avfscmds = { -	('.gz', '#ugz'), -	('.tgz', '#ugz#utar'), -	('.tar.bz2', '#ubz2#utar'), -	('.bz2', '#ubz2'), -	('.bz', '#ubz2'), -	('.tbz2', '#ubz2#utar'), -	('.tbz', '#ubz2#utar'), -	('.Z', '#uz'), -	('.tpz', '#uz#utar'), -	('.tz', '#uz#utar'), -	('.taz', '#uz#utar'), -	('.a', '#uar'), -	('.deb', '#uar'), -	('.tar', '#utar'), -	('.gem', '#utar'),    # Add upstream -	('.rar', '#urar'), -	('.sfx', '#urar'), -	('.zip', '#uzip'), -	('.jar', '#uzip'), -	('.ear', '#uzip'), -	('.war', '#uzip'), -	('.nupkg', '#uzip'),  # Add upstream -	('.whl', '#uzip'),    # Add upstream -	('.7z', '#u7z'), -	('.zoo', '#uzoo'), -	('.lha', '#ulha'), -	('.lhz', '#ulha'), -	('.arj', '#uarj'), -	('.cpio', '#ucpio'), -	('.rpm', '#rpm'), -	('.tar.xz', '#uxze#utar'), -	('.txz', '#uxze#utar'), -	('.xz', '#uxze'), -	('.lzma', '#uxze'), -} - -def avfs_guesscmd(filename): -	for ext, cmd in avfscmds: -		if filename.endswith(ext): -			return cmd + avfs_guesscmd(filename[:-len(ext)]) -	return '' - -def mkfilelist(rootdir): -	""" -	Produce a list of files to examine.  Use AVFS paths if available. - -	rootdir: path to directory to examine, as a string.  Preferably -	somewhere inside an AVFS mount. -	""" -	prunedirs = {'CVS', '.git', '.bzr', '.hg', '.svn'} - -	for base, dirs, files in os.walk(rootdir): -		for dname in dirs: -			if dname in prunedirs: -				dirs.remove(dname) - -		for fname in files: -			fpath = base + '/' + fname - -			view_fname = fname + avfs_guesscmd(fname) -			view_fpath = base + '/' + view_fname - -			if fname != view_fname and os.path.exists(view_fpath): -				if os.path.isdir(view_fpath): -					dirs.append(view_fname) -				else: -					yield view_fpath -			else: -				yield fpath - -# ----------------------------------------------------------------------------- -  usage = "%prog [options] DIRECTORY ... DIRECTORYN"  epilog = "example: ./suspicious ../gitcheckout -s .tar -s .gz -s .bmp -s .zip -s .ppt -s .docx -s .pdf -s .xls -s .xlsx -s .gif -s .png -s .jpg -s .css -r fw -w cryptology.txt -c -p -l 3"  parser = OptionParser(usage = usage, epilog = epilog) @@ -322,14 +339,14 @@ if options.optimizewordlist and options.wordlistfilename:                      wordlist.remove(check_word)                      break -if options.show_wordlist: print wordlist; exit() +if options.show_wordlist: print(wordlist); exit()  if options.displaysummary and options.summaryfile:  	report = dict()  	try:  		summaryfile = open(options.summaryfile)  	except: -		print "no summary file: " + options.summaryfile +		print("no summary file: " + options.summaryfile)  		exit()  	#sample input  	#../bzr.lf/lsb/devel/build_env/headers/x86-64/4.1/glib-2.0/gio/gmenuexporter.h.defs(1): export(1); @@ -356,26 +373,27 @@ if options.displaysummary and options.summaryfile:  		elif options.printreport == "w":  			printscore(sortscore(scorewords(report)))  		elif options.printreport == "wf" or options.printreport == "fw": -			print summary(report)			 +			print(summary(report))			  	else: -		print summary(report) +		print(summary(report))  	exit()  #Run a search if not displaying a existing report +prunedirs = {'CVS', '.git', '.bzr', '.hg', '.svn'}  if len(args) > 0:  	for a in args: -		filelist.extend(mkfilelist(a)) -	 +		filelist.extend(avfs_find(a, prunedirs)) +  start = datetime.datetime.now()  for file in filelist:  	if skipfile(file, options.skipfileextensions):  		skipped += 1  		continue  	try: -		f = open(file) +		f = avfs_open(file)  	except: -		print "failed to open: " + file +		print("failed to open: " + file)  		continue  	opened +=1  	now = datetime.datetime.now() @@ -385,9 +403,9 @@ for file in filelist:  			prog_file = file.split('/')[0] + "/.../" + file.split('/')[-1]  		else:  			prog_file = file -		print '\r' + " " * len(progresstext) + '\r', +		print('\r' + " " * len(progresstext) + '\r')  		progresstext = str(((opened + skipped)*1.0/len(filelist))*100)[:5] + '% '+ " time left:" + str(estimate).split('.')[0] + ' ' + prog_file + '\r' -		print progresstext, +		print(progresstext)  	sys.stdout.flush()  	filecontents = f.read()  	datasize += len(filecontents)		 @@ -399,7 +417,7 @@ for file in filelist:  #Clear screen of proggress text now that finished scoring file  if options.display_progress:  -	print '\r' + " " * len(progresstext) + '\r', +	print('\r' + " " * len(progresstext) + '\r')  #Save summary as a file, but if the filename exists do not overwrite, append a number  if options.summaryfile and len(filelist) > 0 and not options.displaysummary: @@ -409,13 +427,13 @@ if options.summaryfile and len(filelist) > 0 and not options.displaysummary:  		counter +=1  		summaryfilename = options.summaryfile + '.' + str(counter)  	try: -		if counter > 1: print "saving as " + summaryfilename + "...."	 +		if counter > 1: print("saving as " + summaryfilename + "....")  		summaryfile = open(summaryfilename, 'w+')  		summaryfile.write(summary(report))  		summaryfile.close()		  	except: -		print report -		print "error saving summary as " + summaryfilename +		print(report) +		print("error saving summary as " + summaryfilename)  if options.commonwordfilename and not(options.dontweightreport):  	report, weightedfiles = weightreport(report, commonwords) @@ -424,15 +442,15 @@ if options.printreport and not options.dontdisplaysummary:  	if options.printreport == "f":  		printscore(sortscore(scorefile(report)))  	elif options.printreport == "wf" or options.printreport == "fw": -		print summary(report) +		print(summary(report))  	else:  		printscore(sortscore(scorewords(report)))  if options.display_counts: -	print "total files:" + str(len(filelist)) , -	print "suspicious files:" + str(len(sortscore(scorefile(report)))) , -	print "skipped files:" + str(skipped), +	print("total files:" + str(len(filelist))) +	print("suspicious files:" + str(len(sortscore(scorefile(report))))) +	print("skipped files:" + str(skipped))  	if options.commonwordfilename and not(options.dontweightreport): -		print "removed weighted files:" + str(weightedfiles), -	print "searched:" + str(datasize) + 'B',  -	print "time:" + str(datetime.datetime.now() - start).split('.')[0] +		print("removed weighted files:" + str(weightedfiles)) +	print("searched:" + str(datasize) + 'B') +	print("time:" + str(datetime.datetime.now() - start).split('.')[0])  | 
