diff options
| author | Daniel Gnoutcheff <gnoutchd@softwarefreedom.org> | 2016-06-29 14:58:33 -0400 | 
|---|---|---|
| committer | Daniel Gnoutcheff <gnoutchd@softwarefreedom.org> | 2016-06-29 14:58:33 -0400 | 
| commit | 66f706f3669af849397b408f2522db5d7d5f2ff3 (patch) | |
| tree | 8db4dee67311134036c6a4a591a1701fae44a7bd /suspicious | |
| parent | 10b7b82dd70ea48ec16d1bc9d67baf49ebef4308 (diff) | |
preliminary avfs support
Diffstat (limited to 'suspicious')
| -rwxr-xr-x | suspicious | 117 | 
1 files changed, 89 insertions, 28 deletions
| @@ -1,6 +1,6 @@  #!/usr/bin/python -# Copyright 2014 Software Freedom Law Center (www.softwarefreedom.org) +# Copyright 2014, 2016 Software Freedom Law Center (www.softwarefreedom.org)  #  #    This program is free software: you can redistribute it and/or modify  #    it under the terms of the GNU General Public License as published by @@ -15,18 +15,17 @@  #    You should have received a copy of the GNU General Public License  #    along with this program.  If not, see <http://www.gnu.org/licenses/>. -#Author: Marc Jones <mjones@softwarefreedom.org> -#Date: June 30, 2014 -#Version 0.2.1 -#Added weight score function to remove scores from files that otherwise would not have scores -#Added Remove superstrings of other words from search to speed things up if they are greater than wordlength +#Authors: Marc Jones <mjones@softwarefreedom.org>, Daniel Gnoutcheff <gnoutchd@softwarefreedom.org> +#Date: June 2016 +#Version 0.3.0 +#Added AVFS support  ##TODO  #need to verify that word score counts each instance, not just 0 or 1  #need to discount found words if they are substrings of common strings in text  from optparse import OptionParser -import os +import os, os.path  import re  import sys  import datetime @@ -154,12 +153,90 @@ def scoretext(wordlist, text, maxwholewordlen = -1):  			score[word] = len(wholeword(wordreg,text))  	return score + +# AVFS stuff ------------------------------------------------------------------ + +# AVFS has its own automatic view selection using file extensions, but it +# includes plugins (like #patch) that will lead us into an infinite loop +# if we try to do a directory traversal.  Also, there are a few +# extensions we want to add. + +avfscmds = { +	('.gz', '#ugz'), +	('.tgz', '#ugz#utar'), +	('.tar.bz2', '#ubz2#utar'), +	('.bz2', '#ubz2'), +	('.bz', '#ubz2'), +	('.tbz2', '#ubz2#utar'), +	('.tbz', '#ubz2#utar'), +	('.Z', '#uz'), +	('.tpz', '#uz#utar'), +	('.tz', '#uz#utar'), +	('.taz', '#uz#utar'), +	('.a', '#uar'), +	('.deb', '#uar'), +	('.tar', '#utar'), +	('.gem', '#utar'),    # Add upstream +	('.rar', '#urar'), +	('.sfx', '#urar'), +	('.zip', '#uzip'), +	('.jar', '#uzip'), +	('.ear', '#uzip'), +	('.war', '#uzip'), +	('.nupkg', '#uzip'),  # Add upstream +	('.whl', '#uzip'),    # Add upstream +	('.7z', '#u7z'), +	('.zoo', '#uzoo'), +	('.lha', '#ulha'), +	('.lhz', '#ulha'), +	('.arj', '#uarj'), +	('.cpio', '#ucpio'), +	('.rpm', '#rpm'), +	('.tar.xz', '#uxze#utar'), +	('.txz', '#uxze#utar'), +	('.xz', '#uxze'), +	('.lzma', '#uxze'), +} + +def avfs_guesscmd(filename): +	for ext, cmd in avfscmds: +		if filename.endswith(ext): +			return cmd + avfs_guesscmd(filename[:-len(ext)]) +	return '' + +def mkfilelist(rootdir): +	""" +	Produce a list of files to examine.  Use AVFS paths if available. + +	rootdir: path to directory to examine, as a string.  Preferably +	somewhere inside an AVFS mount. +	""" +	prunedirs = {'CVS', '.git', '.bzr', '.hg', '.svn'} + +	for base, dirs, files in os.walk(rootdir): +		for dname in dirs: +			if dname in prunedirs: +				dirs.remove(dname) + +		for fname in files: +			fpath = base + '/' + fname + +			view_fname = fname + avfs_guesscmd(fname) +			view_fpath = base + '/' + view_fname + +			if fname != view_fname and os.path.exists(view_fpath): +				if os.path.isdir(view_fpath): +					dirs.append(view_fname) +				else: +					yield view_fpath +			else: +				yield fpath + +# ----------------------------------------------------------------------------- +  usage = "%prog [options] DIRECTORY ... DIRECTORYN"  epilog = "example: ./suspicious ../gitcheckout -s .tar -s .gz -s .bmp -s .zip -s .ppt -s .docx -s .pdf -s .xls -s .xlsx -s .gif -s .png -s .jpg -s .css -r fw -w cryptology.txt -c -p -l 3"  parser = OptionParser(usage = usage, epilog = epilog) -parser.add_option("-f", "--file",  -		dest="suspiciousfilename",  -		help="specify file to scan", action="append")  parser.add_option("-w", "--wordlist",   		dest="wordlistfilename",   		help="file containing all of the words to look for") @@ -285,27 +362,11 @@ if options.displaysummary and options.summaryfile:  	exit() -#Run a serarch if not displaying a existing report +#Run a search if not displaying a existing report  if len(args) > 0:  	for a in args: -		for (path, dirs, files) in os.walk(a): -			if 'CVS' in dirs: -				dirs.remove('CVS') -			if '.git' in dirs: -				dirs.remove('.git') -			if '.bzr' in dirs: -				dirs.remove('.bzr') -			if '.hg' in dirs: -				dirs.remove('.hg') -			if '.svn' in dirs: -				dirs.remove('.svn') -	 -			for file in files: -				filelist.append(path + '/' + file) +		filelist.extend(mkfilelist(a)) -if options.suspiciousfilename: -	filelist += options.suspiciousfilename -  start = datetime.datetime.now()  for file in filelist:  	if skipfile(file, options.skipfileextensions): | 
