diff options
author | Daniel Gnoutcheff <gnoutchd@softwarefreedom.org> | 2016-06-29 14:58:33 -0400 |
---|---|---|
committer | Daniel Gnoutcheff <gnoutchd@softwarefreedom.org> | 2016-06-29 14:58:33 -0400 |
commit | 66f706f3669af849397b408f2522db5d7d5f2ff3 (patch) | |
tree | 8db4dee67311134036c6a4a591a1701fae44a7bd /suspicious | |
parent | 10b7b82dd70ea48ec16d1bc9d67baf49ebef4308 (diff) |
preliminary avfs support
Diffstat (limited to 'suspicious')
-rwxr-xr-x | suspicious | 117 |
1 files changed, 89 insertions, 28 deletions
@@ -1,6 +1,6 @@ #!/usr/bin/python -# Copyright 2014 Software Freedom Law Center (www.softwarefreedom.org) +# Copyright 2014, 2016 Software Freedom Law Center (www.softwarefreedom.org) # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,18 +15,17 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. -#Author: Marc Jones <mjones@softwarefreedom.org> -#Date: June 30, 2014 -#Version 0.2.1 -#Added weight score function to remove scores from files that otherwise would not have scores -#Added Remove superstrings of other words from search to speed things up if they are greater than wordlength +#Authors: Marc Jones <mjones@softwarefreedom.org>, Daniel Gnoutcheff <gnoutchd@softwarefreedom.org> +#Date: June 2016 +#Version 0.3.0 +#Added AVFS support ##TODO #need to verify that word score counts each instance, not just 0 or 1 #need to discount found words if they are substrings of common strings in text from optparse import OptionParser -import os +import os, os.path import re import sys import datetime @@ -154,12 +153,90 @@ def scoretext(wordlist, text, maxwholewordlen = -1): score[word] = len(wholeword(wordreg,text)) return score + +# AVFS stuff ------------------------------------------------------------------ + +# AVFS has its own automatic view selection using file extensions, but it +# includes plugins (like #patch) that will lead us into an infinite loop +# if we try to do a directory traversal. Also, there are a few +# extensions we want to add. + +avfscmds = { + ('.gz', '#ugz'), + ('.tgz', '#ugz#utar'), + ('.tar.bz2', '#ubz2#utar'), + ('.bz2', '#ubz2'), + ('.bz', '#ubz2'), + ('.tbz2', '#ubz2#utar'), + ('.tbz', '#ubz2#utar'), + ('.Z', '#uz'), + ('.tpz', '#uz#utar'), + ('.tz', '#uz#utar'), + ('.taz', '#uz#utar'), + ('.a', '#uar'), + ('.deb', '#uar'), + ('.tar', '#utar'), + ('.gem', '#utar'), # Add upstream + ('.rar', '#urar'), + ('.sfx', '#urar'), + ('.zip', '#uzip'), + ('.jar', '#uzip'), + ('.ear', '#uzip'), + ('.war', '#uzip'), + ('.nupkg', '#uzip'), # Add upstream + ('.whl', '#uzip'), # Add upstream + ('.7z', '#u7z'), + ('.zoo', '#uzoo'), + ('.lha', '#ulha'), + ('.lhz', '#ulha'), + ('.arj', '#uarj'), + ('.cpio', '#ucpio'), + ('.rpm', '#rpm'), + ('.tar.xz', '#uxze#utar'), + ('.txz', '#uxze#utar'), + ('.xz', '#uxze'), + ('.lzma', '#uxze'), +} + +def avfs_guesscmd(filename): + for ext, cmd in avfscmds: + if filename.endswith(ext): + return cmd + avfs_guesscmd(filename[:-len(ext)]) + return '' + +def mkfilelist(rootdir): + """ + Produce a list of files to examine. Use AVFS paths if available. + + rootdir: path to directory to examine, as a string. Preferably + somewhere inside an AVFS mount. + """ + prunedirs = {'CVS', '.git', '.bzr', '.hg', '.svn'} + + for base, dirs, files in os.walk(rootdir): + for dname in dirs: + if dname in prunedirs: + dirs.remove(dname) + + for fname in files: + fpath = base + '/' + fname + + view_fname = fname + avfs_guesscmd(fname) + view_fpath = base + '/' + view_fname + + if fname != view_fname and os.path.exists(view_fpath): + if os.path.isdir(view_fpath): + dirs.append(view_fname) + else: + yield view_fpath + else: + yield fpath + +# ----------------------------------------------------------------------------- + usage = "%prog [options] DIRECTORY ... DIRECTORYN" epilog = "example: ./suspicious ../gitcheckout -s .tar -s .gz -s .bmp -s .zip -s .ppt -s .docx -s .pdf -s .xls -s .xlsx -s .gif -s .png -s .jpg -s .css -r fw -w cryptology.txt -c -p -l 3" parser = OptionParser(usage = usage, epilog = epilog) -parser.add_option("-f", "--file", - dest="suspiciousfilename", - help="specify file to scan", action="append") parser.add_option("-w", "--wordlist", dest="wordlistfilename", help="file containing all of the words to look for") @@ -285,27 +362,11 @@ if options.displaysummary and options.summaryfile: exit() -#Run a serarch if not displaying a existing report +#Run a search if not displaying a existing report if len(args) > 0: for a in args: - for (path, dirs, files) in os.walk(a): - if 'CVS' in dirs: - dirs.remove('CVS') - if '.git' in dirs: - dirs.remove('.git') - if '.bzr' in dirs: - dirs.remove('.bzr') - if '.hg' in dirs: - dirs.remove('.hg') - if '.svn' in dirs: - dirs.remove('.svn') - - for file in files: - filelist.append(path + '/' + file) + filelist.extend(mkfilelist(a)) -if options.suspiciousfilename: - filelist += options.suspiciousfilename - start = datetime.datetime.now() for file in filelist: if skipfile(file, options.skipfileextensions): |