summary refs log tree commit diff
diff options
context:
space:
mode:
author Daniel Gnoutcheff <gnoutchd@softwarefreedom.org> 2016-06-29 14:58:33 -0400
committer Daniel Gnoutcheff <gnoutchd@softwarefreedom.org> 2016-06-29 14:58:33 -0400
commit 66f706f3669af849397b408f2522db5d7d5f2ff3 (patch)
tree 8db4dee67311134036c6a4a591a1701fae44a7bd
parent 10b7b82dd70ea48ec16d1bc9d67baf49ebef4308 (diff)
preliminary avfs support
-rwxr-xr-x suspicious 117
1 files changed, 89 insertions, 28 deletions
diff --git a/suspicious b/suspicious
index 487a844..d4ea346 100755
--- a/suspicious
+++ b/suspicious
@@ -1,6 +1,6 @@
#!/usr/bin/python
-# Copyright 2014 Software Freedom Law Center (www.softwarefreedom.org)
+# Copyright 2014, 2016 Software Freedom Law Center (www.softwarefreedom.org)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -15,18 +15,17 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#Author: Marc Jones <mjones@softwarefreedom.org>
-#Date: June 30, 2014
-#Version 0.2.1
-#Added weight score function to remove scores from files that otherwise would not have scores
-#Added Remove superstrings of other words from search to speed things up if they are greater than wordlength
+#Authors: Marc Jones <mjones@softwarefreedom.org>, Daniel Gnoutcheff <gnoutchd@softwarefreedom.org>
+#Date: June 2016
+#Version 0.3.0
+#Added AVFS support
##TODO
#need to verify that word score counts each instance, not just 0 or 1
#need to discount found words if they are substrings of common strings in text
from optparse import OptionParser
-import os
+import os, os.path
import re
import sys
import datetime
@@ -154,12 +153,90 @@ def scoretext(wordlist, text, maxwholewordlen = -1):
score[word] = len(wholeword(wordreg,text))
return score
+
+# AVFS stuff ------------------------------------------------------------------
+
+# AVFS has its own automatic view selection using file extensions, but it
+# includes plugins (like #patch) that will lead us into an infinite loop
+# if we try to do a directory traversal. Also, there are a few
+# extensions we want to add.
+
+# Set of (filename suffix, AVFS view command) pairs consulted by
+# avfs_guesscmd().  Suffixes are matched case-sensitively against the end
+# of a file name.  Compound suffixes such as '.tar.bz2' map to a chained
+# command that unwraps the compression layer first and the archive second.
+# NOTE: this is a set, so its iteration order is unspecified.
+avfscmds = {
+ ('.gz', '#ugz'),
+ ('.tgz', '#ugz#utar'),
+ ('.tar.bz2', '#ubz2#utar'),
+ ('.bz2', '#ubz2'),
+ ('.bz', '#ubz2'),
+ ('.tbz2', '#ubz2#utar'),
+ ('.tbz', '#ubz2#utar'),
+ ('.Z', '#uz'),
+ ('.tpz', '#uz#utar'),
+ ('.tz', '#uz#utar'),
+ ('.taz', '#uz#utar'),
+ ('.a', '#uar'),
+ ('.deb', '#uar'),
+ ('.tar', '#utar'),
+ ('.gem', '#utar'), # TODO: add upstream
+ ('.rar', '#urar'),
+ ('.sfx', '#urar'),
+ ('.zip', '#uzip'),
+ ('.jar', '#uzip'),
+ ('.ear', '#uzip'),
+ ('.war', '#uzip'),
+ ('.nupkg', '#uzip'), # TODO: add upstream
+ ('.whl', '#uzip'), # TODO: add upstream
+ ('.7z', '#u7z'),
+ ('.zoo', '#uzoo'),
+ ('.lha', '#ulha'),
+ ('.lhz', '#ulha'),
+ ('.arj', '#uarj'),
+ ('.cpio', '#ucpio'),
+ ('.rpm', '#rpm'),
+ ('.tar.xz', '#uxze#utar'),
+ ('.txz', '#uxze#utar'),
+ ('.xz', '#uxze'),
+ ('.lzma', '#uxze'),
+}
+
+def avfs_guesscmd(filename):
+    """
+    Build the AVFS view command string for filename.
+
+    Recognised suffixes from avfscmds are peeled off the end of the name
+    one at a time, outermost first, and their commands are concatenated
+    in that order (e.g. 'x.tar.gz' gives '#ugz#utar').  Returns '' when
+    the name ends in no known suffix.
+    """
+    commands = ''
+    remainder = filename
+    while True:
+        for suffix, command in avfscmds:
+            if remainder.endswith(suffix):
+                commands += command
+                remainder = remainder[:-len(suffix)]
+                break
+        else:
+            # No suffix matched: nothing left to unwrap.
+            return commands
+
+def mkfilelist(rootdir):
+    """
+    Yield the paths of the files to examine under rootdir.
+
+    Walks rootdir with os.walk, skipping version-control metadata
+    directories.  For every file whose name maps to an AVFS view command
+    (see avfs_guesscmd), the view path is tried: a view that is a
+    directory is queued so that its contents are walked too, a view that
+    exists but is not a directory is yielded in place of the file, and
+    when no view exists the plain file path is yielded.
+
+    rootdir: path to directory to examine, as a string.  Preferably
+             somewhere inside an AVFS mount.
+    """
+    prunedirs = {'CVS', '.git', '.bzr', '.hg', '.svn'}
+
+    for base, dirs, files in os.walk(rootdir):
+        # Prune in place so os.walk honours the change.  Slice assignment
+        # is used instead of dirs.remove() inside a loop over dirs, which
+        # skips the entry following each removal and could leave a
+        # version-control directory unpruned.
+        dirs[:] = [dname for dname in dirs if dname not in prunedirs]
+
+        for fname in files:
+            view_fname = fname + avfs_guesscmd(fname)
+            view_fpath = base + '/' + view_fname
+
+            if fname != view_fname and os.path.exists(view_fpath):
+                if os.path.isdir(view_fpath):
+                    # Archive view: have os.walk descend into it.
+                    dirs.append(view_fname)
+                else:
+                    yield view_fpath
+            else:
+                yield base + '/' + fname
+
+# -----------------------------------------------------------------------------
+
usage = "%prog [options] DIRECTORY ... DIRECTORYN"
epilog = "example: ./suspicious ../gitcheckout -s .tar -s .gz -s .bmp -s .zip -s .ppt -s .docx -s .pdf -s .xls -s .xlsx -s .gif -s .png -s .jpg -s .css -r fw -w cryptology.txt -c -p -l 3"
parser = OptionParser(usage = usage, epilog = epilog)
-parser.add_option("-f", "--file",
- dest="suspiciousfilename",
- help="specify file to scan", action="append")
parser.add_option("-w", "--wordlist",
dest="wordlistfilename",
help="file containing all of the words to look for")
@@ -285,27 +362,11 @@ if options.displaysummary and options.summaryfile:
exit()
-#Run a serarch if not displaying a existing report
+#Run a search if not displaying an existing report
if len(args) > 0:
for a in args:
- for (path, dirs, files) in os.walk(a):
- if 'CVS' in dirs:
- dirs.remove('CVS')
- if '.git' in dirs:
- dirs.remove('.git')
- if '.bzr' in dirs:
- dirs.remove('.bzr')
- if '.hg' in dirs:
- dirs.remove('.hg')
- if '.svn' in dirs:
- dirs.remove('.svn')
-
- for file in files:
- filelist.append(path + '/' + file)
+ filelist.extend(mkfilelist(a))
-if options.suspiciousfilename:
- filelist += options.suspiciousfilename
-
start = datetime.datetime.now()
for file in filelist:
if skipfile(file, options.skipfileextensions):