summaryrefslogtreecommitdiff
path: root/main.py~
diff options
context:
space:
mode:
authorMarc Jones <mjones@softwarefreedom.org>2014-02-05 17:09:01 -0500
committerMarc Jones <mjones@softwarefreedom.org>2014-02-05 17:09:01 -0500
commit02f79fecf9fa49f3271beb0fc7e3c859e1920979 (patch)
tree0e74bf02d13e63c85eec74e796f183dd92a7655d /main.py~
parent26b444b0277d5253f0ee9ca3325287cc6eb1b8d5 (diff)
updated crpto word list
Diffstat (limited to 'main.py~')
-rwxr-xr-xmain.py~264
1 files changed, 0 insertions, 264 deletions
diff --git a/main.py~ b/main.py~
deleted file mode 100755
index 9dc8af5..0000000
--- a/main.py~
+++ /dev/null
@@ -1,264 +0,0 @@
-#!/usr/bin/python
-#example command
-
-from optparse import OptionParser
-import os
-import re
-import sys
-import datetime
-import string
-
-report = {}
-wordscore = {}
-filescore = {}
-filelist = list()
-skipped = 0
-opened = 0
-datasize = 0
-progresstext = ""
-
-def sortscore(score, reverse=False):
- sortedscore = sorted(score.items(), key=lambda score: score[1], reverse=reverse)
- returnscore = []
- for s in sortedscore:
- if s[1] > 0:
- returnscore.append(s)
-
- return returnscore
-
-def printscore(report):
- for i in report:
- print i[0] + ':' + str(i[1])
-
-def scorewords(report):
- for file in report.keys():
- for word in report[file].keys():
- if not word in wordscore:
- wordscore[word] = 0
- if not file in filescore:
- filescore[file] = 0
- wordscore[word] += report[file][word]
- return wordscore
-
-def scorefile(report):
- for file in report.keys():
- for word in report[file].keys():
- if not word in wordscore:
- wordscore[word] = 0
- if not file in filescore:
- filescore[file] = 0
- filescore[file] += report[file][word]
- return filescore
-
-def summary(report):
- filescore = scorefile(report)
- text = ""
- for file in sortscore(filescore):
- text += file[0] + '(' + str(file[1]) + '):'
- for word in report[file[0]].keys():
- if report[file[0]][word] > 0:
- text += word + '(' + str(report[file[0]][word]) + ');'
- text += '\n'
- return text
-
-def wholeword(word, string):
- re.purge()
- matches = []
-
- if word.isdigit():
- int(word)
- regexNum = r'([^0-9]|\b)(' + word + r')([^0-9]|\b)'
- mN = re.search(regexNum, string)
- if "groups" in dir(mN):
- matches.append(mN.groups())
-
- else:
- regexU = r'([A-Z]|[^a-zA-Z]|\b)(' + word.lower() + r')([A-Z]|[^a-zA-Z]|\b)'
- regexL = r'([a-z]|[^a-zA-Z]|\b)(' + word.upper() + r')([a-z]|[^a-zA-Z]|\b)'
- mU = re.search(regexU, string)
- if "groups" in dir(mU):
- matches.append(mU.groups())
- re.purge()
- mL = re.search(regexL, string)
- if "groups" in dir(mL):
- matches.append(mL.groups())
- return matches
-
-def skipfile(filename,skippedexts):
- if not isinstance(skippedexts, list):
- return False
- for skip in skippedexts:
- if filename.endswith(skip):
- return True
- return False
-
-def scoretext(wordlist, text, maxwholewordlen = -1):
- score = {}
- for word in wordlist:
- wordreg = word.replace('-', ' ')
- wordreg = wordreg.replace(' ', '['+string.punctuation+' ]*')
- if int(len(word)) > int(maxwholewordlen):
- matches = []
- m = re.search(wordreg.lower(),text.lower())
- if "groups" in dir(m):
- matches.append(m.groups())
- score[word] = len(matches)
- else:
- score[word] = len(wholeword(wordreg,text))
- return score
-
-usage = "%prog [options] DIRECTORY ... DIRECTORYN"
-epilog = "example: ./main.py ../git.lf/janitor -s .ppt -s .docx -s .pdf -s .xls -s .xlsx -s .gif -s .png -s .jpg -s .css -r fw -w cryptology.txt -c -p -l 3"
-parser = OptionParser(usage = usage, epilog = epilog)
-parser.add_option("-f", "--file", dest="suspiciousfilename", help="specify file to scan", action="append")
-parser.add_option("-w", "--wordlist", dest="wordlistfilename", help="file containing all of the words to look for")
-parser.add_option("-s", "--skip", dest="skipfileextensions", help="file extensions to skip", action="append")
-parser.add_option("-v", "--verbose", dest="verbose", help="print verberose information", default=False, action="store_true")
-parser.add_option("-r", "--report", dest="printreport", default="wf", help="print score")
-parser.add_option("--show-wordlist", dest="show_wordlist", default=False, help="print list of words to detect", action="store_true")
-parser.add_option("-c", "--display-counts", dest="display_counts", default=False, help="Show the num ber of files processed", action="store_true")
-parser.add_option("-p", "--display_progress", dest="display_progress", default=False, help="show percentage complete", action="store_true")
-parser.add_option("-l", "--max-wholeword-length", dest="maxwholewordlength", type="int", default=-1, help="maximun length of a word allowed to only find matches on whole word")
-parser.add_option("-o", "--summary-file", dest="summaryfile", help="name of the file to store the summary in")
-parser.add_option("-x", "--display-summary", dest="displaysummary", default=False, help="Display a summary from the summary file", action="store_true")
-parser.add_option("-X", "--dont-display-summary", dest="dontdisplaysummary", default=False, help="Dont Display a summary after running a scan", action="store_true")
-
-(options, args) = parser.parse_args()
-
-if options.wordlistfilename:
- wordlist = list(set(open(options.wordlistfilename).read().lower().strip().split('\n')))
-
-if options.show_wordlist: print wordlist; exit()
-
-if options.displaysummary and options.summaryfile:
- report = dict()
- try:
- summaryfile = open(options.summaryfile)
- except:
- print "no summary file: " + options.summaryfile
- exit()
- #sample input
- #../bzr.lf/lsb/devel/build_env/headers/x86-64/4.1/glib-2.0/gio/gmenuexporter.h.defs(1): export(1);
- for line in summaryfile:
- #find the file name which is before the matching parathsis before the last colon on the line
- filename = line[:line[:line.rfind(':')].rfind('(')]
- #find the total number of words found by locating the end of the filename and taking the number in parathesis right before the :
- totalfilecount = line[line[:line.rfind(':')].rfind('(')+1:line[:line.rfind(':')].rfind(')')]
- #find the list of words following the :, and split them by the ;, and then drop the last item on the list which is always a \n
- foundwords = line[line.rfind(':')+1:].split(';')[:-1]
- report[filename] = dict()
- for w in foundwords:
- w = w.strip()
- word = w[:w.find('(')]
- wcount = w[w.find('(')+1:w.find(')')]
- report[filename][word] = int(wcount)
-
- if options.printreport:
- if options.printreport == "f":
- printscore(sortscore(scorefile(report)))
- elif options.printreport == "w":
- printscore(sortscore(scorewords(report)))
- elif options.printreport == "wf" or options.printreport == "fw":
- print summary(report)
- else:
- print summary(report)
- exit()
-
-
-for a in args:
- #filelist.append(a)
- for (path, dirs, files) in os.walk(a):
- if 'CVS' in dirs:
- dirs.remove('CVS')
- if '.git' in dirs:
- dirs.remove('.git')
- if '.bzr' in dirs:
- dirs.remove('.bzr')
- if '.hg' in dirs:
- dirs.remove('.hg')
- if '.svn' in dirs:
- dirs.remove('.svn')
-
- for file in files:
- filelist.append(path + '/' + file)
-
-if options.suspiciousfilename:
- filelist += options.suspiciousfilename
-
-start = datetime.datetime.now()
-for file in filelist:
- if skipfile(file, options.skipfileextensions):
- skipped += 1
- continue
- try:
- f = open(file)
- except:
- print "failed to open: " + file
- continue
- opened +=1
- now = datetime.datetime.now()
- estimate = (((now - start) / (opened + skipped)) * len(filelist))
- if options.display_progress:
- print '\r' + " " * len(progresstext) + '\r',
- progresstext = str(((opened + skipped)*1.0/len(filelist))*100)[:5] + '% '+ " time left:" + str(estimate).split('.')[0] + ' ' + file + '\r'
- print progresstext,
- sys.stdout.flush()
- filecontents = f.read()
- datasize += len(filecontents)
- filenamescore = scoretext(wordlist, file, options.maxwholewordlength)
- filecontentsscore = scoretext(wordlist, filecontents, options.maxwholewordlength)
- report[file] = {}
- for k in filecontentsscore.keys():
- report[file][k] = filenamescore[k] + filecontentsscore[k]
-
-if options.display_progress:
- print '\r' + " " * len(progresstext) + '\r',
-
-if options.printreport and not options.dontdisplaysummary:
- if options.printreport == "f":
- printscore(sortscore(scorefile(report)))
- elif options.printreport == "wf" or options.printreport == "fw":
- print summary(report)
- else:
- printscore(sortscore(scorewords(report)))
-
-if options.display_counts:
- print "total files:" + str(len(filelist)) ,
- print "suspicious files:" + str(len(sortscore(scorefile(report)))) ,
- print "skipped files:" + str(skipped) ,
- print "searched:" + str(datasize) + 'B',
- print "time:" + str(datetime.datetime.now() - start).split('.')[0]
-
-if options.summaryfile and len(filelist) > 0 and not options.displaysummary:
- summaryfilename = options.summaryfile
- counter = 0
- while os.path.isfile(summaryfilename):
- counter +=1
- summaryfilename = options.summaryfile + '.' + str(counter)
- try:
- if counter > 1: print "saving as " + summaryfilename + "...."
- summaryfile = open(summaryfilename, 'w+')
- summaryfile.write(summary(report))
- summaryfile.close()
- except:
- print report
- print "error saving summary as " + summaryfilename
-
-
-def test():
- print wholeword("22", "port22")
- print wholeword("22", "22")
- print wholeword("22", ":22'")
- print wholeword("22", "223")
- print wholeword("22", "open('22')")
- print wholeword("ear","bearth")
- print wholeword("ear","BearTH")
- print wholeword("ear","bEARth")
- print wholeword("ear","ear_")
- print wholeword("ear","ear()")
- print wholeword("ear","ear.")
- print wholeword("ear","ear:")
- print wholeword("ear","ear\n\r")
- print wholeword("ear","myEAR() MYear: myEAR()")
-
-#test()