From 943dbf041031f6c940b8f57dea12820b362bfde4 Mon Sep 17 00:00:00 2001
From: Marc Jones <mjones@softwarefreedom.org>
Date: Wed, 5 Feb 2014 09:37:07 -0500
Subject: adding summary.py

---
 encryption-high.txt | 205 ----------------------------------------------------
 main.py             |  62 +++++++++++++---
 2 files changed, 50 insertions(+), 217 deletions(-)
 delete mode 100644 encryption-high.txt

diff --git a/encryption-high.txt b/encryption-high.txt
deleted file mode 100644
index 575e269..0000000
--- a/encryption-high.txt
+++ /dev/null
@@ -1,205 +0,0 @@
-sasl
-blowfish
-cast-128
-cast 128
-p-array
-salsa20
-threefish
-rijndael
-yarrow
-fortuna
-tyche
-export
-tsu
-enc
-encryption
-crypt
-crypto
-cryptography
-cryptographically
-cipher
-ca
-public-key
-public key
-IDEA
-newdes
-safer
-cas5
-feal
-gost 28147-89
-gost
-skipjack
-des-x
-gdes
-digitial signature
-plaintext
-ciphertext
-lookup table
-permutation
-cipher
-key-generation
-block cipher
-des
-3des
-triple des
-aes
-nist
-rc6
-mars
-serpent
-subkey
-block size
-feistel
-blowfish
-s-box
-s box
-twofish
-key size
-finite field
-key schedule
-key length
-mix columns
-diffusion
-ecb
-cbc
-nonce
-digital rights management
-tdea
-triple dea
-tiger
-tiger/128
-tiger/160
-openpgp
-skein
-cubehash
-lucifer
-misty1
-feal
-ice
-kasumi
-loki97
-magenta
-sms4
-rc2
-rc6
-clefia
-key whitening
-lai-massey
-lai massey
-p box
-crypto
-simonxtea
-blake
-drm
-ofb
-ctr
-s/mime
-smime
-elliptic curve
-elliptic
-ecc
-ipes
-ipsec
-BassOmatic
-pskc
-pkcs
-ocb
-ccm
-cwc
-gcm
-symmetric
-symmetric key
-asymmetric key
-secure hash
-md5
-sha
-sha-1
-sha-224
-sha-256
-sha-384
-sha-512
-cbc-mac
-cmac
-hmac
-gmac
-secure channel
-diffie-hellman
-diffie
-hellman
-dh
-rsa
-alice
-bob
-negotiation
-pki
-kerberos
-kerb
-vpn
-certificate
-cert
-private key
-public key
-secret key
-revocation
-auth
-authn
-authnz
-authorization
-authenication
-root key
-key size
-salting
-secure token
-tls
-ssl
-https
-ftps
-ssh
-sftp
-pgp
-gpg
-pretty good privacy
-privacy
-decrypt
-secret
-hash
-signature
-parity
-secure
-ear
-ofac
-munition
-rnd
-random
-prng
-pseudorandom
-entropy
-mac
-authority
-password
-salt
-padding
-aes-256
-aes-128
-aes
-fips 140
-fips-140
-fips 140-2
-fips-140-2
-fips 197
-cavp
-whirlpool
-decipher
-encipher
-key length
-key expansion
-round key
-aes-192
-sbox
-openssl
-url
-443
-22
-8443
-ldaps
-636
diff --git a/main.py b/main.py
index 473946d..ae36453 100755
--- a/main.py
+++ b/main.py
@@ -2,11 +2,17 @@
 from optparse import OptionParser
 import os
 import re
+import sys
+import datetime
+
 report = {}
 wordscore = {}
 filescore = {}
 filelist = list()
 skipped = 0
+opened = 0
+datasize = 0
+progresstext = "" 
 
 def sortscore(score, reverse=False):
 	sortedscore = sorted(score.items(), key=lambda score: score[1], reverse=reverse)
@@ -22,12 +28,14 @@ def printscore(report):
 		print i[0] + ':' + str(i[1])
 
 def wholeword(word, string):
+	re.purge()
 	matches = []
-	regexU = r'([A-Z]|[^a-zA-Z]|\b)(' + word.lower() + r')([A-Z]|[^a-zA-Z]|\b)'
-	regexL = r'([a-z]|[^a-zA-Z]|\b)(' + word.upper() + r')([a-z]|[^a-zA-Z]|\b)'
+	regexU = r'([A-Z]|[^a-zA-Z]|\b)(' + re.escape(word.lower()) + r')([A-Z]|[^a-zA-Z]|\b)'
+	regexL = r'([a-z]|[^a-zA-Z]|\b)(' + re.escape(word.upper()) + r')([a-z]|[^a-zA-Z]|\b)'
 	mU = re.search(regexU, string)
 	if "groups" in dir(mU):
 		matches.append(mU.groups())
+	re.purge()
 	mL = re.search(regexL, string)
 	if "groups" in dir(mL):
 		matches.append(mL.groups())
@@ -41,11 +49,13 @@ def skipfile(filename,skippedexts):
 			return True
 	return False
 
-def scoretext(wordlist, text):
+def scoretext(wordlist, text, maxwholewordlen = -1): # returns {word: hit count in text}; maxwholewordlen = longest word still matched via wholeword()
 	score = {}
 	for word in wordlist:
-		score[word] = len(wholeword(word,text))
-	
+		if int(len(word)) > int(maxwholewordlen): # word is longer than the limit (always true for the default -1, since len() >= 0)
+			score[word] = text.lower().count(word.lower()) # case-insensitive count of non-overlapping substring occurrences
+		else:
+			score[word] = len(wholeword(word,text)) # word within the limit: length of whatever wholeword() returns for it
 	return score
 
 parser = OptionParser()
@@ -56,22 +66,37 @@ parser.add_option("-v", "--verbose", dest="verbose", help="print verberose infor
 parser.add_option("-r", "--report", dest="printreport", default="w", help="print score")
 parser.add_option("--show-wordlist", dest="show_wordlist", default=False, help="print list of words to detect", action="store_true")
 parser.add_option("-c", "--display-counts", dest="display_counts", default=False, help="Show the num ber of files processed", action="store_true")
+parser.add_option("-p", "--display_progress", dest="display_progress", default=False, help="show percentage complete", action="store_true") # opt-in progress line
+parser.add_option("-l", "--max-wholeword-length", dest="maxwholewordlength", type="int", default=-1, help="maximum length of a word allowed to only find matches on whole word") # integer limit, defaults to -1
 
 (options, args) = parser.parse_args()
 
 if options.wordlistfilename:
-	wordlist = open(options.wordlistfilename).read().lower().strip().split('\n')
+	wordlist = list(set(open(options.wordlistfilename).read().lower().strip().split('\n')))
 			
 if options.show_wordlist: print wordlist; exit()
 
 for a in args:
+	#filelist.append(a)
 	for (path, dirs, files) in os.walk(a):
+		if 'CVS' in dirs:
+			dirs.remove('CVS')
+		if '.git' in dirs:
+			dirs.remove('.git')
+		if '.bzr' in dirs:
+			dirs.remove('.bzr')
+		if '.hg' in dirs:
+			dirs.remove('.hg')
+		if '.svn' in dirs:
+			dirs.remove('.svn')
+	
 		for file in files:
 			filelist.append(path + '/' + file)
 	
 if options.suspiciousfilename:
 	filelist += options.suspiciousfilename
 
+start = datetime.datetime.now()
 for file in filelist:
 	if skipfile(file, options.skipfileextensions):
 		skipped += 1
@@ -81,10 +106,21 @@ for file in filelist:
 	except:
 		print "failed to open: " + file
 		continue
-	
+	opened +=1 # this file was opened successfully
+	now = datetime.datetime.now()
+	estimate = ((now - start) / (opened + skipped)) * (len(filelist) - (opened + skipped)) # average time per handled file * files still pending = time left
+	if options.display_progress: 
+		print '\r' + " " * len(progresstext) + '\r', # blank out the previous progress line before redrawing it
+		progresstext = str(((opened + skipped)*1.0/len(filelist))*100)[:5] + '% '+ " time left:" + str(estimate).split('.')[0] + ' ' + file + '\r'
+		print progresstext,
+	sys.stdout.flush() # the progress line ends in '\r' rather than a newline, so flush explicitly
 	filecontents = f.read()
-			
-	report[file] = scoretext(wordlist, filecontents)
+	datasize += len(filecontents)		
+	filenamescore = scoretext(wordlist, file, options.maxwholewordlength)
+	filecontentsscore = scoretext(wordlist, filecontents, options.maxwholewordlength)
+	report[file] = {}
+	for k in filecontentsscore.keys():
+		report[file][k] = filenamescore[k] + filecontentsscore[k]
 
 for file in report.keys():
 	for word in report[file].keys():
@@ -109,9 +145,11 @@ if options.printreport:
 		printscore(sortscore(wordscore))
 
 if options.display_counts:
-	print "total files: " + str(len(filelist)) ,
-	print "suspicious files: " + str(len(sortscore(filescore))) ,
-	print "skipped files: " + str(skipped)
+	print "total files:" + str(len(filelist)) ,
+	print "suspicious files:" + str(len(sortscore(filescore))) ,
+	print "skipped files:" + str(skipped) ,
+	print "searched:" + str(datasize) + 'B', 
+	print "time:" + str(datetime.datetime.now() - start).split('.')[0]
  
 def test():
 	print wholeword("ear","bearth")
-- 
cgit v1.2.3