summaryrefslogtreecommitdiff
path: root/main.py
diff options
context:
space:
mode:
author Marc Jones <mjones@softwarefreedom.org> 2014-02-05 10:07:54 -0500
committer Marc Jones <mjones@softwarefreedom.org> 2014-02-05 10:07:54 -0500
commit 4a33223eba7cf5cf66ccdf98abb1f0c81808cfc3 (patch)
tree 18f3c798eea1955a750db5fbe0cccf37f834c37b /main.py
parent 943dbf041031f6c940b8f57dea12820b362bfde4 (diff)
merging summary.py into main.py
Diffstat (limited to 'main.py')
-rwxr-xr-x main.py 92
1 files changed, 75 insertions, 17 deletions
diff --git a/main.py b/main.py
index ae36453..22da098 100755
--- a/main.py
+++ b/main.py
@@ -27,6 +27,37 @@ def printscore(report):
for i in report:
print i[0] + ':' + str(i[1])
+def scorewords(report):
+    """Return a dict mapping each word to its total count over all files.
+
+    report is a dict of the form {filename: {word: count}}.
+
+    The totals are built in a fresh dict on every call, so calling the
+    function more than once never double-counts and it does not depend on
+    any module-level dict existing beforehand.
+    """
+    totals = {}
+    for counts in report.values():
+        for word, count in counts.items():
+            totals[word] = totals.get(word, 0) + count
+    return totals
+
+def scorefile(report):
+    """Return a dict mapping each file name to the sum of its word counts.
+
+    report is a dict of the form {filename: {word: count}}. A file whose
+    word dict is empty gets no entry in the result.
+
+    The totals are built in a fresh dict on every call; summary() calls this
+    function each time it runs, so accumulating into a shared dict would
+    inflate the totals on the second and later calls.
+    """
+    totals = {}
+    for name, counts in report.items():
+        if counts:
+            totals[name] = sum(counts.values())
+    return totals
+
+def summary(report):
+    """Return the per-file summary of report as text, one line per file.
+
+    report is a dict of the form {filename: {word: count}}. Every line looks
+    like "filename(total):word(count);word(count);" and ends with a newline.
+    Files appear in the order sortscore() returns for the scorefile()
+    totals; words whose count is not greater than zero are left out.
+    """
+    lines = []
+    for entry in sortscore(scorefile(report)):
+        name = entry[0]
+        found = [word + '(' + str(count) + ');'
+                 for word, count in report[name].items()
+                 if count > 0]
+        # collect the pieces and join once instead of growing a string with +=
+        lines.append(name + '(' + str(entry[1]) + '):' + ''.join(found) + '\n')
+    return ''.join(lines)
+
def wholeword(word, string):
re.purge()
matches = []
@@ -68,6 +99,8 @@ parser.add_option("--show-wordlist", dest="show_wordlist", default=False, help="
parser.add_option("-c", "--display-counts", dest="display_counts", default=False, help="Show the num ber of files processed", action="store_true")
parser.add_option("-p", "--display_progress", dest="display_progress", default=False, help="show percentage complete", action="store_true")
parser.add_option("-l", "--max-wholeword-length", dest="maxwholewordlength", type="int", default=-1, help="maximun length of a word allowed to only find matches on whole word")
+# --summary-file takes a file name; --display-summary is a plain on/off flag.
+parser.add_option(
+    "-o", "--summary-file",
+    dest="summaryfile",
+    help="name of the file to store the summary in")
+parser.add_option(
+    "-x", "--display-summary",
+    dest="displaysummary",
+    action="store_true",
+    default=False,
+    help="Display a summary from the summary file")
(options, args) = parser.parse_args()
@@ -76,6 +109,33 @@ if options.wordlistfilename:
if options.show_wordlist: print wordlist; exit()
+if options.displaysummary and options.summaryfile:
+    # Display mode: rebuild the report ({filename: {word: count}}) from a
+    # summary file, print it through summary(), and stop.
+    report = dict()
+    try:
+        summaryfile = open(options.summaryfile)
+    except IOError:
+        print "no summary file: " + options.summaryfile
+        exit()
+    #sample input
+    #../bzr.lf/lsb/devel/build_env/headers/x86-64/4.1/glib-2.0/gio/gmenuexporter.h.defs(1): export(1);
+    try:
+        for line in summaryfile:
+            #the last colon on the line separates "filename(total)" from the list of words
+            colon = line.rfind(':')
+            if colon == -1:
+                #blank or malformed line, nothing to parse
+                continue
+            #the file name ends at the last opening parenthesis before that colon
+            paren = line[:colon].rfind('(')
+            if paren == -1:
+                continue
+            filename = line[:paren]
+            #the total in the parentheses is not read back; summary() recomputes it from the word counts
+            #split the words following the colon on ';' and drop the last item, which is the trailing newline
+            foundwords = line[colon+1:].split(';')[:-1]
+            report[filename] = dict()
+            for w in foundwords:
+                w = w.strip()
+                word = w[:w.find('(')]
+                wcount = w[w.find('(')+1:w.find(')')]
+                try:
+                    #store the count as an int so it can be summed and compared with 0
+                    report[filename][word] = int(wcount)
+                except ValueError:
+                    #skip an entry whose count is not a number
+                    continue
+    finally:
+        summaryfile.close()
+
+    print summary(report)
+    exit()
+
+
for a in args:
#filelist.append(a)
for (path, dirs, files) in os.walk(a):
@@ -122,27 +182,15 @@ for file in filelist:
for k in filecontentsscore.keys():
report[file][k] = filenamescore[k] + filecontentsscore[k]
-for file in report.keys():
- for word in report[file].keys():
- if not word in wordscore:
- wordscore[word] = 0
- if not file in filescore:
- filescore[file] = 0
- wordscore[word] += report[file][word]
- filescore[file] += report[file][word]
+
+# Report output: "f" prints per-file totals, "wf"/"fw" prints the per-file
+# summary text, and any other value prints per-word totals. The totals are
+# computed on demand by scorefile(), summary() and scorewords().
if options.printreport:
if options.printreport == "f":
- printscore(sortscore(filescore))
+ printscore(sortscore(scorefile(report)))
elif options.printreport == "wf" or options.printreport == "fw":
- for file in sortscore(filescore):
- print file[0] + '(' + str(file[1]) + '):',
- for word in report[file[0]].keys():
- if report[file[0]][word] > 0:
- print word + '(' + str(report[file[0]][word]) + ');',
- print ""
+ print summary(report)
else:
- printscore(sortscore(wordscore))
+ printscore(sortscore(scorewords(report)))
if options.display_counts:
print "total files:" + str(len(filelist)) ,
@@ -150,7 +198,17 @@ if options.display_counts:
print "skipped files:" + str(skipped) ,
print "searched:" + str(datasize) + 'B',
print "time:" + str(datetime.datetime.now() - start).split('.')[0]
-
+
+if options.summaryfile and len(filelist) > 0 and not options.displaysummary:
+    # Write the summary of this run to the summary file. An existing file is
+    # never overwritten: ".1", ".2", ... is appended to the requested name
+    # until a name is found that is not already a file.
+    summaryfilename = options.summaryfile
+    counter = 0
+    while os.path.isfile(summaryfilename):
+        counter += 1
+        summaryfilename = options.summaryfile + '.' + str(counter)
+    summaryfile = open(summaryfilename, 'w')
+    try:
+        summaryfile.write(summary(report))
+    finally:
+        # close the file even when building or writing the summary fails
+        summaryfile.close()
+
def test():
print wholeword("ear","bearth")
print wholeword("ear","BearTH")