From 4a33223eba7cf5cf66ccdf98abb1f0c81808cfc3 Mon Sep 17 00:00:00 2001
From: Marc Jones
Date: Wed, 5 Feb 2014 10:07:54 -0500
Subject: merging summary.py into main.py

---
Review notes (commentary only, ignored when the patch is applied):

 * Line breaks and indentation were restored from a copy of this patch in
   which all whitespace had been collapsed.  Four-space indentation is
   assumed, and the removed whitespace-only line near the end is a guess;
   compare the context lines with main.py before applying.
 * The commit id above and the blob ids on the "index" line below identify
   the patch as first posted, not this corrected revision.
 * Only added lines were changed.  Corrections to the first posting:
    - added the ':' missing after "if options.displaysummary and
      options.summaryfile" and after "while os.path.isfile(summaryfilename)"
    - counter starts at 0 (None += 1 raises TypeError)
    - the output file is opened through summaryfilename; the first posting
      passed the undefined name summaryfile to open()
    - counts read back from a summary file are converted to int, because
      scorefile() adds them to an integer total
    - scorewords() and scorefile() build a new dict on every call; the
      first posting accumulated into module-level dicts, so a second call
      doubled every score
    - "except:" narrowed to "except IOError:", the summary file is closed
      after reading, blank lines are skipped, and the unused totalfilecount
      was dropped

 main.py | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 90 insertions(+), 17 deletions(-)

(limited to 'main.py')

diff --git a/main.py b/main.py
index ae36453..22da098 100755
--- a/main.py
+++ b/main.py
@@ -27,6 +27,45 @@ def printscore(report):
     for i in report:
         print i[0] + ':' + str(i[1])
 
+def scorewords(report):
+    """Return a dict of word -> total count, summed over every file.
+
+    report maps file name -> {word: count}.  A new dict is built on each
+    call, so calling this more than once never double-counts.
+    """
+    totals = {}
+    for name in report:
+        for word, count in report[name].items():
+            totals[word] = totals.get(word, 0) + count
+    return totals
+
+def scorefile(report):
+    """Return a dict of file name -> sum of the word counts for that file.
+
+    report maps file name -> {word: count}.  A new dict is built on each
+    call, so calling this more than once never double-counts.
+    """
+    totals = {}
+    for name in report:
+        for count in report[name].values():
+            totals[name] = totals.get(name, 0) + count
+    return totals
+
+def summary(report):
+    """Return the report as text, one line per file.
+
+    Each line reads <file>(<total>):<word>(<count>);... and lists only the
+    words whose count is greater than zero.  Files appear in the order
+    produced by sortscore().
+    """
+    lines = []
+    for entry in sortscore(scorefile(report)):
+        name = entry[0]
+        found = [word + '(' + str(count) + ');'
+                 for word, count in report[name].items() if count > 0]
+        lines.append(name + '(' + str(entry[1]) + '):' + ''.join(found) + '\n')
+    return ''.join(lines)
+
 def wholeword(word, string):
     re.purge()
     matches = []
@@ -68,6 +107,8 @@ parser.add_option("--show-wordlist", dest="show_wordlist", default=False, help="
 parser.add_option("-c", "--display-counts", dest="display_counts", default=False, help="Show the num ber of files processed", action="store_true")
 parser.add_option("-p", "--display_progress", dest="display_progress", default=False, help="show percentage complete", action="store_true")
 parser.add_option("-l", "--max-wholeword-length", dest="maxwholewordlength", type="int", default=-1, help="maximun length of a word allowed to only find matches on whole word")
+parser.add_option("-o", "--summary-file", dest="summaryfile", help="name of the file to store the summary in")
+parser.add_option("-x", "--display-summary", dest="displaysummary", default=False, help="Display a summary from the summary file", action="store_true")
 
 (options, args) = parser.parse_args()
 
@@ -76,6 +117,38 @@ if options.wordlistfilename:
 if options.show_wordlist:
     print wordlist; exit()
 
+if options.displaysummary and options.summaryfile:
+    report = dict()
+    try:
+        summaryfile = open(options.summaryfile)
+    except IOError:
+        print "no summary file: " + options.summaryfile
+        exit()
+    #sample input
+    #../bzr.lf/lsb/devel/build_env/headers/x86-64/4.1/glib-2.0/gio/gmenuexporter.h.defs(1): export(1);
+    for line in summaryfile:
+        #skip blank lines, there is nothing to parse on them
+        if not line.strip():
+            continue
+        #the list of words starts after the last colon on the line
+        colon = line.rfind(':')
+        #find the file name, which ends at the last opening parenthesis before that colon
+        #(the number inside the parentheses is the file total, which summary() recomputes)
+        filename = line[:line[:colon].rfind('(')]
+        #split the words on the ; and drop the last item, which is only the end of the line
+        foundwords = line[colon+1:].split(';')[:-1]
+        report[filename] = dict()
+        for w in foundwords:
+            w = w.strip()
+            word = w[:w.find('(')]
+            #keep the count as an int so that scorefile() can add it up
+            report[filename][word] = int(w[w.find('(')+1:w.find(')')])
+    summaryfile.close()
+
+    print summary(report)
+    exit()
+
+
 for a in args:
     #filelist.append(a)
     for (path, dirs, files) in os.walk(a):
@@ -122,27 +195,15 @@ for file in filelist:
     for k in filecontentsscore.keys():
         report[file][k] = filenamescore[k] + filecontentsscore[k]
 
-for file in report.keys():
-    for word in report[file].keys():
-        if not word in wordscore:
-            wordscore[word] = 0
-        if not file in filescore:
-            filescore[file] = 0
-        wordscore[word] += report[file][word]
-        filescore[file] += report[file][word]
+
 
 if options.printreport:
     if options.printreport == "f":
-        printscore(sortscore(filescore))
+        printscore(sortscore(scorefile(report)))
     elif options.printreport == "wf" or options.printreport == "fw":
-        for file in sortscore(filescore):
-            print file[0] + '(' + str(file[1]) + '):',
-            for word in report[file[0]].keys():
-                if report[file[0]][word] > 0:
-                    print word + '(' + str(report[file[0]][word]) + ');',
-            print ""
+        print summary(report)
     else:
-        printscore(sortscore(wordscore))
+        printscore(sortscore(scorewords(report)))
 
 if options.display_counts:
     print "total files:" + str(len(filelist)) ,
@@ -150,7 +211,19 @@ if options.display_counts:
     print "skipped files:" + str(skipped) ,
     print "searched:" + str(datasize) + 'B',
     print "time:" + str(datetime.datetime.now() - start).split('.')[0]
-    
+
+if options.summaryfile and len(filelist) > 0 and not options.displaysummary:
+    #never overwrite an existing summary: try <name>.1, <name>.2, ... until
+    #a file name that is not in use is found
+    summaryfilename = options.summaryfile
+    counter = 0
+    while os.path.isfile(summaryfilename):
+        counter += 1
+        summaryfilename = options.summaryfile + '.' + str(counter)
+    summaryfile = open(summaryfilename, 'w')
+    summaryfile.write(summary(report))
+    summaryfile.close()
+
 def test():
     print wholeword("ear","bearth")
     print wholeword("ear","BearTH")
-- 
cgit v1.2.3