From 7c83421ce723aa211168b6fb46d5f8ec7d187a88 Mon Sep 17 00:00:00 2001 From: Daniel Gnoutcheff Date: Fri, 29 Sep 2017 12:32:12 -0400 Subject: suspicious: use YAML for summary files --- suspicious | 31 ++++++++++++------------------- suspicious.mak | 2 +- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/suspicious b/suspicious index 2ab782b..8140254 100755 --- a/suspicious +++ b/suspicious @@ -17,9 +17,9 @@ # Authors: Marc Jones , # Daniel Gnoutcheff -# Date: June 2016 -# Version 0.3.0 -# Added AVFS support +# Date: October 2017 +# Version 0.4.0 +# Use YAML for summary files ##TODO #need to verify that word score counts each instance, not just 0 or 1 @@ -35,6 +35,12 @@ import string import subprocess import chardet +from yaml import load, dump +try: + from yaml import CLoader as Loader, CDumper as Dumper +except ImportError: + from yaml import Loader, Dumper + import avfs report = {} @@ -287,21 +293,7 @@ if options.displaysummary and options.summaryfile: except: print("no summary file: " + options.summaryfile) exit() - #sample input - #../bzr.lf/lsb/devel/build_env/headers/x86-64/4.1/glib-2.0/gio/gmenuexporter.h.defs(1): export(1); - for line in summaryfile: - #find the file name which is before the matching parathsis before the last colon on the line - filename = line[:line[:line.rfind(':')].rfind('(')] - #find the total number of words found by locating the end of the filename and taking the number in parathesis right before the : - totalfilecount = line[line[:line.rfind(':')].rfind('(')+1:line[:line.rfind(':')].rfind(')')] - #find the list of words following the :, and split them by the ;, and then drop the last item on the list which is always a \n - foundwords = line[line.rfind(':')+1:].split(';')[:-1] - report[filename] = dict() - for w in foundwords: - w = w.strip() - word = w[:w.find('(')] - wcount = w[w.find('(')+1:w.find(')')] - report[filename][word] = int(wcount) + report = load(summaryfile, Loader=Loader) if options.commonwordfilename and not(options.dontweightreport): report, weightedfiles = weightreport(report, commonwords) @@ -379,7 +371,8 @@ if options.summaryfile and len(filelist) > 0 and not options.displaysummary: try: if counter > 1: print("saving as " + summaryfilename + "....") summaryfile = open(summaryfilename, 'w+') - summaryfile.write(summary(report)) + + summaryfile.write(dump(report, Dumper=Dumper)) summaryfile.close() except: print(report) diff --git a/suspicious.mak b/suspicious.mak index 734ebce..00751b6 100644 --- a/suspicious.mak +++ b/suspicious.mak @@ -5,5 +5,5 @@ suspicious_checkout = ../tools all: $(reportlist) %.suspicious: - ${suspicious_checkout}/suspicious -s .yml -s .bmp -s .ppt -s .docx -s .pdf -s .xls -s .xlsx -s .gif -s .png -s .jpg -s .css -s .md -s .rst -r fw -w ${suspicious_checkout}/cryptology.txt -c -l 3 $* >$@ + ${suspicious_checkout}/suspicious -s .yml -s .bmp -s .ppt -s .docx -s .pdf -s .xls -s .xlsx -s .gif -s .png -s .jpg -s .css -s .md -s .rst -r fw -w ${suspicious_checkout}/cryptology.txt -c -l 3 --summary-file $*.yml $* >$@ -- cgit v1.2.3