From 7c83421ce723aa211168b6fb46d5f8ec7d187a88 Mon Sep 17 00:00:00 2001
From: Daniel Gnoutcheff <gnoutchd@softwarefreedom.org>
Date: Fri, 29 Sep 2017 12:32:12 -0400
Subject: suspicious: use YAML for summary files

---
 suspicious     | 31 ++++++++++++-------------------
 suspicious.mak |  2 +-
 2 files changed, 13 insertions(+), 20 deletions(-)

diff --git a/suspicious b/suspicious
index 2ab782b..8140254 100755
--- a/suspicious
+++ b/suspicious
@@ -17,9 +17,9 @@
 
 # Authors: Marc Jones <mjones@softwarefreedom.org>, 
 #          Daniel Gnoutcheff <gnoutchd@softwarefreedom.org>
-# Date: June 2016
-# Version 0.3.0
-# Added AVFS support
+# Date: October 2017
+# Version 0.4.0
+# Use YAML for summary files
 
 ##TODO
 #need to verify that word score counts each instance, not just 0 or 1
@@ -35,6 +35,12 @@ import string
 import subprocess
 import chardet
 
+from yaml import load, dump
+try:
+    from yaml import CLoader as Loader, CDumper as Dumper
+except ImportError:
+    from yaml import Loader, Dumper
+
 import avfs
 
 report = {}
@@ -287,21 +293,7 @@ if options.displaysummary and options.summaryfile:
 	except:
 		print("no summary file: " + options.summaryfile)
 		exit()
-	#sample input
-	#../bzr.lf/lsb/devel/build_env/headers/x86-64/4.1/glib-2.0/gio/gmenuexporter.h.defs(1): export(1);
-	for line in summaryfile:
-		#find the file name which is before the matching parathsis before the last colon on the line
-		filename = line[:line[:line.rfind(':')].rfind('(')]
-		#find the total number of words found by locating the end of the filename and taking the number in parathesis right before the :
-		totalfilecount = line[line[:line.rfind(':')].rfind('(')+1:line[:line.rfind(':')].rfind(')')]
-		#find the list of words following the :, and split them by the ;, and then drop the last item on the list which is always a \n
-		foundwords = line[line.rfind(':')+1:].split(';')[:-1]
-		report[filename] = dict()		
-		for w in foundwords:
-			w = w.strip()
-			word = w[:w.find('(')]
-			wcount = w[w.find('(')+1:w.find(')')]		
-			report[filename][word] = int(wcount)
+	report = load(summaryfile, Loader=Loader)  # NOTE(review): Loader is PyYAML's full (C)Loader, which can construct arbitrary Python objects from tagged YAML; only read trusted summary files here, or consider SafeLoader
 
 	if options.commonwordfilename and not(options.dontweightreport):
 		report, weightedfiles = weightreport(report, commonwords)
@@ -379,7 +371,8 @@ if options.summaryfile and len(filelist) > 0 and not options.displaysummary:
 	try:
 		if counter > 1: print("saving as " + summaryfilename + "....")
 		summaryfile = open(summaryfilename, 'w+')
-		summaryfile.write(summary(report))
+
+		summaryfile.write(dump(report, Dumper=Dumper))
 		summaryfile.close()		
 	except:
 		print(report)
diff --git a/suspicious.mak b/suspicious.mak
index 734ebce..00751b6 100644
--- a/suspicious.mak
+++ b/suspicious.mak
@@ -5,5 +5,5 @@ suspicious_checkout = ../tools
 all: $(reportlist)
 
 %.suspicious:
-	${suspicious_checkout}/suspicious -s .yml -s .bmp -s .ppt -s .docx -s .pdf -s .xls -s .xlsx -s .gif -s .png -s .jpg -s .css -s .md -s .rst -r fw -w ${suspicious_checkout}/cryptology.txt -c -l 3 $* >$@
+	${suspicious_checkout}/suspicious -s .yml -s .bmp -s .ppt -s .docx -s .pdf -s .xls -s .xlsx -s .gif -s .png -s .jpg -s .css -s .md -s .rst -r fw -w ${suspicious_checkout}/cryptology.txt -c -l 3 --summary-file $*.yml $* >$@
 
-- 
cgit v1.2.3