From d7c84268a26a4391af55a3533422367b02737435 Mon Sep 17 00:00:00 2001 From: Daniel Gnoutcheff Date: Mon, 23 Jan 2017 18:38:22 -0500 Subject: Misc. unrecorded changes --- README | 1 + avfs.py | 1 + binls.nfsf.mak | 7 +++++++ cryptology.txt | 1 + findelf.py | 15 +++++++++++++++ findjava.py | 15 +++++++++++++++ lsbin.py | 19 +++++++++++++++++++ suspicious | 52 ++++++++++++++++++++++++++++++++++++++-------------- 8 files changed, 97 insertions(+), 14 deletions(-) create mode 100644 README create mode 100644 binls.nfsf.mak create mode 100755 findelf.py create mode 100755 findjava.py create mode 100755 lsbin.py diff --git a/README b/README new file mode 100644 index 0000000..4c52cdb --- /dev/null +++ b/README @@ -0,0 +1 @@ +A motley collection of (mostly hideously awful) tools we cobbled together while scanning codebases for export-controlled cryptography. diff --git a/avfs.py b/avfs.py index 5ab7bea..c05d7aa 100644 --- a/avfs.py +++ b/avfs.py @@ -69,6 +69,7 @@ avfscmds = { (b'.txz', b'#uxze#utar'), (b'.xz', b'#uxze'), (b'.lzma', b'#uxze'), + (b'.vsix', b'#uzip'), # Add upstream } def guesscmd(filename): diff --git a/binls.nfsf.mak b/binls.nfsf.mak new file mode 100644 index 0000000..865cef8 --- /dev/null +++ b/binls.nfsf.mak @@ -0,0 +1,7 @@ +repos := $(patsubst %/.git,%,$(wildcard */.git)) +binls := $(repos:=.binls) + +suspicious_checkout=/home/gnoutchd/export-ctrl/suspicious + +%.binls: + ${suspicious_checkout}/binls $* >$@ || rm $@ diff --git a/cryptology.txt b/cryptology.txt index f054af5..5b6d01b 100644 --- a/cryptology.txt +++ b/cryptology.txt @@ -611,3 +611,4 @@ rtc ocsp cms gost +passphrase diff --git a/findelf.py b/findelf.py new file mode 100755 index 0000000..addb51d --- /dev/null +++ b/findelf.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 + +import avfs +import os, sys + +def is_elf(fname): + return b'\x7fELF' == avfs.open(fname, 'rb').read(4) + +if __name__ == "__main__": + for fname in avfs.find(os.fsencode(sys.argv[1]), {b'.git'}): + try: + if 
is_elf(fname): + print(fname.decode('utf-8', 'replace')) + except: + pass diff --git a/findjava.py b/findjava.py new file mode 100755 index 0000000..09f65ac --- /dev/null +++ b/findjava.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 + +import avfs +import os, sys + +def is_jvm_class(fname): + return b'\xca\xfe\xba\xbe' == avfs.open(fname, 'rb').read(4) + +if __name__ == "__main__": + for fname in avfs.find(os.fsencode(sys.argv[1]), {b'.git'}): + try: + if is_jvm_class(fname): + print(fname.decode('utf-8', 'replace')) + except: + pass diff --git a/lsbin.py b/lsbin.py new file mode 100755 index 0000000..e4bd36c --- /dev/null +++ b/lsbin.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 + +import avfs +import os, sys + +# https://stackoverflow.com/questions/898669/how-can-i-detect-if-a-file-is-binary-non-text-in-python +textchars = bytearray({7,8,9,10,12,13,27} | set(range(0x20, 0x100)) - {0x7f}) +def is_binary(fname): + head = avfs.open(fname, 'rb').read(1024) + return bool(head.translate(None, textchars)) + +if __name__ == "__main__": + for fname in avfs.find(os.fsencode(sys.argv[1]), {b'.git'}): + try: + if is_binary(fname): + print(fname.decode('utf-8', 'replace')) + except: + pass + diff --git a/suspicious b/suspicious index 72dd9b0..2ab782b 100755 --- a/suspicious +++ b/suspicious @@ -15,10 +15,11 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. 
-#Authors: Marc Jones , Daniel Gnoutcheff -#Date: June 2016 -#Version 0.3.0 -#Added AVFS support +# Authors: Marc Jones , +# Daniel Gnoutcheff +# Date: June 2016 +# Version 0.3.0 +# Added AVFS support ##TODO #need to verify that word score counts each instance, not just 0 or 1 @@ -32,6 +33,8 @@ import sys import datetime import string import subprocess +import chardet + import avfs report = {} @@ -48,6 +51,32 @@ progresstext = "" fsencoding = sys.getfilesystemencoding() def fsdecode_display(bytestring): return str(bytestring, encoding=fsencoding, errors='replace') + + +_def_file_encoding = sys.getdefaultencoding() + +def decode_file(filename): + """ + Return the contents of the file at the given path as a (Unicode) string. + Return None if the file appears to be a binary. + """ + + with avfs.open(filename, 'rb') as filehandle: + contents_raw = filehandle.read() + + try: + return str(contents_raw, encoding=_def_file_encoding) + except UnicodeDecodeError: + pass + + guessed_encoding = chardet.detect(contents_raw)['encoding'] + if not guessed_encoding: + return None + + try: + return str(contents_raw, encoding=guessed_encoding) + except UnicodeDecodeError: + return None def sortscore(score, reverse=True): sortedscore = sorted(score.items(), key=lambda score: score[1], reverse=reverse) @@ -304,10 +333,13 @@ for file in filelist: skipped += 1 continue try: - f = avfs.open(file) - except: + filecontents = decode_file(file) + except OSError: print("failed to open: " + file_displayname) continue + if filecontents is None: + print("possible binary: " + file_displayname) + continue opened +=1 now = datetime.datetime.now() if options.display_progress: @@ -326,14 +358,6 @@ for file in filelist: .format(frac_done, est_hr, est_min, est_sec, prog_file) print(progresstext, end='', file=sys.stderr) sys.stdout.flush() - try: - filecontents = f.read() - except UnicodeDecodeError: - print("possible binary: " + file_displayname) - continue - except OSError: - print("read error: " + 
file_displayname) - continue datasize += len(filecontents) filenamescore = scoretext(wordlist, file_displayname, options.maxwholewordlength) filecontentsscore = scoretext(wordlist, filecontents, options.maxwholewordlength) -- cgit v1.2.3