diff options
-rw-r--r-- | README         |  1
-rw-r--r-- | avfs.py        |  1
-rw-r--r-- | binls.nfsf.mak |  7
-rw-r--r-- | cryptology.txt |  1
-rwxr-xr-x | findelf.py     | 15
-rwxr-xr-x | findjava.py    | 15
-rwxr-xr-x | lsbin.py       | 19
-rwxr-xr-x | suspicious     | 52
8 files changed, 97 insertions, 14 deletions
@@ -0,0 +1 @@
+A motley collection of (mostly hideously awful) tools we cobbled together while scanning codebases for export-controlled cryptography.
@@ -69,6 +69,7 @@ avfscmds = {
     (b'.txz', b'#uxze#utar'),
     (b'.xz', b'#uxze'),
     (b'.lzma', b'#uxze'),
+    (b'.vsix', b'#uzip'), # Add upstream
 }
 
 def guesscmd(filename):
diff --git a/binls.nfsf.mak b/binls.nfsf.mak
new file mode 100644
index 0000000..865cef8
--- /dev/null
+++ b/binls.nfsf.mak
@@ -0,0 +1,7 @@
+repos := $(patsubst %/.git,%,$(wildcard */.git))
+binls := $(repos:=.binls)
+
+suspicious_checkout=/home/gnoutchd/export-ctrl/suspicious
+
+%.binls:
+	${suspicious_checkout}/binls $* >$@ || rm $@
diff --git a/cryptology.txt b/cryptology.txt
index f054af5..5b6d01b 100644
--- a/cryptology.txt
+++ b/cryptology.txt
@@ -611,3 +611,4 @@ rtc
 ocsp
 cms
 gost
+passphrase
diff --git a/findelf.py b/findelf.py
new file mode 100755
index 0000000..addb51d
--- /dev/null
+++ b/findelf.py
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+
+import avfs
+import os, sys
+
+def is_elf(fname):
+    return b'\x7fELF' == avfs.open(fname, 'rb').read(4)
+
+if __name__ == "__main__":
+    for fname in avfs.find(os.fsencode(sys.argv[1]), {b'.git'}):
+        try:
+            if is_elf(fname):
+                print(fname.decode('utf-8', 'replace'))
+        except:
+            pass
diff --git a/findjava.py b/findjava.py
new file mode 100755
index 0000000..09f65ac
--- /dev/null
+++ b/findjava.py
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+
+import avfs
+import os, sys
+
+def is_jvm_class(fname):
+    return b'\xca\xfe\xba\xbe' == avfs.open(fname, 'rb').read(4)
+
+if __name__ == "__main__":
+    for fname in avfs.find(os.fsencode(sys.argv[1]), {b'.git'}):
+        try:
+            if is_jvm_class(fname):
+                print(fname.decode('utf-8', 'replace'))
+        except:
+            pass
diff --git a/lsbin.py b/lsbin.py
new file mode 100755
index 0000000..e4bd36c
--- /dev/null
+++ b/lsbin.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python3
+
+import avfs
+import os, sys
+
+# https://stackoverflow.com/questions/898669/how-can-i-detect-if-a-file-is-binary-non-text-in-python
+textchars = bytearray({7,8,9,10,12,13,27} | set(range(0x20, 0x100)) - {0x7f})
+def is_binary(fname):
+    head = avfs.open(fname, 'rb').read(1024)
+    return bool(head.translate(None, textchars))
+
+if __name__ == "__main__":
+    for fname in avfs.find(os.fsencode(sys.argv[1]), {b'.git'}):
+        try:
+            if is_binary(fname):
+                print(fname.decode('utf-8', 'replace'))
+        except:
+            pass
+
@@ -15,10 +15,11 @@
 # You should have received a copy of the GNU General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
-#Authors: Marc Jones <mjones@softwarefreedom.org>, Daniel Gnoutcheff <gnoutchd@softwarefreedom.org>
-#Date: June 2016
-#Version 0.3.0
-#Added AVFS support
+# Authors: Marc Jones <mjones@softwarefreedom.org>,
+# Daniel Gnoutcheff <gnoutchd@softwarefreedom.org>
+# Date: June 2016
+# Version 0.3.0
+# Added AVFS support
 
 ##TODO
 #need to verify that word score counts each instance, not just 0 or 1
@@ -32,6 +33,8 @@ import sys
 import datetime
 import string
 import subprocess
+import chardet
+
 import avfs
 
 report = {}
@@ -48,6 +51,32 @@ progresstext = ""
 fsencoding = sys.getfilesystemencoding()
 def fsdecode_display(bytestring):
     return str(bytestring, encoding=fsencoding, errors='replace')
+
+
+_def_file_encoding = sys.getdefaultencoding()
+
+def decode_file(filename):
+    """
+    Return the contents of the file at the given path as a (Unicode) string.
+    Return None if the file appears to be a binary.
+    """
+
+    with avfs.open(filename, 'rb') as filehandle:
+        contents_raw = filehandle.read()
+
+    try:
+        return str(contents_raw, encoding=_def_file_encoding)
+    except UnicodeDecodeError:
+        pass
+
+    guessed_encoding = chardet.detect(contents_raw)['encoding']
+    if not guessed_encoding:
+        return None
+
+    try:
+        return str(contents_raw, encoding=guessed_encoding)
+    except UnicodeDecodeError:
+        return None
 
 def sortscore(score, reverse=True):
     sortedscore = sorted(score.items(), key=lambda score: score[1], reverse=reverse)
@@ -304,10 +333,13 @@ for file in filelist:
         skipped += 1
         continue
     try:
-        f = avfs.open(file)
-    except:
+        filecontents = decode_file(file)
+    except OSError:
         print("failed to open: " + file_displayname)
         continue
+    if filecontents is None:
+        print("possible binary: " + file_displayname)
+        continue
     opened +=1
     now = datetime.datetime.now()
     if options.display_progress:
@@ -326,14 +358,6 @@ for file in filelist:
                 .format(frac_done, est_hr, est_min, est_sec, prog_file)
         print(progresstext, end='', file=sys.stderr)
         sys.stdout.flush()
-    try:
-        filecontents = f.read()
-    except UnicodeDecodeError:
-        print("possible binary: " + file_displayname)
-        continue
-    except OSError:
-        print("read error: " + file_displayname)
-        continue
     datasize += len(filecontents)
     filenamescore = scoretext(wordlist, file_displayname, options.maxwholewordlength)
     filecontentsscore = scoretext(wordlist, filecontents, options.maxwholewordlength)