summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Daniel Gnoutcheff <gnoutchd@softwarefreedom.org> 2017-01-23 23:38:22 (GMT)
committer Daniel Gnoutcheff <gnoutchd@softwarefreedom.org> 2017-01-23 23:49:28 (GMT)
commit d7c84268a26a4391af55a3533422367b02737435 (patch)
tree 7dd461cc35542d9ca155ced836c0bda53450bc0d
parent 7fc88c0c901b68b6e223d6f1383bc1221eae8488 (diff)
Misc. unrecorded changes
-rw-r--r-- README 1
-rw-r--r-- avfs.py 1
-rw-r--r-- binls.nfsf.mak 7
-rw-r--r-- cryptology.txt 1
-rwxr-xr-x findelf.py 15
-rwxr-xr-x findjava.py 15
-rwxr-xr-x lsbin.py 19
-rwxr-xr-x suspicious 52
8 files changed, 97 insertions, 14 deletions
diff --git a/README b/README
new file mode 100644
index 0000000..4c52cdb
--- /dev/null
+++ b/README
@@ -0,0 +1 @@
+A motley collection of (mostly hideously awful) tools we cobbled together while scanning codebases for export-controlled cryptography.
diff --git a/avfs.py b/avfs.py
index 5ab7bea..c05d7aa 100644
--- a/avfs.py
+++ b/avfs.py
@@ -69,6 +69,7 @@ avfscmds = {
(b'.txz', b'#uxze#utar'),
(b'.xz', b'#uxze'),
(b'.lzma', b'#uxze'),
+ (b'.vsix', b'#uzip'), # Add upstream
}
def guesscmd(filename):
diff --git a/binls.nfsf.mak b/binls.nfsf.mak
new file mode 100644
index 0000000..865cef8
--- /dev/null
+++ b/binls.nfsf.mak
@@ -0,0 +1,7 @@
+repos := $(patsubst %/.git,%,$(wildcard */.git))
+binls := $(repos:=.binls)
+
+suspicious_checkout=/home/gnoutchd/export-ctrl/suspicious
+
+%.binls:
+ ${suspicious_checkout}/binls $* >$@ || rm $@
diff --git a/cryptology.txt b/cryptology.txt
index f054af5..5b6d01b 100644
--- a/cryptology.txt
+++ b/cryptology.txt
@@ -611,3 +611,4 @@ rtc
ocsp
cms
gost
+passphrase
diff --git a/findelf.py b/findelf.py
new file mode 100755
index 0000000..addb51d
--- /dev/null
+++ b/findelf.py
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+
+import avfs
+import os, sys
+
+def is_elf(fname):
+ return b'\x7fELF' == avfs.open(fname, 'rb').read(4)
+
+if __name__ == "__main__":
+ for fname in avfs.find(os.fsencode(sys.argv[1]), {b'.git'}):
+ try:
+ if is_elf(fname):
+ print(fname.decode('utf-8', 'replace'))
+ except:
+ pass
diff --git a/findjava.py b/findjava.py
new file mode 100755
index 0000000..09f65ac
--- /dev/null
+++ b/findjava.py
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+
+import avfs
+import os, sys
+
+def is_jvm_class(fname):
+ return b'\xca\xfe\xba\xbe' == avfs.open(fname, 'rb').read(4)
+
+if __name__ == "__main__":
+ for fname in avfs.find(os.fsencode(sys.argv[1]), {b'.git'}):
+ try:
+ if is_jvm_class(fname):
+ print(fname.decode('utf-8', 'replace'))
+ except:
+ pass
diff --git a/lsbin.py b/lsbin.py
new file mode 100755
index 0000000..e4bd36c
--- /dev/null
+++ b/lsbin.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python3
+
+import avfs
+import os, sys
+
+# https://stackoverflow.com/questions/898669/how-can-i-detect-if-a-file-is-binary-non-text-in-python
+textchars = bytearray({7,8,9,10,12,13,27} | set(range(0x20, 0x100)) - {0x7f})
+def is_binary(fname):
+ head = avfs.open(fname, 'rb').read(1024)
+ return bool(head.translate(None, textchars))
+
+if __name__ == "__main__":
+ for fname in avfs.find(os.fsencode(sys.argv[1]), {b'.git'}):
+ try:
+ if is_binary(fname):
+ print(fname.decode('utf-8', 'replace'))
+ except:
+ pass
+
diff --git a/suspicious b/suspicious
index 72dd9b0..2ab782b 100755
--- a/suspicious
+++ b/suspicious
@@ -15,10 +15,11 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#Authors: Marc Jones <mjones@softwarefreedom.org>, Daniel Gnoutcheff <gnoutchd@softwarefreedom.org>
-#Date: June 2016
-#Version 0.3.0
-#Added AVFS support
+# Authors: Marc Jones <mjones@softwarefreedom.org>,
+# Daniel Gnoutcheff <gnoutchd@softwarefreedom.org>
+# Date: June 2016
+# Version 0.3.0
+# Added AVFS support
##TODO
#need to verify that word score counts each instance, not just 0 or 1
@@ -32,6 +33,8 @@ import sys
import datetime
import string
import subprocess
+import chardet
+
import avfs
report = {}
@@ -48,6 +51,32 @@ progresstext = ""
fsencoding = sys.getfilesystemencoding()
def fsdecode_display(bytestring):
return str(bytestring, encoding=fsencoding, errors='replace')
+
+
+_def_file_encoding = sys.getdefaultencoding()
+
+def decode_file(filename):
+ """
+ Return the contents of the file at the given path as a (Unicode) string.
+ Return None if the file appears to be a binary.
+ """
+
+ with avfs.open(filename, 'rb') as filehandle:
+ contents_raw = filehandle.read()
+
+ try:
+ return str(contents_raw, encoding=_def_file_encoding)
+ except UnicodeDecodeError:
+ pass
+
+ guessed_encoding = chardet.detect(contents_raw)['encoding']
+ if not guessed_encoding:
+ return None
+
+ try:
+ return str(contents_raw, encoding=guessed_encoding)
+ except UnicodeDecodeError:
+ return None
def sortscore(score, reverse=True):
sortedscore = sorted(score.items(), key=lambda score: score[1], reverse=reverse)
@@ -304,10 +333,13 @@ for file in filelist:
skipped += 1
continue
try:
- f = avfs.open(file)
- except:
+ filecontents = decode_file(file)
+ except OSError:
print("failed to open: " + file_displayname)
continue
+ if filecontents is None:
+ print("possible binary: " + file_displayname)
+ continue
opened +=1
now = datetime.datetime.now()
if options.display_progress:
@@ -326,14 +358,6 @@ for file in filelist:
.format(frac_done, est_hr, est_min, est_sec, prog_file)
print(progresstext, end='', file=sys.stderr)
sys.stdout.flush()
- try:
- filecontents = f.read()
- except UnicodeDecodeError:
- print("possible binary: " + file_displayname)
- continue
- except OSError:
- print("read error: " + file_displayname)
- continue
datasize += len(filecontents)
filenamescore = scoretext(wordlist, file_displayname, options.maxwholewordlength)
filecontentsscore = scoretext(wordlist, filecontents, options.maxwholewordlength)