From ad82b0be80b17bc96f3f96f29db591991bfd02a3 Mon Sep 17 00:00:00 2001 From: Marc Jones Date: Wed, 5 Feb 2014 11:22:04 -0500 Subject: only match words made only of digits that are not adjacent to other digits if wordboundaries are turned on --- main.py | 14 ++++++++++---- main.py~ | 34 ++++++++++++++++++++++++---------- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/main.py b/main.py index 7c4102b..029c4eb 100755 --- a/main.py +++ b/main.py @@ -64,11 +64,12 @@ def wholeword(word, string): re.purge() matches = [] - try int(word): + try: + int(word) regexNum = r'([^0-9]|\b)(' + word + r')([^0-9]|\b)' - m = re.search(regexNum, string) - if "groups" in dir(mU): - matches.append(mU.groups()) + mN = re.search(regexNum, string) + if "groups" in dir(mN): + matches.append(mN.groups()) except ValueError: regexU = r'([A-Z]|[^a-zA-Z]|\b)(' + re.escape(word.lower()) + r')([A-Z]|[^a-zA-Z]|\b)' @@ -230,6 +231,11 @@ if options.summaryfile and len(filelist) > 0 and not options.displaysummary: summaryfile.close() def test(): + print wholeword("22", "port22") + print wholeword("22", "22") + print wholeword("22", ":22'") + print wholeword("22", "223") + print wholeword("22", "open('22')") print wholeword("ear","bearth") print wholeword("ear","BearTH") print wholeword("ear","bEARth") diff --git a/main.py~ b/main.py~ index fe318c1..7fa6f0e 100755 --- a/main.py~ +++ b/main.py~ @@ -63,15 +63,24 @@ def summary(report): def wholeword(word, string): re.purge() matches = [] - regexU = r'([A-Z]|[^a-zA-Z]|\b)(' + re.escape(word.lower()) + r')([A-Z]|[^a-zA-Z]|\b)' - regexL = r'([a-z]|[^a-zA-Z]|\b)(' + re.escape(word.upper()) + r')([a-z]|[^a-zA-Z]|\b)' - mU = re.search(regexU, string) - if "groups" in dir(mU): - matches.append(mU.groups()) - re.purge() - mL = re.search(regexL, string) - if "groups" in dir(mL): - matches.append(mL.groups()) + + try: + int(word) + regexNum = r'([^0-9]|\b)(' + word + r')([^0-9]|\b)' + mN = re.search(regexNum, string) + if "groups" in dir(mN): + 
matches.append(mN.groups()) + + except ValueError: + regexU = r'([A-Z]|[^a-zA-Z]|\b)(' + re.escape(word.lower()) + r')([A-Z]|[^a-zA-Z]|\b)' + regexL = r'([a-z]|[^a-zA-Z]|\b)(' + re.escape(word.upper()) + r')([a-z]|[^a-zA-Z]|\b)' + mU = re.search(regexU, string) + if "groups" in dir(mU): + matches.append(mU.groups()) + re.purge() + mL = re.search(regexL, string) + if "groups" in dir(mL): + matches.append(mL.groups()) return matches def skipfile(filename,skippedexts): @@ -222,6 +231,11 @@ if options.summaryfile and len(filelist) > 0 and not options.displaysummary: summaryfile.close() def test(): + print wholeword("22", "port22") + print wholeword("22", "22") + print wholeword("22", ":22'") + print wholeword("22", "223") + print wholeword("22", "open('22')") print wholeword("ear","bearth") print wholeword("ear","BearTH") print wholeword("ear","bEARth") @@ -232,4 +246,4 @@ def test(): print wholeword("ear","ear\n\r") print wholeword("ear","myEAR() MYear: myEAR()") -#test() +test() -- cgit v1.2.3