def firstn(ls,ln):
    """Yield a leading slice of each string in ls, sized by the paired entry in ln.

    The two inputs are walked in lockstep with zip(), so iteration stops as
    soon as the shorter one is exhausted.
    """
    yield from (text[:count] for text, count in zip(ls, ln))

# Example inputs: take 1, 2 and 3 leading characters from the respective strings.
ls = ['abc','defg','hijkl']
ln = [1,2,3]

g=firstn(ls,ln)

# Exhaust the generator to collect every prefix.
list(g)

['a', 'de', 'hij']


# Sample text: mixed-case words separated by digits, punctuation and spaces.
s = "abc123!xyz 123 The QuIcK-abc*xyz.  XYZ"

def wordfreq(s):
    """Return a sorted list of (word, count) pairs for the words in s.

    A word is a maximal run of the letters a-z after lower-casing s; digits,
    punctuation and whitespace only act as separators.

    Args:
        s: the text to analyse.

    Returns:
        A list of (word, count) tuples ordered alphabetically by word; empty
        when s contains no letters.
    """
    import re
    from collections import Counter
    # findall() returns only the letter runs, so text that starts or ends with
    # a separator (or is empty) never produces a spurious '' "word" the way
    # splitting on the separators does.
    counts = Counter(re.findall(r'[a-z]+', s.lower()))
    return sorted(counts.items())

# Print the (word, count) pairs computed for the sample text.
print(wordfreq(s))

# or, counting occurrences in a dict:

def wordfreq(s):
    """Return a sorted list of (word, count) pairs for the words in s.

    A word is a maximal run of the letters a-z after lower-casing s; every
    other character is treated as a separator.

    Args:
        s: the text to analyse.

    Returns:
        A list of (word, count) tuples ordered alphabetically by word; empty
        when s contains no letters.
    """
    import re
    f = {}
    # Match the words themselves rather than splitting on separators: a split
    # yields '' entries when s begins or ends with a separator, and those
    # would be counted as a word.
    for w in re.findall(r'[a-z]+', s.lower()):
        f[w] = f.get(w, 0) + 1
    return [(k, f[k]) for k in sorted(f)]

# or, without the re module:

def wordfreq(s):
    """Count the alphabetic words in s, ignoring case.

    Every character of the lower-cased text for which str.isalpha() is false
    is replaced by a space, and the result is split on whitespace.

    Returns:
        A list of (word, count) tuples sorted by word.
    """
    lowered = s.lower()
    # Blank out every non-letter so str.split() can separate the words.
    spaced = ''.join(ch if ch.isalpha() else ' ' for ch in lowered)
    counts = {}
    for word in spaced.split():
        counts[word] = counts.get(word, 0) + 1
    return [(word, counts[word]) for word in sorted(counts)]

[('abc', 2), ('quick', 1), ('the', 1), ('xyz', 3)]


import re

# Sample sentence split on whitespace into a list of words.
l = "four brown foxes and a lazy dog".split()
print(l)

def abbrev(l):
    o = []
    for s in l:
        a,b,c = re.split(r'([aeiouy])',s,maxsplit=1)
        o.append(a+b+re.sub(r'[aeiouy]','',c))
    return o

# Abbreviate the sample word list (a bare expression, so the result is not printed).
abbrev(l)

['four', 'brown', 'foxes', 'and', 'a', 'lazy', 'dog']

['for', 'brown', 'foxs', 'and', 'a', 'laz', 'dog']


import re
# Exactly one of the words "one", "two" or "three".
re_digit = r'^(one|two|three)$'
print(list(map(re.compile(re_digit).match,
               ['one', 'three', 'tone', 'twotwo', 'threes'])))

# One or more digits, each in the range 0-5.
re_base6 = r'^[0-5]+$'
print(list(map(re.compile(re_base6).match,
               ['0' , '421' , '' , '16' , '-10'])))

# One to three uppercase letters followed by three to five digits.
re_id = r'^[A-Z]{1,3}[0-9]{3,5}$'
print(list(map(re.compile(re_id).match,
               ['A123' , 'XYZ00000' , 'ab123', 'AAAA123' , 'AB12' , 'AB000123'])))

[<re.Match object; span=(0, 3), match='one'>, <re.Match object; span=(0, 5), match='three'>, None, None, None]
[<re.Match object; span=(0, 1), match='0'>, <re.Match object; span=(0, 3), match='421'>, None, None, None]
[<re.Match object; span=(0, 4), match='A123'>, <re.Match object; span=(0, 8), match='XYZ00000'>, None, None, None, None]


import re
    
def fixdup(s):
    """Collapse runs of the same word (compared case-insensitively) to one.

    The text is split on whitespace and the surviving words are re-joined
    with single spaces, so leading/trailing whitespace and newlines are
    normalised as well. The first word of each run is the one kept.
    """
    kept = []
    previous = None  # lower-cased form of the word just before the current one
    for word in s.split():
        folded = word.lower()
        if folded != previous:
            kept.append(word)
        previous = folded
    return ' '.join(kept)

# Try fixdup() on repeated words, a trailing space, and a newline separator;
# repr() makes the exact resulting string visible.
for s in ["The the", "The the " , "The the the", "a\nb"]:
    print(repr(fixdup(s)))

'The'
'The'
'The'
'a b'


# lab validation code; do not modify
def labcheck():
    """Run the checks for questions 1-5 and print one result line per question.

    Each nested ``qN`` function exercises one of the names defined elsewhere
    in this file (``firstn``, ``wordfreq``, ``abbrev``, the ``re_*`` patterns,
    ``fixdup``) using assertions. The loop at the end looks the check
    functions up by name and prints "OK" or the failure text for each.
    """
    import copy, random, re, string, types
    from random import randint
            
    def checkre(pat,ok,nok):
        """Assert that pat full-matches every string in ok and no string in nok."""
        for s in ok:
            assert re.fullmatch(pat,s), \
                f"pattern '{pat}'\n did not match string '{s}'"
        for s in nok:
            assert not re.fullmatch(pat,s), \
                f"pattern '{pat}'\n matched string '{s}'"  
            

    def randwords(n,chars=string.ascii_lowercase,nl=(2,5)):
        """Return a list of n distinct random words built from chars.

        Each word's length is drawn with randint(*nl). Words are collected in
        a set, so duplicates are discarded until n different words exist.
        """
        l = set()
        while len(l)<n:
            l |= set((''.join([chars[randint(0,len(chars)-1)] for i in range(randint(*nl))]),))
        return list(l)


    def q1():
        """Check firstn() on 3-5 random words with one random prefix length per word."""
        n=randint(3,5)
        ls=randwords(n)
        # One length per word, between 1 and that word's own length.
        ln=[randint(1,n) for n in map(len,ls)]
        g=firstn(ls,ln)
        l=list(g)
        #print(n,ls,ln,l)

        assert isinstance(firstn,types.FunctionType), \
            f"firstn() has type {type(firstn)}"
        assert isinstance(g, types.GeneratorType), \
            f"return from firstn() has type {type(g)}"
        # NOTE(review): `s` is never assigned inside q1; it resolves to the
        # loop variable of the enclosing labcheck() (the name of the check
        # being run), so this failure message does not show the real inputs.
        assert all([len(l[i]) == ln[i] and ls[i].startswith(l[i]) for i in range(len(l))]), \
            f"firstn({n},'{s}'') returns{l}"

    def q2():
        """Check wordfreq() against a randomly generated text with known counts."""
        chars = string.ascii_lowercase
        # Separators: runs of spaces (listed three times), single punctuation
        # marks or digits, and a few multi-character punctuation strings.
        joins = 3*[' ', '  ', '   ']+list("!&*012345")+"--,$$$,::,<<<,>>".split(',')

        n = randint(5,8)
        l = sorted(randwords(n))
        # Frequency for each word: 1, 2 or 3, picked from a list that holds
        # 1 three times, 2 twice and 3 once.
        f = [[1,1,1,2,2,3][randint(0,5)] for i in range(n)]

        xl = []
        for i in range(n):
            xl.extend([l[i]]*f[i])

        random.shuffle(xl)
        # Every word is followed by a random separator, then characters are
        # upper-cased whenever randint(0,4) returns 0.
        s=''.join([xl[i]+random.choice(joins) for i in range(len(xl))])
        s=''.join([c.upper() if not randint(0,4) else c for c in s])

        rf = wordfreq(s)
        cf = list(zip(l,f))

        # print(n,s,rf,cf,sep='\n')
        assert rf == cf, f"wordfreq('{s}') returns {rf} instead of {cf}"


    def q3():
        """Check abbrev() on random words that each hold a vowel and a consonant."""
        t=False
        # Regenerate the word list until every word contains at least one of
        # "aeiouy" and at least one of "bdln".
        while not t:
            l=randwords(randint(5,10),"bdlnaeiouy",(4,6))
            t=all([len([c for c in w if c in "aeiouy"])>0 for w in l]) and \
                all ([len([c for c in w if c in "bdln"])>0 for w in l])
            #print(l,t)

        # Keep a copy of the input so it can be compared with the result.
        ol=copy.deepcopy(l)
        rl=abbrev(l)

        # Every result must contain exactly one vowel, and it must be the
        # first vowel of the corresponding original word.
        assert all([len(re.findall(r'[aeiouy]',s)) == 1 for s in rl]) and \
            [re.findall(r'[aeiouy]',s)[0] for s in ol] == [re.findall(r'[aeiouy]',s)[0] for s in rl], \
            f"abbrev('{ol}') returned {rl}"
    
    def q4():
        """Check the re_digit, re_base6 and re_id patterns with checkre()."""
        checkre(re_digit,
               ['one', 'two', 'three'],
               [' one', 'twotwo', 'threes'])
        checkre(re_base6,
               ['0' , '421', '12354' ],
               ['' , '16' , '-10', '7'])
        checkre(re_id,
               ['A123' , 'XYZ00000' , ],
               ['ab123', 'AAAA123' , 'AB12' , 'AB000123'])
   
    def q5():
        """Check fixdup(): repeated words collapse to one, other text is unchanged."""
        ok=["The the ", " the the", "The the the"]
        nok=["a b"]
        for s in ok:
            r = fixdup(s)
            # The expected result is the first whitespace-separated word.
            assert r == s.split()[0], f"fixdup({repr(s)} returns {repr(r)})"
        for s in nok:
            r = fixdup(s)
            assert r == s, f"fixdup({repr(s)} returns {repr(r)})"
        
    # Find the check functions among this function's local names: any name
    # matching q<digits> is called, and the text after its first character is
    # used as the question number in the report.
    for s,i in [(s,s[1:]) for s in locals().keys() if re.search(r'q\d+',s)]:
        try:
            locals()[s]()
            print(f"Question {i} OK.")
        # Any exception, including a failed assertion, is reported instead of
        # propagated so the remaining questions still run.
        except Exception as e:
            print(f"Failed check for Question {i}: {e}")
            
# Run all question checks; one status line is printed per question.
labcheck()

Question 1 OK.
Question 2 OK.
Question 3 OK.
Question 4 OK.
Question 5 OK.

ELEX 4653 Lab 5¶

Question 1¶

Question 2¶

Question 3¶

Question 4¶

Question 5¶