Pregunta

Is there any predefined function in Python to identify the common prefixes on the right side of a production rule?

For example, I need to turn this sort of data structure:

['abc', 'ab', 'd', 'dc']

into a dictionary of prefix-to-corresponding suffixes pairs. So my output for this example should look something like this:

{'ab': set(['c', '']), 'd': set(['c', ''])}  # ab is a common prefix and d is a common prefix

In more general terms, I need to convert this sort of input:

S -> abc|ab|d|dc

into this sort of output:

S -> abA|dA  # prefixes ab and d followed by A
A -> c|NULL  # where A is either c or an equivalent of None

I want to use this output to perform left factoring of a grammar.

¿Fue útil?

Solución

No, I do not know of any predefined function that will identify the common prefixes for the right side of a production rule. The following function seems to work fairly well, however. I use izip_longest from the itertools module. You may be able to optimize or improve this function even further:

from itertools import izip_longest

def find_prefixes(strings):
    zipped = izip_longest(*strings, fillvalue='')
    for index, letters in enumerate(zipped):
        if index == 0:
            prefixes = letters  # assumes there will always be a prefix
        else:
            poss_prefixes = [prefix + letters[i] for i, prefix in enumerate(prefixes)]
            prefixes = [prefix if poss_prefixes.count(prefix) == letters.count(prefix)  # changed > 1 to == letters.count(prefix)
                        else prefixes[i] for i, prefix in enumerate(poss_prefixes)]
    return set(prefixes)

Then, if you want, you can also find the suffixes:

def find_prefix_suffixes(strings, prefixes):
    prefix_suffix = dict()
    for s in strings:
        for prefix in sorted(list(prefixes), key=lambda x: len(x), reverse=True):
            if s.startswith(prefix):
                if prefix in prefix_suffix:
                    prefix_suffix[prefix].add(s.replace(prefix, '', 1))
                else:
                    prefix_suffix[prefix] = set([s.replace(prefix, '', 1)])
    return prefix_suffix

Here is the result:

>>> s = ['ab', 'abc', 'abcd', 'acd', 'bc', 'bd', 'd']
>>> find_prefixes(s)
set(['a', 'b', 'd'])
>>> prefixes = find_prefixes(s)
>>> find_prefix_suffixes(s, prefixes)
{'a': set(['bcd', 'cd', 'b', 'bc']), 'b': set(['c', 'd']), 'd': set([''])}

Otros consejos

Another solution, let me know if that works:

from itertools import takewhile,izip

def groupby(ls):
    d = {}
    ls = [ y[0] for y in x ]
    initial = list(set(ls))
    for y in initial:
        for i in x:
            if i.startswith(y):
                if y not in d:
                    d[y] = []
                d[y].append(i)
    return d

def prefix(x):
    return len(set(x)) == 1

x=['abc','ab','d','dc']

for k, l in groupby(x).iteritems():
    r = [l[0] for l in takewhile(prefix ,izip(*l))]
    print ''.join(r)

Output:

ab
d

I have used above codes to do this program and it works from itertools import takewhile

def groupby(ls):
    d = {}
    ls = [ y[0] for y in rules ]
    initial = list(set(ls))
    for y in initial:
        for i in rules:
            if i.startswith(y):
                if y not in d:
                    d[y] = []
                d[y].append(i)
    return d

def prefix(x):
    return len(set(x)) == 1


starting=""
rules=[]
common=[]
alphabetset=["A'","B'","C'","D'","E'","F'","G'","H'","I'","J'","K'","L'","M'","N'","O'","P'","Q'","R'","S'","T'","U'","V'","W'","X'","Y'","Z'"]


s=input("Enter the production: ")
while(True):
    split=s.split("->")
    starting=split[0]
    for i in split[1].split("|"):
        rules.append(i)

#logic for taking commons out
    for k, l in groupby(rules).items():
        r = [l[0] for l in takewhile(prefix, zip(*l))]
        common.append(''.join(r))
#end of taking commons

    for i in common:
        newalphabet=alphabetset.pop()
        print(starting+"->"+i+newalphabet)
        index=[]
        for k in rules:
            if(k.startswith(i)):
                index.append(k)
        print(newalphabet+"->",end="")
        for j in index:
            stringtoprint=j.replace(i,"", 1)+"|"
            if stringtoprint=="|":
                print("\u03B5"+"|",end="")
            else:
                print(j.replace(i,"", 1)+"|",end="")
        print("\b")
        print("")

    print("\ndo you have another production?")
    T=input("y/n:")
    if T=='y':
        s=input("Enter the production: ")
    elif T=='n':
        break
`def leftFactoring(s):
 k=[]
 l=[]
 n=""
 k=s.split('->')
 l=k[1].split('/')
 for i in range(0,len(l)-1):
    for j in range(0,min(len(l[i]),len(l[i+1]))):
        if(l[i][j]==l[i+1][j]):
            if l[i][j] not in n:
                n=n+l[i][j]
 print(k[0]+'->'+n+"R")
 m=k[1].split(n)
 print("R->",end="")
 for i in range(1,len(m)):
    print(m[i],end="")
s=input("Enter the production: ")
while(True):
    leftFactoring(s)
    print("\ndo you have another production?")
    T=input("y/n:")
    if T=='y':
        s=input("Enter the production: ")
    elif T=='n':
        break`

OUTPUT:

Enter the production:A->hgf/hgk
A->hgR
R->f/k
do u have another production?
y/n:y
Enter the production:B->fg/fh/fk
B->fR
R->g/h/k
do you have another production?
y/n:n
Licenciado bajo: CC-BY-SA con atribución
No afiliado a StackOverflow
scroll top