Question

I am trying to build a 2-3-4 tree in python. So far, the insertion seems to be working up to nodes of height 3 or so. After that, the data seems to get dropped rather than being inserted into the tree. I'm unsure why this is happening, and have double and triple checked my code several times. I have commented near the insert code where I got the insertion algorithm from. Thanks in advance for any insight on my issue.

import re
class Node:
    def __init__(self, newInfo = None, p = None, q = None,
                 r = None, s = None, parent = None):
        #initilize parent / child links
        self.children = [p, q, r, s]
        self.parent = parent
        #initialize data 
        self.info = [ newInfo, None, None ]

class TwoThreeFourTree:
    def __init__(self):
        self.root = Node()

    # Built in sort sinks None types to front, which
    # is opposite of how info was structured to work,
    # Using this sort will push None values to back.
    def sortNode(self, curr):
        c = []
        for i in curr.info:
            if i is not None:
                c.append(i)
        c.sort()
        for i in range(3-len(c)):
            c.append(None)
        return c            

    # Built in len counts None, this one doesn't.
    def length(self, node):
        i = 0
        for info in node:
            if info is not None:
                i = i + 1
        return i

    def isLeaf(self, node):
        for child in node.children:
            if child is not None:
                return False
        return True

    def isOrphan(self, node):
        if node.parent is None:
            return True
        return False

    def lookup(self, userStr, reg):
        curr = self.root
        while curr is not None:
            #Two Node
            if self.length(curr.info) is 1:
                if re.match(reg, str(curr.info[0])) is not None:
                    return curr.info[0]
                else:
                    if userStr < curr.info[0]:
                        curr = curr.children[0]
                    else:
                        curr = curr.children[1]

            #Three Node
            elif self.length(curr.info) is 2:
                for item in curr.info:
                    if re.match(reg, str(item)) is not None:
                        return item
                if userStr < curr.info[0]:
                    curr = curr.children[0]
                elif userStr < curr.info[1]:
                    curr = curr.children[1]
                else:
                    curr = curr.children[2]

            #Four Node
            elif self.length(curr.info) is 3:
                for item in curr.info:
                    if item is not None:
                        if re.match(reg, str(item)) is not None:
                            return item
                if userStr < curr.info[0]:
                    curr = curr.children[0]
                elif userStr < curr.info[1]:
                    curr = curr.children[1]
                elif userStr < curr.info[2]:
                    curr = curr.children[2]
                else:
                    curr = curr.children[3]

    def inorder(self, node, retlst = None):
        if retlst is None:
            retlst = [] 
        if node.children[0]:
            retlst = self.inorder(node.children[0], retlst)
        retlst += [node.info[0]] 
        if node.children[1]:
            retlst = self.inorder(node.children[1], retlst)
        retlst += [node.info[1]]
        if node.children[2]:
            retlst = self.inorder(node.children[2], retlst)
        retlst += [node.info[2]]
        if node.children[3]:
            retlst = self.inorder(node.children[3], retst)
        return retlst

    ## Using Algorithm from: http://www.clear.rice.edu/comp212/01-fall/lectures/33/
    def insert(self, info, node):
        curr = node
        if self.length(curr.info) == 0: # curr is empty
            curr.info[0] = info
            return True

        elif self.length(curr.info) == 1: # curr is two node.
            if self.isLeaf(curr):
                curr.info[1] = info
                curr.info = self.sortNode(curr)
                return True
            else:
                if info < curr.info[0]:
                    self.insert(info, curr.children[0])
                else:
                    self.insert(info, curr.children[1])

        elif self.length(curr.info) == 2: # curr is 3 node
            if self.isLeaf(curr):
                curr.info[2] = info
                curr.info = self.sortNode(curr)
                return True
            else:
                if info < curr.info[0]:
                    self.insert(info, curr.children[0])
                elif info < curr.info[1]:
                    self.insert(info, curr.children[1])
                elif info > curr.info[2]:
                    self.insert(info, curr.children[2])

        elif self.length(curr.info) == 3: # curr is 4 node
            if self.isOrphan(curr): # curr has no parent
                curr = Node(curr.info[1],
                            Node(curr.info[0], curr.children[0], curr.children[1]),
                            Node(curr.info[2], curr.children[2], curr.children[3]))
                for child in curr.children:
                    if child is not None:
                        child.parent = curr
                self.root = curr
                self.insert(info, self.root)

            else:   #curr has a parent
                if self.length(curr.parent.info) == 1: # Parent is Two Node:
                    if curr.parent.children[0] == curr:# cur is lst.
                    #If P = [curr, M, p-rst], then P becomes [[lst, X, mlst], Y, [mrst, Z, rst], M, p-rst].
                        curr.parent.info[1] = curr.info[1]
                        curr.parent.info = self.sortNode(curr.parent)
                        curr.parent.children[2] = curr.parent.children[1]
                        curr.parent.children[1] = Node(curr.info[2], curr.children[2], curr.children[3], None, None, curr.parent)
                        curr.parent.children[0] = Node(curr.info[0], curr.children[0], curr.children[1], None, None, curr.parent)
                        self.insert(info, self.root)

                    elif curr.parent.children[1] == curr: # curr is rst.
                    #If P = [p-lst, M, curr], then P becomes [p-lst, M, [lst, X, mlst], Y, [mrst, Z, rst]].
                        curr.parent.info[1] = curr.info[1]
                        curr.parent.info = self.sortNode(curr.parent)
                        curr.parent.children[2] = Node(curr.info[2], curr.children[2], curr.children[3], None, None, curr.parent)
                        curr.parent.children[1] = Node(curr.info[0], curr.children[0], curr.children[1], None, None, curr.parent)
                        self.insert(info, self.root)

                elif self.length(curr.parent.info) == 2: # Parent is Three Node:

                    if curr.parent.children[0] == curr: # curr is lst
                    #If P = [curr, M1, p-mst, M2, p-rst], then P becomes [[lst, X, mlst], Y, [mrst, Z, rst], M1, p-mst, M2, p-rst].
                        curr.parent.info[2] = curr.info[1]
                        curr.parent.info = self.sortNode(curr.parent)
                        curr.parent.children[3] = curr.parent.children[2]
                        curr.parent.children[2] = curr.parent.children[1]
                        curr.parent.children[1] = Node(curr.info[2], curr.children[2], curr.children[3], None, None, curr.parent)
                        curr.parent.children[0] = Node(curr.info[0], curr.children[0], curr.children[1], None, None, curr.parent)
                        self.insert(info, self.root)

                    elif curr.parent.children[1] == curr: # curr is mst
                    #If P = [p-lst, M1, curr, M2, p-rst], then P becomes [p-lst, M1,[lst, X, mlst], Y, [mrst, Z, rst], M2, p-rst].
                        curr.parent.info[2] = curr.info[1]
                        curr.parent.info = self.sortNode(curr.parent)
                        curr.parent.children[3] = curr.parent.children[2]
                        curr.parent.children[2] = Node(curr.info[2], curr.children[2], curr.children[3], None, None, curr.parent)
                        curr.parent.children[1] = Node(curr.info[0], curr.children[0], curr.children[1], None, None, curr.parent)
                        self.insert(info, self.root)

                    elif curr.parent.children[2] == curr: # curr is rst
                    #If P = [p-lst, M1, p-mst, M2, curr], then P becomes [p-lst, M1, p-mst, M2, [lst, X, mlst], Y, [mrst, Z, rst]].
                        curr.parent.info[2] = curr.info[1]
                        curr.parent.info = self.sortNode(curr.parent)
                        curr.parent.children[3] = Node(curr.info[2], curr.children[2], curr.children[3], None, None, curr.parent)
                        curr.parent.children[2] = Node(curr.info[0], curr.children[0], curr.children[1], None, None, curr.parent)
                        self.insert(info, self.root)

Pas de solution correcte

Autres conseils

As a partial answer, you can start by eliminating most of the repetition in your code. For example, you don't need to special-case the lookup() function for the three different node types. You could instead write something like this:

def lookup(self, s, r, node=None):
    if not node: node = self.root
    for item in node.info:
        if re.match(r, str(item)):
            return item
    for (i, item) in enumerate(curr.info):
        if s < item:
            return self.lookup(s, r, curr.children[i])
    return self.lookup(s, r, curr.children[-1]) # Assume len(children) == len(info) + 1

Also, traversals should be implemented as generators, and again, use loops instead of repetitive code:

def inorder(self, node):
    for (i, item) in enumerate(node.info):
        if node.children[i]: self.inorder(node.children[i])
        yield item
    if node.children[-1]: self.inorder(node.children[-1])

This kind of a cleanup will firstly make it much easier for others to diagnose the problem. Secondly, you might find that the problem evaporates in the process.

Licencié sous: CC-BY-SA avec attribution
Non affilié à StackOverflow
scroll top