Staging
v0.5.0
https://repo1.maven.org/maven2/org/prefuse/prefuse
Raw File
Trie.java
package prefuse.data.search;

import java.util.Iterator;
import java.util.LinkedList;
import java.util.NoSuchElementException;

import prefuse.data.Tuple;


/**
 * A trie data structure for fast-lookup of words based on their
 * prefixes. The name "Trie" is a play on the words "tree" and 
 * "retrieval". This class builds a tree structure representing a set of
 * words by their prefixes. It is useful for performing prefix-based
 * searches over large amounts of text in an efficient manner.
 *
 * @version 1.0
 * @author <a href="http://jheer.org">jeffrey heer</a>
 * @see PrefixSearchTupleSet
 */
public class Trie {

    /**
     * Base class for nodes in the trie structure.
     */
    public class TrieNode {
        boolean isLeaf;
        int leafCount = 0;
    }
    
    /**
     * A TrieNode implementation representing a branch in the tree. The
     * class maintains a list of characters (the next character in the
     * prefix) and associated children TrieNodes for each.
     */
    public class TrieBranch extends TrieNode {
        char[] chars = new char[] {0};
        TrieNode[] children = new TrieNode[1];
    }
    
    /**
     * A TrieNode implementation representing a leaf in the tree. The class
     * stores the word and tuple for the leaf, as well as a reference to the
     * successor leaf node in the trie.
     */
    public class TrieLeaf extends TrieNode {
        public TrieLeaf(String word, Tuple t) {
            this.word = word;
            tuple = t;
            next = null;
            leafCount = 1;
        }
        String word;
        Tuple tuple;
        TrieLeaf next;
    }
    
    /**
     * An iterator for traversing a subtree of the Trie.
     */
    public class TrieIterator implements Iterator {
        private LinkedList queue;
        public TrieIterator(TrieNode node) {
            queue = new LinkedList();
            queue.add(node);
        }
        public boolean hasNext() {
            return !queue.isEmpty();
        }
        public Object next() {
            if ( queue.isEmpty() )
                throw new NoSuchElementException();
            
            TrieNode n = (TrieNode)queue.removeFirst();
            Object o;
            if ( n instanceof TrieLeaf ) {
                TrieLeaf l = (TrieLeaf)n;
                o = l.tuple;
                if ( l.next != null )
                    queue.addFirst(l.next);
                return o;
            } else {
                TrieBranch b = (TrieBranch)n;
                for ( int i = b.chars.length-1; i > 0; i-- ) {
                    queue.addFirst(b.children[i]);
                }
                if ( b.children[0] != null )
                    queue.addFirst(b.children[0]);
                return next();
            }
        }
        public void remove() {
            throw new UnsupportedOperationException();
        }
    } // end of inner clas TrieIterator
    
    private TrieBranch root = new TrieBranch();
    private boolean caseSensitive = false;
    
    /**
     * Create a new Trie with the specified case-sensitivity.
     * @param caseSensitive true if the index should be case sensitive for
     * indexed words, false otherwise.
     */
    public Trie(boolean caseSensitive) {
        this.caseSensitive = caseSensitive;
    }
    
    /**
     * Add a new word to the trie, associated with the given Tuple.
     * @param word the word to add to the Trie
     * @param t the Tuple associated with the word
     */
    public void addString(String word, Tuple t) {
        TrieLeaf leaf = new TrieLeaf(word,t);
        addLeaf(root, leaf, 0);
    }
    
    /**
     * Remove a word/Tuple pair from the trie.
     * @param word the word to remove
     * @param t the associate Tuple to remove
     */
    public void removeString(String word, Tuple t) {
        removeLeaf(root, word, t, 0);
    }
    
    private final int getIndex(char[] chars, char c) {
        for ( int i=0; i<chars.length; i++ )
            if ( chars[i] == c ) return i;
        return -1;
    }
    
    private final char getChar(String s, int i) {
        char c = ( i < 0 || i >= s.length() ? 0 : s.charAt(i) );
        return ( caseSensitive ? c : Character.toLowerCase(c) );
    }
    
    private final TrieNode equalityCheck(String word, TrieLeaf l) {
        if ( caseSensitive ) {
            return l.word.startsWith(word) ? l : null;
        } else {
            // do our own looping to avoid string allocation for case change
            int len = word.length();
            if ( len > l.word.length() ) return null;
            for ( int i=0; i<len; ++i ) {
                char c1 = Character.toLowerCase(word.charAt(i));
                char c2 = Character.toLowerCase(l.word.charAt(i));
                if ( c1 != c2 ) return null;
            }
            return l;
        }
    }
    
    private boolean removeLeaf(TrieBranch b, String word, Tuple t, int depth) {
        char c = getChar(word, depth);
        int i = getIndex(b.chars, c);
        
        if ( i == -1 ) {
            // couldn't find leaf
            return false;
        } else {
            TrieNode n = b.children[i];
            if ( n instanceof TrieBranch ) {
                TrieBranch tb = (TrieBranch)n;
                boolean rem = removeLeaf(tb, word, t, depth+1);
                if ( rem ) {
                    b.leafCount--;
                    if ( tb.leafCount == 1 )
                        b.children[i] = tb.children[tb.children[0]!=null?0:1];
                }
                return rem;
            } else {
                TrieLeaf nl = (TrieLeaf)n;
                if ( nl.tuple == t ) {
                    b.children[i] = nl.next;
                    if ( nl.next == null )
                        repairBranch(b,i);
                    b.leafCount--;
                    return true;
                } else {
                    TrieLeaf nnl = nl.next;
                    while ( nnl != null && nnl.tuple != t ) {
                        nl = nnl; nnl = nnl.next;
                    }
                    if ( nnl == null )
                        return false; // couldn't find leaf
                    
                    // update leaf counts
                    for ( TrieLeaf tl = (TrieLeaf)n; tl.tuple != t; tl = tl.next )
                        tl.leafCount--;
                    
                    nl.next = nnl.next;
                    b.leafCount--;
                    return true;
                }     
            }
        }
    }
    
    private void repairBranch(TrieBranch b, int i) {
        if ( i == 0 ) {
            b.children[0] = null;
        } else {
            int len = b.chars.length;
            char[] nchars = new char[len-1];
            TrieNode[] nkids = new TrieNode[len-1];
            System.arraycopy(b.chars,0,nchars,0,i);
            System.arraycopy(b.children,0,nkids,0,i);
            System.arraycopy(b.chars,i+1,nchars,i,len-i-1);
            System.arraycopy(b.children,i+1,nkids,i,len-i-1);
            b.chars = nchars;
            b.children = nkids;
        }
    }
    
    private void addLeaf(TrieBranch b, TrieLeaf l, int depth) {
        b.leafCount += l.leafCount;
        
        char c = getChar(l.word, depth);
        int i = getIndex(b.chars, c);
        
        if ( i == -1 ) {
            addChild(b,l,c);
        } else {
            TrieNode n = b.children[i];
            if ( n == null ) {
                // we have completely spelled out the word
                b.children[i] = l;
            } else if ( n instanceof TrieBranch ) {
                // recurse down the tree
                addLeaf((TrieBranch)n,l,depth+1);
            } else {
                // node is a leaf, need to do a split?
                TrieLeaf nl = (TrieLeaf)n;
                if ( i==0 || (caseSensitive ? nl.word.equals(l.word) 
                                  : nl.word.equalsIgnoreCase(l.word)) )
                {
                    // same word, so chain the entries
                    for ( ; nl.next != null; nl = nl.next )
                        nl.leafCount++;
                    nl.leafCount++;
                    nl.next = l;
                } else {
                    // different words, need to do a split
                    TrieBranch nb = new TrieBranch();
                    b.children[i] = nb;
                    addLeaf(nb,nl,depth+1);
                    addLeaf(nb,l,depth+1);
                }
            }
        }
    }
    
    private void addChild(TrieBranch b, TrieNode n, char c) {
        int len = b.chars.length;
        char[] nchars = new char[len+1];
        TrieNode[] nkids = new TrieNode[len+1];
        System.arraycopy(b.chars,0,nchars,0,len);
        System.arraycopy(b.children,0,nkids,0,len);
        nchars[len] = c;
        nkids[len] = n;
        b.chars = nchars;
        b.children = nkids;
    }
    
    /**
     * Look up the given word in this Trie. If a match is found, a TrieNode
     * is returned. This node is the root of a subtree containing all the
     * matches to the query.
     * @param word the word to lookup
     * @return the TrieNode root of the subtree containing all matches. A
     * null value is returned if no match is found.
     */
    public TrieNode find(String word) {
        return (word.length() < 1 ? null : find(word, root, 0));
    }
    
    private TrieNode find(String word, TrieBranch b, int depth) {
        char c = getChar(word, depth);
        int i = getIndex(b.chars, c);
        if ( i == -1 ) {
            return null; // not in trie
        } else if ( word.length()-1 == depth ) {
            return b.children[i]; // end of search
        } else if ( b.children[i] instanceof TrieLeaf ) {
            return equalityCheck(word, (TrieLeaf)b.children[i]);
        } else {
            return find(word, (TrieBranch)b.children[i], depth+1); // recurse
        }
    }
    
} // end of class Trie
back to top