/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.trees;

import edu.stanford.nlp.io.ExtensionFileFilter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.trees.LabeledScoredTreeReaderFactory;
import edu.stanford.nlp.trees.TransformingTreebank;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeReaderFactory;
import edu.stanford.nlp.trees.TreeTransformer;
import edu.stanford.nlp.trees.TreeVisitor;
import edu.stanford.nlp.util.Sets;
import java.io.File;
import java.io.FileFilter;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.text.NumberFormat;
import java.util.AbstractCollection;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Abstract base class for a collection of {@link Tree} objects.
 *
 * <p>This class stores the {@link TreeReaderFactory} and character encoding that
 * subclasses use when loading trees, supplies convenience overloads of
 * {@code loadPath}, and implements {@link #size()}, {@link #toString()} and
 * {@link #textualSummary()} on top of the abstract {@link #apply(TreeVisitor)}
 * and the inherited {@code iterator()}.
 *
 * <p>{@link #remove(Object)} always throws, so the collection is presented as
 * read-only with respect to element removal.
 */
public abstract class Treebank extends AbstractCollection<Tree> {

    /** Encoding used by every constructor that is not given one explicitly. */
    private static final String DEFAULT_ENCODING = "UTF-8";

    /** Factory handed to subclasses through {@link #treeReaderFactory()}. */
    private final TreeReaderFactory trf;

    /** Character encoding name reported by {@link #encoding()}. */
    private final String encoding;

    /**
     * Creates a treebank that uses a {@link LabeledScoredTreeReaderFactory}
     * and the default encoding ({@code "UTF-8"}).
     */
    public Treebank() {
        this(new LabeledScoredTreeReaderFactory());
    }

    /**
     * Creates a treebank that uses the given reader factory and the default
     * encoding ({@code "UTF-8"}).
     *
     * @param trf the factory stored for later retrieval via {@link #treeReaderFactory()}
     */
    public Treebank(TreeReaderFactory trf) {
        this(trf, DEFAULT_ENCODING);
    }

    /**
     * Creates a treebank that uses the given reader factory and encoding.
     *
     * @param trf      the factory stored for later retrieval via {@link #treeReaderFactory()}
     * @param encoding the encoding name returned by {@link #encoding()}
     */
    public Treebank(TreeReaderFactory trf, String encoding) {
        this.trf = trf;
        this.encoding = encoding;
    }

    /**
     * Creates a treebank that uses a {@link LabeledScoredTreeReaderFactory}
     * and the default encoding.
     *
     * @param initialCapacity not used by this class (see
     *                        {@link #Treebank(int, TreeReaderFactory)})
     */
    public Treebank(int initialCapacity) {
        this(initialCapacity, new LabeledScoredTreeReaderFactory());
    }

    /**
     * Creates a treebank that uses the given reader factory and the default
     * encoding.
     *
     * @param initialCapacity not used by this class; this base class allocates
     *                        no storage, so the value is simply ignored here
     * @param trf             the factory stored for later retrieval via
     *                        {@link #treeReaderFactory()}
     */
    public Treebank(int initialCapacity, TreeReaderFactory trf) {
        this(trf, DEFAULT_ENCODING);
    }

    /**
     * Returns the reader factory supplied at construction time.
     *
     * @return the stored {@link TreeReaderFactory}
     */
    protected TreeReaderFactory treeReaderFactory() {
        return this.trf;
    }

    /**
     * Returns the encoding name supplied at construction time, or
     * {@code "UTF-8"} if none was supplied.
     *
     * @return the stored encoding name
     */
    public String encoding() {
        return this.encoding;
    }

    /** Empties the treebank; the behaviour is defined by the subclass. */
    @Override
    public abstract void clear();

    /**
     * Loads trees from the named path; delegates to {@link #loadPath(File)}.
     *
     * @param pathName file-system path, wrapped in a {@link File}
     */
    public void loadPath(String pathName) {
        this.loadPath(new File(pathName));
    }

    /**
     * Loads trees from the given path using the suffix {@code "mrg"} with the
     * recursive flag set to {@code true}.
     *
     * @param path file or directory to load from
     */
    public void loadPath(File path) {
        this.loadPath(path, "mrg", true);
    }

    /**
     * Loads trees from the named path, filtering with an
     * {@link ExtensionFileFilter} built from {@code suffix} and
     * {@code recursively}.
     *
     * @param pathName    file-system path, wrapped in a {@link File}
     * @param suffix      passed to the {@link ExtensionFileFilter} constructor
     * @param recursively passed to the {@link ExtensionFileFilter} constructor
     */
    public void loadPath(String pathName, String suffix, boolean recursively) {
        this.loadPath(new File(pathName), (FileFilter) new ExtensionFileFilter(suffix, recursively));
    }

    /**
     * Loads trees from the given path, filtering with an
     * {@link ExtensionFileFilter} built from {@code suffix} and
     * {@code recursively}.
     *
     * @param path        file or directory to load from
     * @param suffix      passed to the {@link ExtensionFileFilter} constructor
     * @param recursively passed to the {@link ExtensionFileFilter} constructor
     */
    public void loadPath(File path, String suffix, boolean recursively) {
        this.loadPath(path, (FileFilter) new ExtensionFileFilter(suffix, recursively));
    }

    /**
     * Loads trees from the named path using the given filter; delegates to
     * {@link #loadPath(File, FileFilter)}.
     *
     * @param pathName file-system path, wrapped in a {@link File}
     * @param filt     filter forwarded unchanged
     */
    public void loadPath(String pathName, FileFilter filt) {
        this.loadPath(new File(pathName), filt);
    }

    /**
     * Loads trees from the given path using the given filter. Every other
     * {@code loadPath} overload ends up here; the actual loading is defined by
     * the subclass.
     *
     * @param path file or directory to load from
     * @param filt filter supplied by the caller or built by an overload
     */
    public abstract void loadPath(File path, FileFilter filt);

    /**
     * Applies a visitor to the treebank. {@link #size()} and
     * {@link #toString()} rely on implementations invoking
     * {@link TreeVisitor#visitTree(Tree)} once for each tree.
     *
     * @param tp the visitor to apply
     */
    public abstract void apply(TreeVisitor tp);

    /**
     * Wraps this treebank and the given transformer in a
     * {@link TransformingTreebank}.
     *
     * @param treeTrans the transformer handed to the new treebank
     * @return a new {@link TransformingTreebank} over this treebank
     */
    public Treebank transform(TreeTransformer treeTrans) {
        return new TransformingTreebank(this, treeTrans);
    }

    /**
     * Returns the string form of every visited tree, each followed by a
     * newline character.
     *
     * @return the concatenated tree strings
     */
    @Override
    public String toString() {
        // The buffer never leaves this method, so the unsynchronized builder suffices.
        final StringBuilder sb = new StringBuilder();
        this.apply(new TreeVisitor() {

            public void visitTree(Tree t) {
                sb.append(t.toString());
                sb.append("\n");
            }
        });
        return sb.toString();
    }

    /**
     * Counts trees by running a counting visitor through
     * {@link #apply(TreeVisitor)}, so each call performs a full pass.
     *
     * @return the number of {@code visitTree} callbacks received
     */
    @Override
    public int size() {
        CounterTreeProcessor counter = new CounterTreeProcessor();
        this.apply(counter);
        return counter.total();
    }

    /**
     * Builds a multi-line, human-readable report about this treebank: tree and
     * token counts, root categories, warnings about unusual root rewrites,
     * numbers of category/tag/word types, any word types that match a fixed
     * list of known empty elements, labels used both as tag and as category,
     * and the category, tag and start-category counters.
     *
     * @return the report text
     * @throws IllegalStateException if a visited subtree is neither a leaf, a
     *                               preterminal, nor phrasal
     */
    public String textualSummary() {
        int numTrees = 0;
        int numNonUnary = 0;
        int numUnenclosedLeaves = 0;
        int numLeaves = 0;
        int numNonPhrasal = 0;
        int numWords = 0;
        int numTags = 0;
        Counter<Tree> nonUnaries = new Counter<Tree>();
        Counter<String> roots = new Counter<String>();
        Counter<String> starts = new Counter<String>();
        Counter<String> tags = new Counter<String>();
        Counter<String> cats = new Counter<String>();
        Set<String> words = new HashSet<String>();
        Tree leafEg = null;

        for (Tree t : this) {
            roots.incrementCount(t.value());
            ++numTrees;

            // Classify the shape of the root's rewrite.
            if (t.numChildren() > 1) {
                ++numNonUnary;
                nonUnaries.incrementCount(t.localTree());
            } else if (t.isLeaf()) {
                ++numUnenclosedLeaves;
            } else {
                Tree child = t.firstChild();
                if (child.isLeaf()) {
                    ++numLeaves;
                    leafEg = t; // keep the most recent offender as the example
                } else if (child.isPreTerminal()) {
                    ++numNonPhrasal;
                }
                starts.incrementCount(child.value());
            }

            // Iterating a Tree yields its nodes (presumably every subtree,
            // including t itself -- confirm against Tree.iterator()).
            for (Tree subtree : t) {
                if (subtree.isLeaf()) {
                    ++numWords;
                    words.add(subtree.value());
                } else if (subtree.isPreTerminal()) {
                    ++numTags;
                    tags.incrementCount(subtree.value());
                } else if (subtree.isPhrasal()) {
                    cats.incrementCount(subtree.value());
                } else {
                    throw new IllegalStateException("Bad tree in treebank!: " + subtree);
                }
            }
        }

        StringWriter sw = new StringWriter(2000);
        PrintWriter pw = new PrintWriter(sw);
        NumberFormat nf = NumberFormat.getNumberInstance();
        nf.setMaximumFractionDigits(0);

        pw.println("Treebank has " + numTrees + " trees and " + numWords + " words (tokens)");
        if (numTags != numWords) {
            pw.println("  Warning! numTags differs and is " + numTags);
        }
        if (roots.size() == 1) {
            // Exactly one key: read it directly rather than copying the key set to an array.
            String root = roots.keySet().iterator().next();
            pw.println("  The root category is: " + root);
        } else {
            pw.println("  Warning! " + roots.size() + " different roots in treebank: " + roots.toString(nf));
        }
        if (numNonUnary > 0) {
            pw.println("  Warning! " + numNonUnary + " trees without unary initial rewrite.  Subtrees: " + nonUnaries.toString(nf));
        }
        if (numUnenclosedLeaves > 0 || numLeaves > 0 || numNonPhrasal > 0) {
            pw.println("  Warning! Non-phrasal trees: " + numUnenclosedLeaves + " bare leaves; " + numLeaves + " root rewrites as leaf; and " + numNonPhrasal + " root rewrites as tagged word");
            if (numLeaves > 0) {
                pw.println("  Example bad root rewrites as leaf: " + leafEg);
            }
        }
        pw.println("  " + cats.size() + " phrasal category types, " + tags.size() + " tag types, and " + words.size() + " word types");

        String[] empties = new String[]{"*", "0", "*T*", "*RNR*", "*U*", "*?*", "*EXP*", "*ICH*", "*NOT*", "*PPA*", "*OP*", "*pro*", "*PRO*"};
        Set<String> knownEmpties = new HashSet<String>(Arrays.asList(empties));
        Set<String> emptiesIntersection = Sets.intersection(words, knownEmpties);
        if (!emptiesIntersection.isEmpty()) {
            pw.println("  Caution! " + emptiesIntersection.size() + " word types are known empty elements: " + emptiesIntersection);
        }

        Set<String> joint = Sets.intersection(cats.keySet(), tags.keySet());
        if (!joint.isEmpty()) {
            pw.println("  Warning! " + joint.size() + " items are tags and categories: " + joint);
        }

        pw.println("    Cats: " + cats.toString(nf));
        pw.println("    Tags: " + tags.toString(nf));
        pw.println("    " + starts.size() + " start categories: " + starts.toString(nf));
        return sw.toString();
    }

    /**
     * Always rejects removal.
     *
     * @param o ignored
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    @Override
    public boolean remove(Object o) {
        throw new UnsupportedOperationException("Treebank is read-only");
    }

    /** Visitor that counts how many times {@code visitTree} is called. */
    private static final class CounterTreeProcessor implements TreeVisitor {

        /** Number of callbacks received so far. */
        private int count;

        private CounterTreeProcessor() {
        }

        public void visitTree(Tree t) {
            ++this.count;
        }

        /**
         * @return the number of {@code visitTree} calls seen by this visitor
         */
        public int total() {
            return this.count;
        }
    }
}

