/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.io.NumberRangeFileFilter;
import edu.stanford.nlp.ling.StringLabelFactory;
import edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.trees.BobChrisTreeNormalizer;
import edu.stanford.nlp.trees.DiskTreebank;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.MemoryTreebank;
import edu.stanford.nlp.trees.PennTreeReader;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeReader;
import edu.stanford.nlp.trees.TreeReaderFactory;
import edu.stanford.nlp.trees.TreeVisitor;
import edu.stanford.nlp.trees.Treebank;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.PriorityQueue;
import java.io.FileFilter;
import java.io.Reader;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * A {@link TreeVisitor} that tallies, for every counted tree node, the list of
 * its children's labels. The tallies are kept three ways: keyed by the node
 * label alone, by (node, parent), and by (node, parent, grandparent). Those
 * counts are then compared with {@code Counters.klDivergence} to score how much
 * parent / grandparent context changes a node's expansion distribution.
 */
public class ParentAnnotationStats
implements TreeVisitor {
    /** Optional language pack; when non-null, parent and grandparent labels are reduced with {@code basicCategory}. */
    private TreebankLanguagePack tlp;
    /**
     * When false, pre-terminal nodes are skipped by {@code processTreeHelper}.
     * Static: assigned by {@code main} (-tags) and by the six-argument {@code getSplitCategories}.
     */
    private static boolean doTags = false;
    /** Non-pre-terminal nodes: node label (String) -> Counter over child-label lists. */
    private Map nodeRules = new HashMap();
    /** Non-pre-terminal nodes: [node, parent] list -> Counter over child-label lists. */
    private Map pRules = new HashMap();
    /** Non-pre-terminal nodes: [node, parent, grandparent] list -> Counter over child-label lists. */
    private Map gPRules = new HashMap();
    /** Pre-terminal nodes: node label (String) -> Counter over child-label lists. */
    private Map tagNodeRules = new HashMap();
    /** Pre-terminal nodes: [node, parent] list -> Counter over child-label lists. */
    private Map tagPRules = new HashMap();
    /** Pre-terminal nodes: [node, parent, grandparent] list -> Counter over child-label lists. */
    private Map tagGPRules = new HashMap();
    /** Score thresholds used by {@code printStats}; index i feeds the generated {@code splitters(i+1)} array. */
    public static final double[] CUTOFFS = new double[]{100.0, 200.0, 500.0, 1000.0};
    /** Same value as the minimum (node, parent) support required before grandparent splits are examined. */
    public static final double SUPPCUTOFF = 100.0;

    /** Creates a collector without a language pack, so labels are used as they appear in the trees. */
    private ParentAnnotationStats() {
        this(null);
    }

    /**
     * Creates a collector.
     *
     * @param tlp language pack used to reduce parent/grandparent labels to their
     *            basic category; may be null, in which case no reduction is done
     */
    private ParentAnnotationStats(TreebankLanguagePack tlp) {
        this.tlp = tlp;
    }

    /**
     * Counts the rules of one tree. The root has no real ancestors, so the
     * placeholder label "TOP" stands in for both its parent and grandparent.
     *
     * @param t the tree to process
     */
    public void visitTree(Tree t) {
        final String rootContext = "TOP";
        processTreeHelper(rootContext, rootContext, t);
    }

    /**
     * Returns the label values of the direct children of a node, in child order.
     *
     * @param t the node whose children are inspected
     * @return a new list with one label value per child (empty if there are none)
     */
    public static List kidLabels(Tree t) {
        Tree[] children = t.children();
        List<String> labels = new ArrayList<String>(children.length);
        for (Tree child : children) {
            labels.add(child.label().value());
        }
        return labels;
    }

    /**
     * Recursively records the expansion (list of child labels) of {@code t} and
     * of every counted descendant. Each expansion is counted under three keys:
     * the node label, the [node, parent] list and the [node, parent, grandparent]
     * list. Leaves are never counted; pre-terminals are counted (into the tag
     * tables) only when {@code doTags} is set.
     *
     * @param gP label of the grandparent of {@code t}
     * @param p  label of the parent of {@code t}
     * @param t  the node to process
     */
    public void processTreeHelper(String gP, String p, Tree t) {
        if (t.isLeaf()) {
            return;
        }
        boolean tagLevel = t.isPreTerminal();
        if (tagLevel && !doTags) {
            return;
        }

        String n = t.label().value();
        if (this.tlp != null) {
            // Strip annotations from the context labels only; the node label is kept as is.
            p = this.tlp.basicCategory(p);
            gP = this.tlp.basicCategory(gP);
        }
        List expansion = ParentAnnotationStats.kidLabels(t);

        List<String> withParent = new ArrayList<String>(2);
        withParent.add(n);
        withParent.add(p);

        List<String> withGrandparent = new ArrayList<String>(3);
        withGrandparent.add(n);
        withGrandparent.add(p);
        withGrandparent.add(gP);

        // Tables and keys line up by index: node, node+parent, node+parent+grandparent.
        Map[] tables = tagLevel
                ? new Map[]{this.tagNodeRules, this.tagPRules, this.tagGPRules}
                : new Map[]{this.nodeRules, this.pRules, this.gPRules};
        Object[] keys = new Object[]{n, withParent, withGrandparent};

        for (int level = 0; level < tables.length; ++level) {
            Counter<List> counts = (Counter<List>) tables[level].get(keys[level]);
            if (counts == null) {
                counts = new Counter<List>();
                tables[level].put(keys[level], counts);
            }
            counts.incrementCount(expansion);
        }

        // This node becomes the parent, and its parent the grandparent, of each child.
        for (Tree kid : t.children()) {
            this.processTreeHelper(p, n, kid);
        }
    }

    /**
     * Prints a report of the collected phrasal statistics to {@code System.out}.
     *
     * <p>For every node label, each (node, parent) refinement is scored as
     * {@code Counters.klDivergence(refined, base) * support(refined)}; then, for
     * every (node, parent) pair whose support is at least {@link #SUPPCUTOFF},
     * each (node, parent, grandparent) refinement is scored the same way. After
     * the per-node listings, all scores are printed in priority-queue order,
     * followed by generated Java source: one {@code splittersN} array per entry
     * of {@link #CUTOFFS} and a {@code splitters} set declaration.
     *
     * <p>Only the non-tag tables (nodeRules / pRules / gPRules) are reported.
     */
    public void printStats() {
        NumberFormat nf = NumberFormat.getNumberInstance();
        nf.setMaximumFractionDigits(2);

        StringBuilder[] javaSB = new StringBuilder[CUTOFFS.length];
        for (int i = 0; i < CUTOFFS.length; ++i) {
            javaSB[i] = new StringBuilder("  private static String[] splitters" + (i + 1) + " = new String[] {");
        }
        Counter<List> allScores = new Counter<List>();

        // Parent splits: compare each (node, parent) distribution with the plain node's.
        for (Object o : this.nodeRules.keySet()) {
            String node = (String) o;
            Counter cntr = (Counter) this.nodeRules.get(node);
            double support = cntr.totalCount();
            reportRefinements(node, Collections.singletonList(node), cntr, support,
                    this.pRules, nf, allScores, javaSB);
        }

        // Grandparent splits: compare each (node, parent, grandparent) distribution
        // with its (node, parent) distribution, ignoring poorly supported pairs.
        for (Object o : this.pRules.keySet()) {
            List node = (List) o;
            Counter cntr = (Counter) this.pRules.get(node);
            double support = cntr.totalCount();
            if (support < SUPPCUTOFF) {
                continue;
            }
            reportRefinements(node, node, cntr, support, this.gPRules, nf, allScores, javaSB);
        }

        System.out.println();
        System.out.println("All scores:");
        PriorityQueue pq = Counters.toPriorityQueue(allScores);
        while (!pq.isEmpty()) {
            Object key = pq.getFirst();
            double score = pq.getPriority(key);
            pq.removeFirst();
            System.out.println(key + "\t" + score);
        }

        System.out.println("  // Automatically generated by ParentAnnotationStats -- preferably don't edit");
        for (int i = 0; i < CUTOFFS.length; ++i) {
            StringBuilder sb = javaSB[i];
            int len = sb.length();
            if (len >= 2 && sb.charAt(len - 2) == ',' && sb.charAt(len - 1) == ' ') {
                // Swap the trailing ", " separator for the array terminator.
                sb.replace(len - 2, len, "};");
            } else {
                // Nothing reached this cutoff: close the empty initializer instead
                // of overwriting its opening brace.
                sb.append("};");
            }
            System.out.println(sb);
        }

        System.out.print("  public static HashSet splitters = new HashSet(Arrays.asList(");
        for (int i = CUTOFFS.length; i > 0; --i) {
            if (i == 1) {
                System.out.print("splitters1");
                continue;
            }
            System.out.print("selectiveSplit" + i + " ? splitters" + i + " : (");
        }
        // One ")" per nested conditional plus the two that close asList( and HashSet(.
        for (int i = CUTOFFS.length; i >= 0; --i) {
            System.out.print(")");
        }
        System.out.println(";");
    }

    /**
     * Prints the scored refinements of one base context and appends the ones
     * that reach a cutoff to the generated-source buffers.
     *
     * @param node       the base context as it should be printed (a label or a label list)
     * @param prefix     labels every refinement key must start with
     * @param cntr       expansion counts of the base context
     * @param support    total count of {@code cntr}
     * @param finerRules table of refined contexts (label list -> Counter) to scan
     * @param nf         formatter for divergences and scores
     * @param allScores  receives the score of every refinement examined
     * @param javaSB     one buffer per entry of {@code CUTOFFS}
     */
    private static void reportRefinements(Object node, List prefix, Counter cntr, double support,
            Map finerRules, NumberFormat nf, Counter<List> allScores, StringBuilder[] javaSB) {
        System.out.println("Node " + node + " support is " + support);

        List<Pair<List, Double>> answers = new ArrayList<Pair<List, Double>>();
        for (Object o : finerRules.keySet()) {
            List key = (List) o;
            if (!hasLabelPrefix(key, prefix)) {
                continue;
            }
            Counter cntr2 = (Counter) finerRules.get(key);
            double support2 = cntr2.totalCount();
            double kl = Counters.klDivergence(cntr2, cntr);
            System.out.println("KL(" + key + "||" + node + ") = " + nf.format(kl) + "\t" + "support(" + key + ") = " + support2);
            double score = kl * support2;
            answers.add(new Pair<List, Double>(key, Double.valueOf(score)));
            allScores.setCount(key, score);
        }

        System.out.println("----");
        System.out.println("Sorted descending support * KL");
        Collections.sort(answers, new Comparator<Pair<List, Double>>() {
            public int compare(Pair<List, Double> a, Pair<List, Double> b) {
                return b.second().compareTo(a.second());
            }
        });

        for (Pair<List, Double> answer : answers) {
            double psd = answer.second().doubleValue();
            System.out.println(answer.first() + ": " + nf.format(psd));
            // Written as !(x >= y) so that a NaN score is skipped as well.
            if (!(psd >= CUTOFFS[0])) {
                continue;
            }
            String name = splitterName(answer.first());
            for (int j = 0; j < CUTOFFS.length; ++j) {
                if (psd >= CUTOFFS[j]) {
                    javaSB[j].append("\"").append(name).append("\", ");
                }
            }
        }
        System.out.println();
    }

    /** Returns true if the leading elements of {@code key} equal the elements of {@code prefix}. */
    private static boolean hasLabelPrefix(List key, List prefix) {
        for (int i = 0; i < prefix.size(); ++i) {
            if (!key.get(i).equals(prefix.get(i))) {
                return false;
            }
        }
        return true;
    }

    /** Formats a context key as {@code node^parent}, or {@code node^parent~grandparent} for three labels. */
    private static String splitterName(List key) {
        StringBuilder name = new StringBuilder();
        name.append(key.get(0)).append("^").append(key.get(1));
        if (key.size() > 2) {
            name.append("~").append(key.get(2));
        }
        return name.toString();
    }

    /**
     * Adds to {@code splitters} the name of every context refinement whose score
     * ({@code Counters.klDivergence(refined, base) * support(refined)}) is at
     * least {@code cutOff}. Parent refinements are named {@code node^parent};
     * grandparent refinements, examined only for (node, parent) pairs with
     * support of at least {@link #SUPPCUTOFF}, are named
     * {@code node^parent~grandparent}.
     *
     * <p>Candidates are added in table iteration order rather than sorted by
     * score; the visible callers pass a {@code HashSet}, for which insertion
     * order is irrelevant.
     *
     * @param cutOff    minimum score for a refinement to be selected
     * @param nr        node label -> Counter of expansions
     * @param pr        [node, parent] -> Counter of expansions
     * @param gpr       [node, parent, grandparent] -> Counter of expansions
     * @param splitters set that receives the selected names
     */
    private void getSplitters(double cutOff, Map nr, Map pr, Map gpr, Set splitters) {
        for (Object o : nr.keySet()) {
            String node = (String) o;
            Counter cntr = (Counter) nr.get(node);
            collectSplitters(cutOff, Collections.singletonList(node), cntr, pr, splitters);
        }
        for (Object o : pr.keySet()) {
            List node = (List) o;
            Counter cntr = (Counter) pr.get(node);
            if (cntr.totalCount() < SUPPCUTOFF) {
                continue;
            }
            collectSplitters(cutOff, node, cntr, gpr, splitters);
        }
    }

    /**
     * Scores every key of {@code finerRules} that starts with {@code prefix}
     * against the base counts and adds the names of those reaching {@code cutOff}.
     */
    private static void collectSplitters(double cutOff, List prefix, Counter cntr, Map finerRules, Set splitters) {
        for (Object o : finerRules.keySet()) {
            List key = (List) o;
            boolean matches = true;
            for (int i = 0; matches && i < prefix.size(); ++i) {
                matches = key.get(i).equals(prefix.get(i));
            }
            if (!matches) {
                continue;
            }
            Counter cntr2 = (Counter) finerRules.get(key);
            double score = Counters.klDivergence(cntr2, cntr) * cntr2.totalCount();
            // Written as !(x >= y) so that a NaN score is rejected as well.
            if (!(score >= cutOff)) {
                continue;
            }
            String name = (String) key.get(0) + "^" + (String) key.get(1);
            if (key.size() > 2) {
                name = name + "~" + (String) key.get(2);
            }
            splitters.add(name);
        }
    }

    /**
     * Command-line entry point. Leading options:
     * <ul>
     *   <li>{@code -tags}: also collect statistics for pre-terminal nodes;</li>
     *   <li>{@code -cutOff n}: print the set of split categories for cutoff
     *       {@code n} instead of the full statistics report.</li>
     * </ul>
     * The first non-option argument is the treebank path. If it is missing, the
     * usage message is printed and nothing else is done.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        int i = 0;
        boolean useCutOff = false;
        double cutOff = 0.0;
        // Bounds check first: the arguments may consist of options only.
        while (i < args.length && args[i].startsWith("-")) {
            if (args[i].equals("-tags")) {
                doTags = true;
                ++i;
            } else if (args[i].equals("-cutOff") && i + 1 < args.length) {
                useCutOff = true;
                cutOff = Double.parseDouble(args[i + 1]);
                i += 2;
            } else {
                System.err.println("Unknown option: " + args[i]);
                ++i;
            }
        }
        if (i >= args.length) {
            System.out.println("Usage: java edu.stanford.nlp.parser.lexparser.ParentAnnotationStats [-tags] treebankPath");
            return;
        }

        DiskTreebank treebank = new DiskTreebank(new TreeReaderFactory(){

            public TreeReader newTreeReader(Reader in) {
                return new PennTreeReader(in, new LabeledScoredTreeFactory(new StringLabelFactory()), new BobChrisTreeNormalizer());
            }
        });
        treebank.loadPath(args[i]);
        if (useCutOff) {
            Set splitters = ParentAnnotationStats.getSplitCategories(treebank, doTags, 0, cutOff, cutOff, null);
            System.out.println(splitters);
        } else {
            ParentAnnotationStats pas = new ParentAnnotationStats();
            ((Treebank)treebank).apply(pas);
            pas.printStats();
        }
    }

    /**
     * Convenience overload: computes split categories with tag statistics
     * enabled, algorithm 0, and the same cutoff for phrasal and tag categories.
     *
     * @param t      treebank to collect statistics from
     * @param cutOff score threshold used for both phrasal and tag splits
     * @param tlp    language pack passed through to the collector; may be null
     * @return the set of selected split category names
     */
    public static Set getSplitCategories(Treebank t, double cutOff, TreebankLanguagePack tlp) {
        final boolean includeTags = true;
        final int algorithm = 0;
        return ParentAnnotationStats.getSplitCategories(t, includeTags, algorithm, cutOff, cutOff, tlp);
    }

    /**
     * Collects statistics over a treebank and returns the names of the context
     * splits that reach the given cutoffs.
     *
     * <p>Side effect: overwrites the class-wide {@code doTags} flag with the
     * {@code doTags} argument before the treebank is visited.
     *
     * @param t             treebank to collect statistics from
     * @param doTags        whether pre-terminal nodes are counted as well
     * @param algorithm     not read by this method
     * @param phrasalCutOff score threshold applied to the phrasal tables
     * @param tagCutOff     score threshold applied to the tag tables
     * @param tlp           language pack for the collector; may be null
     * @return a new set containing the selected split names
     */
    public static Set getSplitCategories(Treebank t, boolean doTags, int algorithm, double phrasalCutOff, double tagCutOff, TreebankLanguagePack tlp) {
        ParentAnnotationStats.doTags = doTags;

        ParentAnnotationStats collector = new ParentAnnotationStats(tlp);
        t.apply(collector);

        Set selected = new HashSet();
        // Phrasal categories first, then tag categories, into the same set.
        collector.getSplitters(phrasalCutOff, collector.nodeRules, collector.pRules, collector.gPRules, selected);
        collector.getSplitters(tagCutOff, collector.tagNodeRules, collector.tagPRules, collector.tagGPRules, selected);
        return selected;
    }

    /**
     * Computes split categories for an English treebank using a cutoff of 300.0.
     * Trees are loaded into memory from {@code treebankRoot} through a
     * {@code NumberRangeFileFilter(200, 2199, true)}.
     *
     * @param treebankRoot path from which the treebank is loaded
     * @return the set of selected split category names
     */
    public static Set getEnglishSplitCategories(String treebankRoot) {
        EnglishTreebankParserParams params = new EnglishTreebankParserParams();
        MemoryTreebank trees = params.memoryTreebank();

        FileFilter fileRange = (FileFilter)new NumberRangeFileFilter(200, 2199, true);
        trees.loadPath(treebankRoot, fileRange);

        final double cutOff = 300.0;
        return ParentAnnotationStats.getSplitCategories(trees, cutOff, params.treebankLanguagePack());
    }
}

