/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.ling.HasTag;
import edu.stanford.nlp.parser.lexparser.AbstractTreebankParserParams;
import edu.stanford.nlp.parser.lexparser.BaseLexicon;
import edu.stanford.nlp.parser.lexparser.Lexicon;
import edu.stanford.nlp.parser.lexparser.Options;
import edu.stanford.nlp.process.Function;
import edu.stanford.nlp.trees.DiskTreebank;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.MemoryTreebank;
import edu.stanford.nlp.trees.PennTreeReader;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeReader;
import edu.stanford.nlp.trees.TreeReaderFactory;
import edu.stanford.nlp.trees.TreeTransformer;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.trees.international.arabic.ArabicHeadFinder;
import edu.stanford.nlp.trees.international.arabic.ArabicTreeNormalizer;
import edu.stanford.nlp.trees.international.arabic.ArabicTreebankLanguagePack;
import edu.stanford.nlp.trees.international.arabic.ArabicTreebankTokenizer;
import edu.stanford.nlp.trees.tregex.ParseException;
import edu.stanford.nlp.trees.tregex.TregexMatcher;
import edu.stanford.nlp.trees.tregex.TregexPattern;
import edu.stanford.nlp.trees.tregex.TregexPatternCompiler;
import edu.stanford.nlp.util.Filter;
import edu.stanford.nlp.util.Pair;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.Serializable;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

public class ArabicTreebankParserParams
extends AbstractTreebankParserParams {
    /** Running, human-readable log of the options that were set; printed by {@link #display()}. */
    private String optionsString = "ArabicTreebankParserParams\n";
    /** Passed to the tree reader factory's normalizer (first constructor argument). */
    private boolean retainNPTmp = false;
    /** Passed to the tree reader factory's normalizer; set by the {@code -markPRDverbs} option. */
    private boolean retainPRD = false;
    /** Passed to the tree reader factory's normalizer; set by the {@code -changeNoLabels} option. */
    private boolean changeNoLabels = false;
    /** When true the collinizer returns its result without the final punctuation pruning pass. */
    private boolean collinizerRetainsPunctuation = false;
    /** Optional regex; the collinizer prunes nodes whose label value fully matches it. {@code null} disables pruning. */
    private Pattern collinizerPruneRegex = null;
    /** Class instantiated reflectively by {@link #headFinder()}; replaced by the {@code -hf} option. */
    Class<? extends HeadFinder> headFinderClass = ArabicHeadFinder.class;
    /**
     * Annotations switched on through {@code setOptionFlag}, applied to every node by
     * {@code transformTree}. Each matching entry appends its suffix to the node label, so
     * iteration order determines the resulting label string. A {@code LinkedHashMap} keeps
     * that order equal to the order in which the options were given; a plain {@code HashMap}
     * leaves the order unspecified.
     */
    private Map<TregexPattern, Function<TregexMatcher, String>> activeAnnotations = new LinkedHashMap<TregexPattern, Function<TregexMatcher, String>>();
    /** Every known annotation, keyed by the command-line flag that activates it. */
    private Map<String, Pair<TregexPattern, Function<TregexMatcher, String>>> annotationPatterns = new HashMap<String, Pair<TregexPattern, Function<TregexMatcher, String>>>();
    /** Tregex pattern shared by the {@code -genitiveMark} and {@code -markGenitiveParent} annotations. */
    private static final String genitiveNodeTregexString = "@NP > @NP $- /^N/";
    /** Leaf regex underlying the {@code -markCopularVerbTags} annotation. */
    private static final String copularVerbForms = "/^(kAn|kAnt|ykwn|sykwn|tkwn|ykn|stkwn|ykwnw|ybdw|tbdw|sybdw|stbdw|bdY|ybdy|tbdy|stbdy|sybdy)$/";
    /**
     * Leaf regex underlying the {@code -markSBARVerbTags} annotation.
     * NOTE(review): "ybd\\>tstTyE" looks like two verb forms with a missing '|' between them, and
     * the trailing '|' before ')' adds an empty alternative. Left untouched because changing the
     * regex changes which leaves are annotated -- confirm against the intended word list.
     */
    private static final String sbarVerbForms = "/^(qAl|\\>DAf|AEln|\\>wDH|ymkn|\\>Eln|\\*krt|\\>kd|AElnt|Akd|qAlt|\\>DAft|AfAd|y\\*kr|yjb|\\{Etbr|\\>wDHt|AEtbr|sbq|\\*kr|tAbE|nqlt|SrH|r\\>Y|\\>fAd|AfAdt|yqwl|\\>kdt|\\>Elnt|Akdt|yrY|tEtbr|AEtqd|yEtbr|tfyd|ytwqE|AEtbrt|ynbgy|Tlbt|qrr|ktbt|\\>blg|\\>\\$Ar|ywDH|t\\&kd|Tlb|r\\>t|yEny|nryd|nEtbr|yftrD|k\\$f|\\{Etbrt|AwDH|ytEyn|ykfy|y\\&kd|yErf|ydrk|tZhr|tqwl|tbd\\>|nEtqd|nErf|AErf|Elm|Awrdt|AwDHt|AqtrH|yryd|yErfAn|yElm|ybd\\>tstTyE|tHAwl|tEny|nrY|n\\>ml|)$/";
    /** Pattern compiler configured with an {@link ArabicHeadFinder}; used for most annotation patterns. */
    private static final TregexPatternCompiler tregexPatternCompiler = new TregexPatternCompiler(new ArabicHeadFinder());
    /** Command-line flag that sets {@link #retainPRD}. */
    private static final String markPRDverbString = "-markPRDverbs";
    private static final long serialVersionUID = 1L;

    /**
     * Creates parameters backed by an {@link ArabicTreebankLanguagePack}, takes both the
     * input and the output encoding from that language pack, and registers the table of
     * available annotation patterns.
     */
    public ArabicTreebankParserParams() {
        super(new ArabicTreebankLanguagePack());
        // Input and output encodings both come from the language pack.
        inputEncoding = tlp.getEncoding();
        outputEncoding = tlp.getEncoding();
        initializeAnnotationPatterns();
    }

    /**
     * Builds a tree reader factory that captures the current values of the
     * {@code retainNPTmp}, {@code retainPRD} and {@code changeNoLabels} settings.
     *
     * @return a new factory; later option changes do not affect an already created factory
     */
    public TreeReaderFactory treeReaderFactory() {
        final TreeReaderFactory factory = new ArabicTreeReaderFactory(retainNPTmp, retainPRD, changeNoLabels);
        return factory;
    }

    /**
     * @return a new {@link MemoryTreebank} that reads trees through {@link #treeReaderFactory()}
     */
    public MemoryTreebank memoryTreebank() {
        final TreeReaderFactory readerFactory = treeReaderFactory();
        return new MemoryTreebank(readerFactory);
    }

    /**
     * @return a new {@link DiskTreebank} that reads trees through {@link #treeReaderFactory()}
     */
    public DiskTreebank diskTreebank() {
        final TreeReaderFactory readerFactory = treeReaderFactory();
        return new DiskTreebank(readerFactory);
    }

    /**
     * Instantiates the configured head finder class through its no-argument constructor.
     * If instantiation fails for any reason, the failure is reported on standard error and
     * an {@link ArabicHeadFinder} is returned instead.
     *
     * @return a new head finder instance
     */
    public HeadFinder headFinder() {
        HeadFinder finder;
        try {
            finder = headFinderClass.newInstance();
        }
        catch (Exception failure) {
            // Best effort: report the problem and fall back to the default Arabic head finder.
            System.err.println("Error while instantiating class " + headFinderClass + ": " + failure);
            System.err.println("Using ArabicHeadFinder instead.");
            finder = new ArabicHeadFinder();
        }
        return finder;
    }

    /**
     * Builds the tree transformer used to normalise trees, configured with the language
     * pack and the current collinizer options (punctuation retention and prune regex).
     *
     * @return a new collinizer instance
     */
    public TreeTransformer collinizer() {
        final TreeTransformer transformer = new ArabicCollinizer(tlp, collinizerRetainsPunctuation, collinizerPruneRegex);
        return transformer;
    }

    /**
     * @return the same kind of transformer as {@link #collinizer()}; no separate
     *         variant is defined here
     */
    public TreeTransformer collinizerEvalb() {
        final TreeTransformer sameAsCollinizer = collinizer();
        return sameAsCollinizer;
    }

    /**
     * @return an empty array; no sister splitters are defined here
     */
    public String[] sisterSplitters() {
        return new String[] {};
    }

    /**
     * Annotates the label of a single node in place. Every active annotation whose pattern
     * matches at {@code t} (with the matcher created over {@code root}) contributes a suffix,
     * appended in the iteration order of the active-annotation map. For a preterminal the
     * annotated category is also stored as the label's tag.
     *
     * @param t    the node to annotate; its label is modified
     * @param root the root of the tree containing {@code t}
     * @return {@code t} itself
     */
    public Tree transformTree(Tree t, Tree root) {
        String annotated = t.label().value();
        for (Map.Entry<TregexPattern, Function<TregexMatcher, String>> annotation : activeAnnotations.entrySet()) {
            final TregexMatcher matcher = annotation.getKey().matcher(root);
            if (matcher.matchesAt(t)) {
                // The function receives the matcher so it can read named nodes of the match.
                annotated += annotation.getValue().apply(matcher);
            }
        }
        t.label().setValue(annotated);
        if (t.isPreTerminal()) {
            // The label of a preterminal is cast to HasTag unconditionally.
            ((HasTag) t.label()).setTag(annotated);
        }
        return t;
    }

    /** Prints the accumulated option log to standard error. */
    public void display() {
        System.err.println(optionsString);
    }

    /**
     * Fills {@code annotationPatterns} with every annotation this class knows, keyed by the
     * command-line flag that activates it. Nothing is activated here; activation happens in
     * {@code setOptionFlag}.
     *
     * <p>The first six patterns are compiled with the static {@code TregexPattern.compile};
     * the rest go through {@code tregexPatternCompiler}, which was built with an
     * {@link ArabicHeadFinder}. NOTE(review): {@code -markFem} uses a head relation
     * ({@code <<#}) but is compiled statically -- confirm that is intended.
     *
     * <p>If a pattern fails to parse, the error is printed to standard error and the
     * remaining patterns are not registered; entries registered before the failure stay.
     */
    private void initializeAnnotationPatterns() {
        try {
            this.registerAnnotation("-markFem", TregexPattern.compile("__ <<# /p$/"), new SimpleStringFunction("-FEM"));
            this.registerAnnotation("-markGappedVP", TregexPattern.compile("@VP > @VP $- __ $ /^(CC|CONJ)/ !< /^V/"), new SimpleStringFunction("-gappedVP"));
            this.registerAnnotation("-markGappedVPConjoiners", TregexPattern.compile("/^(CC|CONJ)/ $ (@VP > @VP $- __ !< /^V/)"), new SimpleStringFunction("-gappedVP"));
            this.registerAnnotation("-gpAnnotatePrepositions", TregexPattern.compile("IN > (__ > __=gp)"), new AddRelativeNodeFunction("^^", "gp"));
            this.registerAnnotation("-genitiveMark", TregexPattern.compile(genitiveNodeTregexString), new SimpleStringFunction("-genitive"));
            this.registerAnnotation("-markGenitiveParent", TregexPattern.compile("@NP < (" + genitiveNodeTregexString + ")"), new SimpleStringFunction("-genitiveParent"));
            this.registerAnnotation("-maSdrMark", tregexPatternCompiler.compile("/^N/ <<# (/^t.+y.$/ > @NN)"), new SimpleStringFunction("-maSdr"));
            this.registerAnnotation("-mjjMark", tregexPatternCompiler.compile("@JJ < /^m/ $+ PP ># ADJP "), new SimpleStringFunction("-mjj"));
            this.registerAnnotation("-splitPUNC", tregexPatternCompiler.compile("@PUNC < __=term"), new AnnotatePunctuationFunction());
            this.registerAnnotation("-markPPwithPPdescendant", tregexPatternCompiler.compile("__ !< @PP << @PP [ >> @PP | == @PP ]"), new SimpleStringFunction("-inPPdominatesPP"));
            this.registerAnnotation("-markNPwithSdescendant", tregexPatternCompiler.compile("__ !< @S << @S [ >> @NP | == @NP ]"), new SimpleStringFunction("-inNPdominatesS"));
            this.registerAnnotation("-markContainsVerb", tregexPatternCompiler.compile("__ << (/^V/ < (__ !< __))"), new SimpleStringFunction("-containsV"));
            this.registerAnnotation("-retainNPTmp", tregexPatternCompiler.compile("__ >># /^NP-TMP/"), new SimpleStringFunction("-TMP"));
            this.registerAnnotation("-markRightRecursiveNP", tregexPatternCompiler.compile("__ <<- @NP [>>- @NP | == @NP]"), new SimpleStringFunction("-rrNP"));
            this.registerAnnotation("-markBaseNP", tregexPatternCompiler.compile("@NP !< @NP !< @VP !< SBAR !< ADJP !< ADVP !< S !< QP !< UCP !< PP"), new SimpleStringFunction("-base"));
            this.registerAnnotation("-markContainsSBAR", tregexPatternCompiler.compile("__ << @SBAR"), new SimpleStringFunction("-containsSBAR"));
            this.registerAnnotation("-markPhrasalNodesDominatedBySBAR", tregexPatternCompiler.compile("__ < (__ < __) >> @SBAR"), new SimpleStringFunction("-domBySBAR"));
            this.registerAnnotation("-markCoordinateNPs", tregexPatternCompiler.compile("@NP < @CC"), new SimpleStringFunction("-coord"));
            this.registerAnnotation("-splitCC", tregexPatternCompiler.compile("@CC < __=term"), new AddRelativeNodeFunction("-", "term"));
            // The verb-form lists live in one place (the class constants) instead of being repeated here.
            this.registerAnnotation("-markCopularVerbTags", tregexPatternCompiler.compile("/^V/ < " + copularVerbForms), new SimpleStringFunction("-copular"));
            this.registerAnnotation("-markSBARVerbTags", tregexPatternCompiler.compile("/^V/ < " + sbarVerbForms), new SimpleStringFunction("-SBARverb"));
        }
        catch (ParseException e) {
            System.err.println("Parse exception on annotation pattern initialization:" + e);
        }
    }

    /**
     * Registers one annotation under its command-line flag. Taking the function as the
     * interface type lets the pair be built with exactly the value type the map declares.
     */
    private void registerAnnotation(String flag, TregexPattern pattern, Function<TregexMatcher, String> annotator) {
        this.annotationPatterns.put(flag, new Pair<TregexPattern, Function<TregexMatcher, String>>(pattern, annotator));
    }

    /**
     * Consumes consecutive language-specific options from {@code args}, starting at index
     * {@code i}, and stops at the first argument it does not recognise.
     *
     * <p>Recognised options are every flag registered in {@code annotationPatterns}, plus
     * {@code -changeNoLabels}, {@code -markPRDverbs}, {@code -collinizerRetainsPunctuation},
     * {@code -collinizerPruneRegex <regex>} and {@code -hf <HeadFinder class name>}. Each
     * accepted option is also recorded in the option log printed by {@code display()}.
     *
     * <p>{@code -retainNPTmp} is registered as an annotation pattern, so it is handled by the
     * annotation branch; that branch additionally sets the {@code retainNPTmp} flag handed to
     * the tree reader factory. Previously a separate {@code else if} for this flag could never
     * be reached and the flag stayed false. NOTE(review): presumably the NP-TMP annotation
     * pattern can only match when the normalizer keeps NP-TMP labels -- confirm against
     * {@code ArabicTreeNormalizer}.
     *
     * @param args the full argument array
     * @param i    index of the first argument to examine
     * @return index of the first argument that was not consumed; equals {@code i} when
     *         {@code args[i]} is not a recognised option
     * @throws IllegalArgumentException if {@code -collinizerPruneRegex} or {@code -hf} is the
     *         last argument and therefore has no value
     */
    public int setOptionFlag(String[] args, int i) {
        boolean didSomething = true;
        while (i < args.length && didSomething) {
            final String flag = args[i];
            didSomething = true;
            if (this.annotationPatterns.containsKey(flag)) {
                Pair<TregexPattern, Function<TregexMatcher, String>> p = this.annotationPatterns.get(flag);
                this.activeAnnotations.put(p.first(), p.second());
                this.optionsString = this.optionsString + "Option " + flag + " added annotation pattern " + p.first() + " with annotation " + p.second() + "\n";
                if (flag.equals("-retainNPTmp")) {
                    // This flag doubles as a normalizer setting; see the method comment.
                    this.optionsString = this.optionsString + "Retaining NP-TMP marking.\n";
                    this.retainNPTmp = true;
                }
            } else if (flag.equals("-changeNoLabels")) {
                this.optionsString = this.optionsString + "Change no labels.\n";
                this.changeNoLabels = true;
            } else if (flag.equals(markPRDverbString)) {
                this.optionsString = this.optionsString + "Mark PRD.\n";
                this.retainPRD = true;
            } else if (flag.equals("-collinizerRetainsPunctuation")) {
                this.optionsString = this.optionsString + "Collinizer retains punctuation.\n";
                this.collinizerRetainsPunctuation = true;
            } else if (flag.equals("-collinizerPruneRegex")) {
                final String regex = requireOptionValue(args, i);
                this.optionsString = this.optionsString + "Collinizer prune regex: " + regex + "\n";
                this.collinizerPruneRegex = Pattern.compile(regex);
                // Skip the value; the flag itself is skipped below.
                ++i;
            } else if (flag.equals("-hf")) {
                final String className = requireOptionValue(args, i);
                try {
                    this.headFinderClass = Class.forName(className).asSubclass(HeadFinder.class);
                    this.optionsString = this.optionsString + "HeadFinder class: " + className + "\n";
                }
                catch (ClassNotFoundException e) {
                    // Best effort: keep the previously configured head finder.
                    System.err.println("Error -- can't find HeadFinder class " + className);
                }
                catch (ClassCastException e) {
                    // The class exists but does not implement HeadFinder; keep the previous one.
                    System.err.println("Error -- " + className + " is not a HeadFinder");
                }
                // Skip the value even when the class could not be used.
                ++i;
            } else {
                didSomething = false;
            }
            if (didSomething) {
                ++i;
            }
        }
        return i;
    }

    /** Returns the value following the option at {@code args[i]}, or fails with a clear message if there is none. */
    private static String requireOptionValue(String[] args, int i) {
        if (i + 1 >= args.length) {
            throw new IllegalArgumentException("Option " + args[i] + " requires an argument");
        }
        return args[i + 1];
    }

    /**
     * @return a fixed-size list holding the eleven tokens of the built-in test sentence
     */
    public List defaultTestSentence() {
        final String[] tokens = {"w", "lm", "tfd", "mElwmAt", "En", "ADrAr", "Aw", "DHAyA", "HtY", "AlAn", "."};
        return Arrays.asList(tokens);
    }

    /**
     * Command-line helper: loads the treebank at {@code args[0]} and prints every tree whose
     * yield has at most {@code args[1]} words. Any further arguments are passed to
     * {@code setOptionFlag}.
     *
     * @param args {@code treebankPath maxLength [options...]}
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            // Without this check the method fails on args[1] with an index error.
            System.err.println("Usage: java " + ArabicTreebankParserParams.class.getName() + " treebankPath maxLength [options...]");
            return;
        }
        int maxLength = Integer.parseInt(args[1]);
        ArabicTreebankParserParams tlpp = new ArabicTreebankParserParams();
        int firstUnused = tlpp.setOptionFlag(args, 2);
        if (firstUnused < args.length) {
            // setOptionFlag stops at the first option it does not know; say so instead of ignoring it silently.
            System.err.println("Ignoring unrecognized option " + args[firstUnused] + " and everything after it.");
        }
        DiskTreebank trees = tlpp.diskTreebank();
        trees.loadPath(args[0]);
        PrintWriter pw = tlpp.pw();
        for (Tree t : trees) {
            if (t.yield().size() > maxLength) continue;
            pw.println(t);
        }
        // Make sure buffered output reaches its destination before the JVM exits.
        pw.flush();
    }

    /**
     * @return a new {@link BaseLexicon} built with its no-argument constructor
     */
    public Lexicon lex() {
        final Lexicon lexicon = new BaseLexicon();
        return lexicon;
    }

    /**
     * @param op lexicon options handed to the lexicon constructor
     * @return a new {@link BaseLexicon} configured with {@code op}
     */
    public Lexicon lex(Options.LexOptions op) {
        final Lexicon lexicon = new BaseLexicon(op);
        return lexicon;
    }

    /*
     * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
     */
    /**
     * Maps the label of the matched node named {@code "term"} to a punctuation-class suffix.
     * The classes are tested in a fixed order (end of sentence, comma, left bracket, right
     * bracket, dash, quote) and the first match wins; anything else yields the empty string.
     * The bracket tests come before the dash test because the dash pattern accepts any label
     * starting with '-'.
     */
    private static class AnnotatePunctuationFunction
    implements Function<TregexMatcher, String> {
        /** Name of the Tregex node whose label is classified. */
        static final String key = "term";
        private static final Pattern endOfSentence = Pattern.compile("^(\\.|\\?.*)$");
        private static final Pattern comma = Pattern.compile("^,$");
        // colon, slash, percent and ellipses are compiled but not consulted by apply().
        private static final Pattern colon = Pattern.compile("^[:;].*$");
        private static final Pattern dash = Pattern.compile("^-.*$");
        private static final Pattern quote = Pattern.compile("^\"$");
        private static final Pattern slash = Pattern.compile("^\\/$");
        private static final Pattern percent = Pattern.compile("^\\%$");
        private static final Pattern ellipses = Pattern.compile("^\\.\\.\\.$");
        private static final Pattern lrb = Pattern.compile("^-LRB-$");
        private static final Pattern rrb = Pattern.compile("^-RRB-$");

        private AnnotatePunctuationFunction() {
        }

        /**
         * @param m a matcher positioned on a match that binds the node named {@code "term"}
         * @return the suffix for the punctuation class, or {@code ""} if no class applies
         */
        @Override
        public String apply(TregexMatcher m) {
            final String token = m.getNode(key).label().value();
            String suffix = "";
            if (isWholeMatch(endOfSentence, token)) {
                suffix = "-eos";
            } else if (isWholeMatch(comma, token)) {
                suffix = "-comma";
            } else if (isWholeMatch(lrb, token)) {
                suffix = "-lrb";
            } else if (isWholeMatch(rrb, token)) {
                suffix = "-rrb";
            } else if (isWholeMatch(dash, token)) {
                suffix = "-dash";
            } else if (isWholeMatch(quote, token)) {
                suffix = "-quote";
            }
            return suffix;
        }

        /** True when {@code shape} matches the whole of {@code token}. */
        private static boolean isWholeMatch(Pattern shape, String token) {
            return shape.matcher(token).matches();
        }

        public String toString() {
            return "AnnotatePunctuationFunction";
        }
    }

    /*
     * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
     */
    /**
     * Produces an annotation made of a fixed mark followed by the label value of a named
     * node taken from the current match.
     */
    private static class AddRelativeNodeFunction
    implements Function<TregexMatcher, String> {
        /** Text placed in front of the relative node's label value. */
        private String annotationMark;
        /** Name under which the relative node is looked up in the matcher. */
        private Object key;

        /**
         * @param annotationMark prefix of every annotation this function returns
         * @param key            name of the matched node whose label value is appended
         */
        public AddRelativeNodeFunction(String annotationMark, Object key) {
            this.annotationMark = annotationMark;
            this.key = key;
        }

        /**
         * @param m a matcher positioned on a match that binds the node named by {@code key}
         * @return the mark followed by that node's label value
         */
        @Override
        public String apply(TregexMatcher m) {
            final String relativeLabel = m.getNode(key).label().value();
            return annotationMark + relativeLabel;
        }

        public String toString() {
            final StringBuilder text = new StringBuilder("AddRelativeNodeFunction[");
            text.append(annotationMark).append(",").append(key).append("]");
            return text.toString();
        }
    }

    /*
     * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
     */
    /** Annotation function that returns the same fixed string for every match. */
    private static class SimpleStringFunction
    implements Function<TregexMatcher, String> {
        /** The annotation returned by every call to {@code apply}. */
        private String result;

        /**
         * @param result the annotation to return for every match
         */
        public SimpleStringFunction(String result) {
            this.result = result;
        }

        /**
         * @param unused the matcher; not consulted
         * @return the fixed annotation given at construction
         */
        @Override
        public String apply(TregexMatcher unused) {
            return result;
        }

        public String toString() {
            final StringBuilder text = new StringBuilder("SimpleStringFunction[");
            text.append(result).append("]");
            return text.toString();
        }
    }

    /**
     * Tree transformer that normalises a tree: it drops a start-symbol root, optionally
     * prunes nodes by label regex, reduces non-leaf labels to their basic category, relabels
     * {@code ADVP} as {@code PRT}, and (unless told otherwise) prunes punctuation.
     *
     * <p>Fields are intentionally left non-final and no {@code serialVersionUID} is declared,
     * so the class's default serial form is not altered by this change.
     */
    private static class ArabicCollinizer
    implements TreeTransformer,
    Serializable {
        private TreebankLanguagePack tlp;
        private boolean retainPunctuation;
        private Pattern collinizerPruneRegex;

        /**
         * @param tlp                  language pack supplying start symbol, basic category and punctuation filter
         * @param retainPunctuation    when true, the final punctuation pruning pass is skipped
         * @param collinizerPruneRegex nodes whose label value fully matches are pruned; {@code null} disables this
         */
        public ArabicCollinizer(TreebankLanguagePack tlp, boolean retainPunctuation, Pattern collinizerPruneRegex) {
            this.tlp = tlp;
            this.retainPunctuation = retainPunctuation;
            this.collinizerPruneRegex = collinizerPruneRegex;
        }

        /**
         * Returns a normalised deep copy of {@code t}; the input tree is not modified.
         *
         * @param t the tree to normalise
         * @return the normalised copy, or {@code null} if {@code t} is {@code null}, if a
         *         start-symbol root has no child, or if pruning removes the whole tree
         */
        public Tree transformTree(Tree t) {
            if (t == null) {
                return null;
            }
            if (this.tlp.isStartSymbol(t.value())) {
                t = t.firstChild();
                if (t == null) {
                    // A bare start symbol with nothing beneath it leaves no tree to return.
                    return null;
                }
            }
            Tree result = t.deepCopy();
            if ((result = result.prune(new Filter<Tree>(){

                @Override
                public boolean accept(Tree tree) {
                    return ArabicCollinizer.this.collinizerPruneRegex == null || tree.label() == null || !ArabicCollinizer.this.collinizerPruneRegex.matcher(tree.label().value()).matches();
                }
            })) == null) {
                return null;
            }
            for (Tree node : result) {
                if (node.label() == null) {
                    // Nothing to normalise; the old code dereferenced the null label below.
                    continue;
                }
                if (!node.isLeaf()) {
                    node.label().setValue(this.tlp.basicCategory(node.label().value()));
                }
                // The relabelling applies to any labelled node, leaves included, as before.
                if ("ADVP".equals(node.label().value())) {
                    node.label().setValue("PRT");
                }
            }
            if (this.retainPunctuation) {
                return result;
            }
            return result.prune(new Filter<Tree>(){
                final Filter punctLabelFilter;
                {
                    this.punctLabelFilter = ArabicCollinizer.this.tlp.punctuationTagRejectFilter();
                }

                @Override
                public boolean accept(Tree tree) {
                    return this.punctLabelFilter.accept(tree.value());
                }
            });
        }
    }

    /**
     * Factory for tree readers over the Arabic treebank. Each reader is a
     * {@link PennTreeReader} using a {@link LabeledScoredTreeFactory}, an
     * {@link ArabicTreeNormalizer} configured with this factory's three flags, and an
     * {@link ArabicTreebankTokenizer} over the same input.
     */
    private static class ArabicTreeReaderFactory
    implements TreeReaderFactory,
    Serializable {
        // The three flags are handed unchanged to the normalizer's constructor.
        private boolean retainNPTmp;
        private boolean retainPRD;
        private boolean changeNoLabels;

        /**
         * @param retainNPTmp    first normalizer argument
         * @param retainPRD      second normalizer argument
         * @param changeNoLabels third normalizer argument
         */
        public ArabicTreeReaderFactory(boolean retainNPTmp, boolean retainPRD, boolean changeNoLabels) {
            this.retainNPTmp = retainNPTmp;
            this.retainPRD = retainPRD;
            this.changeNoLabels = changeNoLabels;
        }

        /**
         * @param in the character stream to read trees from
         * @return a new tree reader over {@code in}
         */
        public TreeReader newTreeReader(Reader in) {
            // Collaborators are created in the same order as the original argument list.
            final LabeledScoredTreeFactory treeFactory = new LabeledScoredTreeFactory();
            final ArabicTreeNormalizer normalizer = new ArabicTreeNormalizer(retainNPTmp, retainPRD, changeNoLabels);
            final ArabicTreebankTokenizer tokenizer = new ArabicTreebankTokenizer(in);
            return new PennTreeReader(in, treeFactory, normalizer, tokenizer);
        }
    }
}

