package edu.mit.jmwe.detect.score;

import edu.mit.jmwe.data.IMWE;
import edu.mit.jmwe.data.IToken;
import edu.mit.jmwe.data.MWEPOS;
import edu.mit.jmwe.data.StopWords;
import edu.mit.jmwe.util.JWIPOS;
import edu.mit.jwi.IDictionary;
import edu.mit.jwi.item.IIndexWord;
import edu.mit.jwi.item.IWordID;
import edu.mit.jwi.item.POS;
import edu.mit.jwi.morph.IStemmer;
import edu.mit.jwi.morph.WordnetStemmer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/mit/jmwe/detect/score/LeskScore.class */
/**
 * Scores a multi-word expression by gloss overlap: the score is the largest
 * number of stemmed content words that any dictionary gloss of the
 * expression's entry shares with the stemmed content words of a context
 * token list supplied at construction time.
 *
 * @param <T> the token type of the context and of the scored expressions
 */
public class LeskScore<T extends IToken> extends AbstractScorer<IMWE<T>> {
    /** Unmodifiable set of stemmed content words extracted from the context tokens. */
    protected final Set<String> contextWords;

    /** Dictionary used to look up index words, words and their synset glosses. */
    protected final IDictionary dict;

    /** Stemmer, built on {@link #dict}, used to expand words with their stems. */
    protected final IStemmer stemmer;

    /** Matches one or more whitespace characters; used to tokenize text. */
    protected static final Pattern whitespace = Pattern.compile("\\s+");

    /** Matches a single punctuation character; every match is deleted before tokenizing. */
    protected static final Pattern punctuation = Pattern.compile("\\p{Punct}");

    /**
     * Builds a scorer whose context is the given token list.
     *
     * <p>The forms of all tokens are joined with spaces, reduced to content
     * words and stemmed; the result is stored in {@link #contextWords}.
     *
     * <p>NOTE(review): this constructor calls the overridable methods
     * {@link #getContentWords(String)}, {@link #getStemmedWords(Collection)}
     * and (indirectly) {@link #getStopWords()}; subclass overrides run before
     * the subclass's own fields are initialized.
     *
     * @param list the context tokens; must not be {@code null}
     * @param iDictionary the dictionary to look glosses up in; must not be {@code null}
     * @throws NullPointerException if either argument is {@code null}
     */
    public LeskScore(List<T> list, IDictionary iDictionary) {
        if (iDictionary == null) {
            throw new NullPointerException();
        }
        this.dict = iDictionary;
        this.stemmer = new WordnetStemmer(iDictionary);

        StringBuilder context = new StringBuilder();
        for (T token : list) {
            context.append(token.getForm()).append(' ');
        }
        this.contextWords = Collections.unmodifiableSet(
                getStemmedWords(getContentWords(context.toString())));
    }

    /**
     * Returns the largest gloss/context overlap over all glosses of the
     * expression's entry.
     *
     * @param imwe the multi-word expression to score
     * @return the maximum overlap count as a double, or {@code 0.0} when the
     *         entry has no glosses
     */
    @Override // edu.mit.jmwe.detect.score.IScorer
    public double score(IMWE<T> imwe) {
        List<String> glosses = getGlosses(imwe.getEntry().getForm(), imwe.getEntry().getPOS());
        double best = 0.0d;
        for (String gloss : glosses) {
            best = Math.max(best, overlap(gloss));
        }
        return best;
    }

    /**
     * Splits text into lower-cased, punctuation-free tokens and drops the
     * stop words.
     *
     * <p>Empty tokens are dropped as well. {@link Pattern#split(CharSequence)}
     * yields a single empty string for empty input and a leading empty string
     * when the text starts with whitespace (for example after a leading
     * punctuation token has been stripped). Keeping such a token would let the
     * empty string count as a shared word between context and gloss, and would
     * hand an empty word to the stemmer.
     *
     * @param str the text to tokenize; must not be {@code null}
     * @return a new mutable list of the remaining tokens, in order of
     *         appearance (duplicates are kept)
     */
    protected List<String> getContentWords(String str) {
        // Locale.ROOT keeps the casing independent of the JVM's default locale.
        String lowered = str.toLowerCase(Locale.ROOT);
        String stripped = punctuation.matcher(lowered).replaceAll("");
        String[] tokens = whitespace.split(stripped);

        Set<String> stopWords = getStopWords();
        List<String> contentWords = new ArrayList<String>(tokens.length);
        for (String token : tokens) {
            if (!token.isEmpty() && !stopWords.contains(token)) {
                contentWords.add(token);
            }
        }
        return contentWords;
    }

    /**
     * Returns the stop words to ignore; this implementation delegates to
     * {@link StopWords#get()}.
     *
     * @return the stop word set
     */
    protected Set<String> getStopWords() {
        return StopWords.get();
    }

    /**
     * Collects the synset gloss of every sense the dictionary lists for the
     * given form and part of speech.
     *
     * @param str the form to look up
     * @param mwepos the part of speech of the entry; must not be {@code null}
     * @return a new list with one gloss per word ID of the index word, or an
     *         empty list when the part of speech cannot be mapped or the
     *         dictionary has no index word for the form
     */
    protected List<String> getGlosses(String str, MWEPOS mwepos) {
        // Identifier 'P' is looked up as a noun (presumably proper nouns -
        // confirm against MWEPOS); everything else goes through JWIPOS.
        POS pos = mwepos.getIdentifier() == 'P' ? POS.NOUN : JWIPOS.toPOS(mwepos);
        if (pos == null) {
            return Collections.emptyList();
        }
        IIndexWord indexWord = this.dict.getIndexWord(str, pos);
        if (indexWord == null) {
            return Collections.emptyList();
        }
        List<String> glosses = new ArrayList<String>();
        for (IWordID wordId : indexWord.getWordIDs()) {
            glosses.add(this.dict.getWord(wordId).getSynset().getGloss());
        }
        return glosses;
    }

    /**
     * Counts the distinct stemmed content words of the given text that also
     * occur in {@link #contextWords}.
     *
     * @param str the text (a gloss) to compare against the context
     * @return the size of the intersection
     */
    protected int overlap(String str) {
        Set<String> shared = getStemmedWords(getContentWords(str));
        shared.retainAll(this.contextWords);
        return shared.size();
    }

    /**
     * Builds the set of the given words minus stop words, plus every stem the
     * stemmer finds for each given word.
     *
     * <p>Stems are computed for all input words, including any stop words in
     * the input, and are added after the stop-word removal; a stem is
     * therefore kept even if it is itself a stop word.
     *
     * @param collection the words to stem; must not be {@code null}
     * @return a new mutable set of words and stems
     */
    protected Set<String> getStemmedWords(Collection<String> collection) {
        Set<String> stemmed = new HashSet<String>(collection);
        stemmed.removeAll(getStopWords());
        for (String word : collection) {
            // A null part of speech is passed through to the stemmer as-is.
            stemmed.addAll(this.stemmer.findStems(word, (POS) null));
        }
        return stemmed;
    }
}
