/* Filename: IndicationDescFactory.java
 * Creator: Raquel Hervas
 * Format: Java 2 v1.6.0
 * Date created: 26/10/2009
 */
package nil.ucm.indications2.core.rep;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;

import nil.ucm.indications2.core.rep.rules.ArticlesAreUsuallyDistinctiveModifiersRule;
import edu.mit.discourse.core.rep.referent.IReference;
import edu.mit.discourse.core.rep.referent.IReferent;
import edu.mit.discourse.core.rep.referent.RefPair;
import edu.mit.discourse.core.rep.referent.ReferentRep;
import edu.mit.parsing.core.rep.pos.IPosTag;
import edu.mit.parsing.core.rep.pos.POSRep;
import edu.mit.parsing.core.rep.token.IToken;
import edu.mit.parsing.core.rep.token.TokenRep;
import edu.mit.story.core.desc.DescSet;
import edu.mit.story.core.desc.IDesc;
import edu.mit.story.core.desc.IDescSet;
import edu.mit.story.core.desc.ISegment;
import edu.mit.story.core.desc.Segment;
import edu.mit.story.core.desc.factory.AbstractDescFactory2;
import edu.mit.story.core.model.IStoryData;
import edu.mit.story.core.model.IStoryModel;
import edu.mit.story.core.model.change.Change;
import edu.mit.story.core.model.change.Flag;
import edu.mit.story.core.model.change.StoryChangeEvent;
import edu.mit.story.core.position.DistinctRegionSet;
import edu.mit.story.core.position.HasPositionSet;
import edu.mit.story.core.position.IHasPosition;
import edu.mit.story.core.position.IHasPositionSet;
import edu.mit.story.core.position.IntervalSet;
import edu.mit.story.core.position.PositionUtils;

/** 
 *   Factory for the Indication Structure representations. This initial version only
 * recognizes the nuclei and modifiers inside a Reference.
 *
 * @author Raquel Hervas
 * @author M.A. Finlayson
 * @version 1.0, (Jan. 11, 2010)
 * @since nil.ucm.indications.core 1.0.0
 */
public class IndicationDescFactory extends AbstractDescFactory2<IndexedIndication> {

	/** Part-of-speech tag strings whose tokens are treated as nucleus tokens. */
	private static final Set<String> NUCLEUS_TAGS = Collections.unmodifiableSet(
			new HashSet<String>(Arrays.asList("NN","NNS","NNP","NNPS","PRP")));
	
	/**
	 * Constructs a new factory that produces descriptions for the indication
	 * structure representation and is registered against the referent
	 * representation.
	 */
	public IndicationDescFactory() {
		super(IndicationStructureRep.getInstance(), ReferentRep.getInstance());
	}

	/* 
	 * (non-Javadoc) @see edu.mit.story.core.desc.factory.AbstractDescFactory2#calculateRegions(edu.mit.story.core.model.change.StoryChangeEvent)
	 */
	@Override
	protected DistinctRegionSet calculateRegions(StoryChangeEvent event) {
		
		// This is what we will return; it stays empty when no referent changed
		IntervalSet result = new IntervalSet();
		
		// Collect the Referent changes carried by the event, if any
		Change referentChange = event.getChange(ReferentRep.getInstance());
		if(referentChange == null) return result;
		
		// Collect the regions whose descriptions changed
		IHasPositionSet<IDesc> referentRegions = referentChange.getChanges(Flag.DESCS);
		if(referentRegions == null) return result;
		
		// Add all referents that overlap the changed intervals; the referent
		// descriptions are only looked up once we know there is something to do
		IDescSet refDescs = event.provider.getData().getDescriptions(ReferentRep.getInstance());
		for(IHasPosition region : referentRegions){
			result.addAll(refDescs.openSet(region));
		}
		
		return result;
	}

	/* 
	 * (non-Javadoc) @see edu.mit.story.core.desc.factory.AbstractDescFactory2#calculateData(edu.mit.story.core.position.IHasPosition, edu.mit.story.core.model.IStoryModel)
	 */
	@Override
	protected IHasPositionSet<IndexedIndication> calculateData(IHasPosition region, IStoryModel model) {
		IDescSet refDescs = model.getData().getDescriptions(ReferentRep.getInstance()).openSet(region);
		
		// first get all the references overlapping the region; each one starts
		// out with an empty list of sub-references
		Map<RefPair, List<IReference>> overlapMap = new LinkedHashMap<RefPair, List<IReference>>();
		IReferent referent;
		for(IDesc refDesc : refDescs){
			referent = (IReferent)refDesc.getData();
			for(IReference reference : referent.getReferences()){
				if(!PositionUtils.overlapsOpen(reference, region)) continue;
				overlapMap.put(new RefPair(refDesc, reference), new LinkedList<IReference>());
			}
		}
		
		// then calculate which are subreferences of which: every other collected
		// reference that lies inside this one is recorded as its sub-reference
		for(Entry<RefPair, List<IReference>> entry : overlapMap.entrySet()){
			for(RefPair pair : overlapMap.keySet()){
				// identity check is enough, both come from the same key set
				if(entry.getKey() == pair) continue;
				if(PositionUtils.isInside(pair.ref, entry.getKey().ref)) entry.getValue().add(pair.ref);
			}
		}
		
		// return this to caller
		IHasPositionSet<IndexedIndication> result = new HasPositionSet<IndexedIndication>();
		
		// calculate indications; if the model has no part of speech layer we
		// work with an empty set of tags
		IDescSet posDescs = model.supports(POSRep.getInstance()) ? model.getData().getDescriptions(POSRep.getInstance()) : new DescSet(POSRep.getInstance());
		IndexedIndication idxInd;
		for(Entry<RefPair, List<IReference>> entry : overlapMap.entrySet()){
			idxInd = calculateIndication(entry.getKey(), entry.getValue(), posDescs, model.getData());
			if(idxInd != null) result.add(idxInd);
		}
		return result;
	}

	/**
	 * Splits the tokens of a single reference into nuclei and modifiers and
	 * wraps the result in an indexed indication.
	 * <p>
	 * A reference with exactly one token yields that token as the only nucleus.
	 * Otherwise the tokens covered by each interval of the sub-references form
	 * one modifier per interval, and every remaining token is classified by its
	 * part of speech tag: tokens whose tag is one of the nucleus tags, and
	 * untagged tokens that are not articles, are gathered into one nucleus; all
	 * other tokens become one modifier each. If no nucleus token is found, the
	 * last modifier in the list is promoted to nucleus.
	 *
	 * @param pair
	 *            the referent description and the reference to be analyzed
	 * @param subRefs
	 *            the references to be treated as sub-references of the
	 *            reference in <code>pair</code>
	 * @param posDescs
	 *            the part of speech descriptions used to look up the tag of
	 *            each token; may be empty
	 * @param data
	 *            the story data handed to the segments, nuclei and modifiers
	 *            that are created
	 * @return the indication for the reference, or <code>null</code> if neither
	 *         a nucleus nor a modifier could be found for it
	 * @since nil.ucm.indications2.core 1.0.0
	 */
	protected IndexedIndication calculateIndication(RefPair pair, List<IReference> subRefs, IDescSet posDescs, IStoryData data){
		
		IDescSet tokenDescs = Segment.extractSegmentDescriptions(pair.ref);
		
		List<INucleus> nuclei;
		List<IModifier> modifiers;
		
		// only one token, must be the nucleus
		if(tokenDescs.size() == 1){
			nuclei = Collections.<INucleus>singletonList(new Nucleus(tokenDescs.first(), data));
			modifiers = Collections.emptyList();
			return newIndication(pair, nuclei, modifiers);
		}
		
		// get map of tokens to tags; tokens without a tag map to null
		Map<IDesc, String> tagMap = new HashMap<IDesc, String>(tokenDescs.size());
		IDescSet subset;
		String tag;
		for(IDesc tokenDesc : tokenDescs){
			subset = posDescs.matchSet(tokenDesc);
			tag = subset.isEmpty() ? null : ((IPosTag)subset.first().getData()).getTag().getTagString(); 
			tagMap.put(tokenDesc, tag);
		}
		
		nuclei = new LinkedList<INucleus>();
		modifiers = new LinkedList<IModifier>();
		
		// take care of sub-references: each interval becomes one modifier, and
		// its tokens are taken out of the map so they are not classified again
		IntervalSet subRefIntervals = new IntervalSet(subRefs);
		IDescSet modTokens;
		ISegment segment;
		for(IHasPosition interval : subRefIntervals){
			modTokens = tokenDescs.closedSet(interval);
			segment = new Segment(modTokens, TokenRep.getInstance(), data);
			modifiers.add(new Modifier(Collections.singleton(segment)));
			for(IDesc modToken : modTokens) tagMap.remove(modToken);
		}
		
		// anything not in a subreference, put it in a modifier or nucleus
		IDescSet nucTokens = new DescSet(TokenRep.getInstance());
		modTokens = new DescSet(TokenRep.getInstance());
		boolean isNucleus;
		for(Entry<IDesc, String> entry : tagMap.entrySet()){
			if(entry.getValue() == null){
				// no tag: everything but an article counts as nucleus
				isNucleus = !isArticle(entry.getKey());
			} else {
				isNucleus = NUCLEUS_TAGS.contains(entry.getValue());
			}
			if(isNucleus){
				nucTokens.add(entry.getKey());
			} else {
				modTokens.add(entry.getKey());
			}
		}
		
		// do nuclei: all nucleus tokens go into a single segment
		if(!nucTokens.isEmpty()){
			segment = new Segment(nucTokens, TokenRep.getInstance(), data);
			nuclei.add(new Nucleus(Collections.singleton(segment)));
			// make sure no token held by the nucleus segment is also a modifier
			modTokens.removeAll(segment.getDescs());
		}
		
		// do modifiers: one modifier per remaining token
		for(IDesc modToken : modTokens) modifiers.add(new Modifier(modToken, data));
		
		// if no nuclei, pick the last modifier to be the nucleus
		if(nuclei.isEmpty()){
			// nothing to promote either (e.g., a reference without tokens):
			// return null, which calculateData skips, instead of failing with
			// an IndexOutOfBoundsException on the empty list
			if(modifiers.isEmpty()) return null;
			IModifier mod = modifiers.remove(modifiers.size()-1);
			nuclei.add(new Nucleus(mod.getSegments()));
		}
		return newIndication(pair, nuclei, modifiers);
	}
	
	/**
	 * Tells whether the surface form of the specified token is an article,
	 * ignoring case. The list of articles is the one kept by
	 * {@link ArticlesAreUsuallyDistinctiveModifiersRule}.
	 *
	 * @param token
	 *            a description whose data is an {@link IToken}
	 * @return <code>true</code> if the lower-cased surface form of the token is
	 *         contained in the article list; <code>false</code> otherwise
	 * @since nil.ucm.indications2.core 1.0.0
	 */
	protected boolean isArticle(IDesc token){
		String surface = ((IToken)token.getData()).getSurface();
		// lower-case independently of the platform locale, so the lookup does
		// not change with the machine the code runs on
		return ArticlesAreUsuallyDistinctiveModifiersRule.articles.contains(surface.toLowerCase(Locale.ROOT));
	}
	
	/**
	 * Wraps the given nuclei and modifiers in an indication structure for the
	 * reference of the pair, and indexes that structure by the reference.
	 */
	private static IndexedIndication newIndication(RefPair pair, List<INucleus> nuclei, List<IModifier> modifiers){
		return new IndexedIndication(pair.ref, new IndicationStructure(pair.desc, pair.ref.getID(), nuclei, modifiers, false));
	}
}
