/* Filename: KappaCalculator.java
 * Creator: M.A. Finlayson
 * Format: Java 2 v1.6.0
 * Date created: Feb 8, 2010
 */
package nil.ucm.indications2.ui.agreement;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import nil.ucm.indications2.core.rep.IIndicationStructure;
import nil.ucm.indications2.core.rep.IModifier;
import nil.ucm.indications2.core.rep.INucleus;
import nil.ucm.indications2.core.rep.IndicationStructureRep;
import nil.ucm.indications2.core.rep.IndicationType;

import org.eclipse.core.resources.IProject;
import org.eclipse.core.resources.IResource;
import org.eclipse.core.runtime.CoreException;

import edu.mit.discourse.core.rep.referent.IReference;
import edu.mit.discourse.core.rep.referent.IReferent;
import edu.mit.discourse.core.rep.referent.ReferentRep;
import edu.mit.parsing.core.rep.token.IToken;
import edu.mit.parsing.core.rep.token.TokenRep;
import edu.mit.story.core.desc.DescSet;
import edu.mit.story.core.desc.IDesc;
import edu.mit.story.core.desc.IDescSet;
import edu.mit.story.core.desc.ISegment;
import edu.mit.story.core.model.IStoryModel;
import edu.mit.story.core.model.XMLBackedStoryModel;
import edu.mit.story.core.position.IHasPositionSet;
import edu.mit.story.core.rep.character.CharRep;
import edu.mit.story.core.util.StringUtils;
import edu.mit.story.ui.ops.CreateFileOperation;

/** 
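 * Computes the agreement between two annotators' versions of the same story files.
 * For every <code>.sty</code> file found under the same name in two projects, the tokens,
 * references and indications of both versions are paired, and two kappa values are obtained
 * from {@link FleissKappa}: one for the nucleus/modifier constituents and one for the functions
 * (indication types) of the constituents. All intermediate and final results are printed to
 * standard output.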
 *
 * @author M.A. Finlayson
 * @version $Rev$, $LastChangedDate$
 * @since nil.ucm.indications2.ui 1.0.0
 */
public class KappaCalculator {
	
	// Results of the most recent calculate() run, keyed by file name. Each value is a two-element
	// array: index 0 holds the constituents kappa and index 1 the functions kappa.
	private Map<String,float[]> totalKappas = new Hashtable<String,float[]>();
	
	/**
	 * Computes the agreement between the files of two projects and prints the results to
	 * standard output.
	 * <p>
	 * Every <code>.sty</code> file of the first project that has a file with exactly the same
	 * name directly inside the second project is compared via
	 * {@link #calculateAgreementFiles(IResource, IResource)}. Files without a counterpart are
	 * skipped silently. Once all files are processed, the collected kappas are printed as a
	 * tab-separated table.
	 * <p>
	 * A {@link CoreException} raised while listing the project members is printed and ends the
	 * file comparison early; the table with whatever was collected so far is still printed.
	 *
	 * @param one the project holding the first annotator's files
	 * @param two the project holding the second annotator's files
	 */
	public void calculate(IProject one, IProject two){
		
		System.out.println("Calculating agreement between " + one.getName() + " and " + two.getName() + "...");
		
		// discard the results of any previous run
		totalKappas.clear();
		
		try 
		{
			// members(int) expects member flags, not a resource-type mask, so the plain
			// members() is used and the "files only" filter is applied explicitly below
			IResource[] filesOne = one.members();
			IResource[] filesTwo = two.members();
			
			Comparator<IResource> byName = new ResourceFileNameComparator();
			Arrays.sort(filesOne, byName);
			Arrays.sort(filesTwo, byName);
			
			// filesTwo is sorted by name, hence so is this array and it can be binary-searched
			String[] namesFilesTwo = getNamesOfFiles(filesTwo);
			
			for (IResource fileOne : filesOne) 
			{
				if (fileOne.getType() != IResource.FILE) continue;
				
				String ext = fileOne.getFileExtension();
				if (ext == null || !ext.equalsIgnoreCase("sty")) continue;
				
				int pos = Arrays.binarySearch(namesFilesTwo, fileOne.getName());
				if (pos < 0) continue; // no resource with that name in the second project
				
				IResource fileTwo = filesTwo[pos];
				if (fileTwo.getType() != IResource.FILE) continue; // same name, but not a file
				
				calculateAgreementFiles(fileOne, fileTwo);
			}
		} 
		catch (CoreException e) {
			e.printStackTrace();
		}
		
		String finalKappas = kappasForFile();
		System.out.println("\n---COPY AND PASTE IN EXCEL:\n\n" + finalKappas);
	}

	/**
	 * Collects the names of the given resources, keeping their order.
	 *
	 * @param filesTwo the resources whose names are wanted
	 * @return an array of the same length as <code>filesTwo</code> holding, at each index, the
	 *         name of the resource found at that index
	 * @since nil.ucm.indications2.ui 1.0.0
	 */
	private String[] getNamesOfFiles(IResource[] filesTwo) {
		
		String[] names = new String[filesTwo.length];
		
		int next = 0;
		for (IResource file : filesTwo) {
			names[next++] = file.getName();
		}
		
		return names;
	}

	/**
	 * Computes the agreement between two versions of the same story file and stores the
	 * resulting kappas in {@link #totalKappas} under the name of the first file.
	 * <p>
	 * Both files are loaded into story models; then tokens, references and indications are
	 * paired (in that order, each pairing building on the previous one) and finally the
	 * constituents and functions kappas are calculated. Every intermediate result is printed
	 * to standard output.
	 *
	 * @param resource1 the first annotator's file
	 * @param resource2 the second annotator's file
	 * @since nil.ucm.indications2.ui 1.0.0
	 */
	private void calculateAgreementFiles(IResource resource1, IResource resource2)
	{
		String xmlOne = loadXml(resource1);
		String xmlTwo = loadXml(resource2);
		
		XMLBackedStoryModel modelOne = new XMLBackedStoryModel(CharRep.getInstance());
		XMLBackedStoryModel modelTwo = new XMLBackedStoryModel(CharRep.getInstance());
		
		modelOne.setXML(xmlOne);
		modelTwo.setXML(xmlTwo);
		
		String fileName = resource1.getName();
		System.out.println("-- " + fileName + " --");
		
		// each pairing step relies on the result of the previous one
		List<long[]> pairedTokens = tokenPairing(modelOne,modelTwo);
		System.out.println("TOKENS: " + printPairs(pairedTokens));
		
		List<long[]> pairedReferences = referencePairing(modelOne,modelTwo,pairedTokens);
		System.out.println("REFERENCES: " + printPairs(pairedReferences));
		
		List<long[]> pairedIndications = indicationPairing(modelOne,modelTwo,pairedReferences);
		System.out.println("INDICATIONS: " + printPairs(pairedIndications));
		
		float kappaConst = calculateKappaConstituents(modelOne,modelTwo,pairedIndications,pairedTokens);
		float kappaFunctions = calculateKappaFunctions(modelOne,modelTwo,pairedIndications,pairedTokens);
		
		System.out.println("Kappa constituents: " + kappaConst + "\nKappa functions: " + kappaFunctions);
		
		// index 0: constituents kappa, index 1: functions kappa
		this.totalKappas.put(resource1.getName(), new float[] { kappaConst, kappaFunctions });
	}

	/**
	 * Reads the whole content of a resource into a string.
	 *
	 * @param resource the resource to read, located through its raw location
	 * @return the content of the resource
	 */
	private String loadXml(IResource resource) {
		
		StringBuilder content = new StringBuilder();
		StringUtils.loadStreamIntoBuffer(content, CreateFileOperation.getContent(resource.getRawLocation()));
		return content.toString();
	}

	/**
	 * Renders the collected kappas as a tab-separated table: a header line followed by one
	 * line per file with its name, constituents kappa and functions kappa.
	 * <p>
	 * The kappas are written with a decimal comma instead of a decimal point. Only the numbers
	 * are converted; file names keep their dots (e.g. <code>story.sty</code>).
	 * <p>
	 * The lines follow the iteration order of {@link #totalKappas}, which is a
	 * <code>Hashtable</code> and therefore not sorted.
	 *
	 * @return the table, every line terminated by a newline
	 * @since nil.ucm.indications2.ui 1.0.0
	 */
	private String kappasForFile() {
		
		StringBuilder table = new StringBuilder("Name\tConstituents Kappa\tFunction Kappa\n");
		
		for (Map.Entry<String,float[]> entry : totalKappas.entrySet()) {
			
			float[] kappas = entry.getValue();
			table.append(entry.getKey())
				.append('\t')
				.append(Float.toString(kappas[0]).replace('.', ','))
				.append('\t')
				.append(Float.toString(kappas[1]).replace('.', ','))
				.append('\n');
		}
		
		return table.toString();
	}

	/**
	 * Calculates the kappa for the functions (types) given by both annotators to the nuclei
	 * and modifiers they agree on.
	 * <p>
	 * The matrix handed to {@link FleissKappa} looks like:
	 * <pre>
	 *              DI      DE      OT
	 * ind207n1     0       2       0
	 * ind207m1     1       1       0
	 * ...
	 * </pre>
	 * There is one row per nucleus or modifier of the first model for which the paired
	 * indication of the second model has a nucleus (respectively modifier) made of the
	 * equivalent tokens. The row counts how many annotators marked that constituent as
	 * distinctive, descriptive or other. Rows are keyed by the display text of the constituent
	 * followed by the id of the first indication, so constituents sharing both overwrite each
	 * other. The table is printed to standard output before the kappa is computed.
	 *
	 * @param modelOne the first annotator's model
	 * @param modelTwo the second annotator's model
	 * @param pairedIndications pairs <code>[indication id in one, indication id in two]</code>
	 * @param pairedTokens pairs <code>[token id in one, token id in two]</code>
	 * @return the value computed by {@link FleissKappa#computeKappa} for the matrix
	 * @since nil.ucm.indications2.ui 1.0.0
	 */
	private float calculateKappaFunctions(IStoryModel modelOne,IStoryModel modelTwo, List<long[]> pairedIndications, List<long[]> pairedTokens) {
		
		Map<String,short[]> constituentsTable = new Hashtable<String,short[]>();
		
		for (long[] pairInd : pairedIndications) {
			
			// paired indications cover the same (equivalent) tokens
			IIndicationStructure indOne = (IIndicationStructure)modelOne.getData().getDescription(pairInd[0]).getData();
			IIndicationStructure indTwo = (IIndicationStructure)modelTwo.getData().getDescription(pairInd[1]).getData();
			
			for (INucleus nucOne : indOne.getNuclei()) {
				
				// first nucleus of the second indication made of the equivalent tokens
				INucleus partner = null;
				for (INucleus candidate : indTwo.getNuclei()) {
					if (equalsInTokens(nucOne.getSegments(),candidate.getSegments(),pairedTokens)) {
						partner = candidate;
						break;
					}
				}
				
				if (partner == null) continue; // annotators disagree on this nucleus
				
				short[] votes = {0,0,0};
				votes = updateTypes(nucOne.getType(),votes);
				votes = updateTypes(partner.getType(),votes);
				constituentsTable.put(nucOne.getDisplayText() + "-" + pairInd[0], votes);
			}
			
			for (IModifier modOne : indOne.getModifiers()) {
				
				// first modifier of the second indication made of the equivalent tokens
				IModifier partner = null;
				for (IModifier candidate : indTwo.getModifiers()) {
					if (equalsInTokens(modOne.getSegments(),candidate.getSegments(),pairedTokens)) {
						partner = candidate;
						break;
					}
				}
				
				if (partner == null) continue; // annotators disagree on this modifier
				
				short[] votes = {0,0,0};
				votes = updateTypes(modOne.getType(),votes);
				votes = updateTypes(partner.getType(),votes);
				constituentsTable.put(modOne.getDisplayText() + "-" + pairInd[0], votes);
			}
		}
		
		System.out.println(tableConsToString(constituentsTable));
		
		short[][] mat = createMatrix(constituentsTable.size(),constituentsTable.values());
		
		return FleissKappa.computeKappa(mat);
	}

	/**
	 * Arranges the given rows as a matrix, in the iteration order of the collection.
	 * <p>
	 * The rows are not copied: each row of the matrix is the very array found in
	 * <code>values</code>, so its length is the length of that array. Rows beyond the number
	 * of elements in <code>values</code> (if <code>size</code> is larger) stay as three zeros.
	 *
	 * @param size the number of rows of the matrix; must not be smaller than the number of
	 *             elements in <code>values</code>
	 * @param values the rows to place in the matrix
	 * @return the matrix
	 */
	private short[][] createMatrix(int size, Collection<short[]> values) {
		
		short[][] matrix = new short[size][3];
		
		int row = 0;
		for (Iterator<short[]> rows = values.iterator(); rows.hasNext(); row++) {
			matrix[row] = rows.next();
		}
		
		return matrix;
	}

	/**
	 * Renders the constituents table in one line, for debugging purposes. Every entry is
	 * written as <code>key=[a,b,c], </code> (note the trailing separator).
	 *
	 * @param constituentsTable the table to render; every value must have at least three elements
	 * @return the rendered table, or the empty string if the table is empty
	 * @since nil.ucm.indications2.ui 1.0.0
	 */
	private String tableConsToString(Map<String, short[]> constituentsTable) {
	
		StringBuilder text = new StringBuilder();
		
		for (Map.Entry<String, short[]> row : constituentsTable.entrySet()) {
			
			short[] counts = row.getValue();
			text.append(row.getKey())
				.append("=[").append(counts[0])
				.append(",").append(counts[1])
				.append(",").append(counts[2])
				.append("], ");
		}
		
		return text.toString();
	}

	/**
	 * Checks whether two sets of segments, one from each model, are made of equivalent tokens.
	 * <p>
	 * Segments are compared pairwise in iteration order, and so are the tokens inside each pair
	 * of segments. A token of the first model matches a token of the second model when the
	 * token pairing maps the id of the former to the id of the latter.
	 *
	 * @param segments the segments from the first model
	 * @param segments2 the segments from the second model
	 * @param pairedTokens pairs <code>[token id in one, token id in two]</code>
	 * @return <code>true</code> if both sets have the same number of segments, each pair of
	 *         segments has the same number of tokens and all tokens match; <code>false</code>
	 *         otherwise
	 * @since nil.ucm.indications2.ui 1.0.0
	 */
	private boolean equalsInTokens(IHasPositionSet<ISegment> segments,	IHasPositionSet<ISegment> segments2, List<long[]> pairedTokens) {
		
		if (segments.size() != segments2.size()) return false;
		
		for (Iterator<ISegment> segsOne = segments.iterator(), segsTwo = segments2.iterator(); segsOne.hasNext() && segsTwo.hasNext(); ) {
			
			ISegment segOne = segsOne.next();
			ISegment segTwo = segsTwo.next();
			
			if (segOne.getDescs().size() != segTwo.getDescs().size()) return false;
			
			for (Iterator<IDesc> toksOne = segOne.getDescs().iterator(), toksTwo = segTwo.getDescs().iterator(); toksOne.hasNext() && toksTwo.hasNext(); ) {
				
				IDesc tokenOne = toksOne.next();
				IDesc tokenTwo = toksTwo.next();
				
				// translate the id from the first model into the second one before comparing
				if (equivalentTokenId(tokenOne.getID(), pairedTokens) != tokenTwo.getID()) return false;
			}
		}
		
		return true;
	}

	/**
	 * Adds one vote for the given type to a row of the functions matrix.
	 *
	 * @param type the type voted for; distinctive goes to column 0, descriptive to column 1
	 *             and any other type to column 2
	 * @param types the row to update; it is modified in place
	 * @return the same array received in <code>types</code>
	 */
	private short[] updateTypes(IndicationType type, short[] types) {
		
		final int column;
		switch (type) {
			case DISTINCTIVE: column = 0; break;
			case DESCRIPTIVE: column = 1; break;
			default: column = 2; break;
		}
		
		types[column]++;
		return types;
	}

	/**
	 * Calculates the kappa for the constituents (nucleus or modifier) the annotators assigned
	 * to the tokens of the paired indications.
	 * <p>
	 * The matrix handed to {@link FleissKappa} looks like:
	 * <pre>
	 *             Nucleus    Modifier
	 * ind207t3       2          0
	 * ind207t2       1          1
	 * ...
	 * </pre>
	 * There is one row per token, keyed by the id the token has in the second model. The row
	 * counts how many annotators placed the token in a nucleus and how many in a modifier.
	 * Rows that do not add up to two (tokens annotated in only one of the models) are dropped.
	 * The table is printed to standard output before the kappa is computed.
	 *
	 * @param modelOne the first annotator's model
	 * @param modelTwo the second annotator's model
	 * @param pairedIndications pairs <code>[indication id in one, indication id in two]</code>
	 * @param pairedTokens pairs <code>[token id in one, token id in two]</code>
	 * @return the value computed by {@link FleissKappa#computeKappa} for the matrix
	 * @since nil.ucm.indications2.ui 1.0.0
	 */
	private float calculateKappaConstituents(IStoryModel modelOne, IStoryModel modelTwo, List<long[]> pairedIndications, List<long[]> pairedTokens) {
		
		Map<Long,short[]> tokenTable = new Hashtable<Long,short[]>();
		
		for (long[] pairInd : pairedIndications) {
			
			// paired indications cover the same (equivalent) tokens
			IIndicationStructure indOne = (IIndicationStructure)modelOne.getData().getDescription(pairInd[0]).getData();
			IIndicationStructure indTwo = (IIndicationStructure)modelTwo.getData().getDescription(pairInd[1]).getData();
			
			// first annotator: every token of a nucleus or modifier opens a row with one vote
			for (INucleus nuc : indOne.getNuclei()) {
				seedTokenRows(nuc.getSegments(), 0, pairedTokens, tokenTable);
			}
			for (IModifier mod : indOne.getModifiers()) {
				seedTokenRows(mod.getSegments(), 1, pairedTokens, tokenTable);
			}
			
			// second annotator: votes are added to the rows opened by the first one
			for (INucleus nuc : indTwo.getNuclei()) {
				countTokenVotes(nuc.getSegments(), 0, tokenTable);
			}
			for (IModifier mod : indTwo.getModifiers()) {
				countTokenVotes(mod.getSegments(), 1, tokenTable);
			}
			
			// A token may belong to a nucleus or modifier in one file and not be annotated in
			// the other. Every row must add up to 2 (two annotators); otherwise the kappa
			// algorithm crashes, so incomplete rows are removed.
			Iterator<Map.Entry<Long,short[]>> rows = tokenTable.entrySet().iterator();
			while (rows.hasNext()) {
				short[] nucModCount = rows.next().getValue();
				if (nucModCount[0] + nucModCount[1] != 2) {
					rows.remove();
				}
			}
		}
		
		System.out.println(tableTokensToString(tokenTable));
		
		short[][] mat = createMatrix(tokenTable.size(),tokenTable.values());
		
		return FleissKappa.computeKappa(mat);
	}

	/**
	 * Opens (or resets) a row with a single vote for every token of the given segments, which
	 * come from the first model. Rows are keyed by the id of the equivalent token in the second
	 * model.
	 *
	 * @param segments the segments of a nucleus or modifier of the first model
	 * @param column the column receiving the vote: 0 for nucleus, 1 for modifier
	 * @param pairedTokens pairs <code>[token id in one, token id in two]</code>
	 * @param tokenTable the table to fill
	 */
	private void seedTokenRows(IHasPositionSet<ISegment> segments, int column, List<long[]> pairedTokens, Map<Long,short[]> tokenTable) {
		
		for (Iterator<ISegment> segs = segments.iterator(); segs.hasNext(); ) {
			for (IDesc token : segs.next().getDescs()) {
				long tokenId = equivalentTokenId(token.getID(), pairedTokens);
				short[] nucModCount = new short[2];
				nucModCount[column] = 1;
				tokenTable.put(tokenId, nucModCount);
			}
		}
	}

	/**
	 * Adds a vote for every token of the given segments, which come from the second model, to
	 * the row of that token. Tokens without a row are ignored.
	 *
	 * @param segments the segments of a nucleus or modifier of the second model
	 * @param column the column receiving the vote: 0 for nucleus, 1 for modifier
	 * @param tokenTable the table to update
	 */
	private void countTokenVotes(IHasPositionSet<ISegment> segments, int column, Map<Long,short[]> tokenTable) {
		
		for (Iterator<ISegment> segs = segments.iterator(); segs.hasNext(); ) {
			for (IDesc token : segs.next().getDescs()) {
				long tokenId = token.getID();
				short[] nucModCount = tokenTable.get(tokenId);
				if (nucModCount != null) { // null when the first annotator did not use the token
					nucModCount[column]++;
				}
			}
		}
	}

	/**
	 * Renders the token table in one line, for debugging purposes. Every entry is written as
	 * <code>id=[a,b], </code> (note the trailing separator).
	 *
	 * @param tokenTable the table to render; every value must have at least two elements
	 * @return the rendered table, or the empty string if the table is empty
	 * @since nil.ucm.indications2.ui 1.0.0
	 */
	private String tableTokensToString(Map<Long, short[]> tokenTable) {
		
		StringBuilder text = new StringBuilder();
		
		for (Map.Entry<Long, short[]> row : tokenTable.entrySet()) {
			
			short[] counts = row.getValue();
			text.append(row.getKey().longValue())
				.append("=[").append(counts[0])
				.append(",").append(counts[1])
				.append("], ");
		}
		
		return text.toString();
	}

	/**
	 * Pairs the indications of both models by following the paired references: for each pair
	 * of references, the first indication matching each reference is taken.
	 * <p>
	 * The code assumes every reference has exactly one indication. References for which no
	 * indication is found produce no pair and are reported on standard output.
	 *
	 * @param modelOne the first annotator's model
	 * @param modelTwo the second annotator's model
	 * @param pairedReferences tuples <code>[referent id in one, reference id in one, referent
	 *        id in two, reference id in two]</code>
	 * @return pairs <code>[indication id in one, indication id in two]</code>
	 * @since nil.ucm.indications2.ui 1.0.0
	 */
	private List<long[]> indicationPairing(IStoryModel modelOne,IStoryModel modelTwo, List<long[]> pairedReferences) {
		
		List<long[]> pairedIndications = new ArrayList<long[]>();
		
		// references lacking an indication, reported at the end
		List<IReference> refOneNoInd = new ArrayList<IReference>();
		List<IReference> refTwoNoInd = new ArrayList<IReference>();
		
		for (long[] pairRef : pairedReferences) {
			
			IReferent referentOne = (IReferent)modelOne.getData().getDescription(pairRef[0]).getData();
			IReference refOne = referentOne.getReference((int) pairRef[1]);
			IReferent referentTwo = (IReferent)modelTwo.getData().getDescription(pairRef[2]).getData();
			IReference refTwo = referentTwo.getReference((int) pairRef[3]);
			
			IDescSet indOneSet = modelOne.getData().getDescriptions(IndicationStructureRep.getInstance()).matchSet(refOne);
			IDescSet indTwoSet = modelTwo.getData().getDescriptions(IndicationStructureRep.getInstance()).matchSet(refTwo);
			
			boolean missingInOne = indOneSet.size() == 0;
			boolean missingInTwo = indTwoSet.size() == 0;
			
			if (missingInOne) refOneNoInd.add(refOne);
			if (missingInTwo) refTwoNoInd.add(refTwo);
			if (missingInOne || missingInTwo) continue;
			
			pairedIndications.add(new long[] { indOneSet.first().getID(), indTwoSet.first().getID() });
		}
		
		System.out.println("References in text one without indication: " + refOneNoInd.toString());
		System.out.println("References in text two without indication: " + refTwoNoInd.toString());
		
		return pairedIndications;
	}

	/**
	 * Pairs the references of both models, using the token pairing to decide which references
	 * are equivalent.
	 * <p>
	 * The references of each model are sorted with {@link ReferenceComparator}. Then, for every
	 * reference of the first model, the references of the second model still unpaired are
	 * scanned in order and the first equivalent one is taken and withdrawn from the candidates.
	 * References left without partner in either model are reported on standard output.
	 *
	 * @param modelOne the first annotator's model
	 * @param modelTwo the second annotator's model
	 * @param pairedTokens pairs <code>[token id in one, token id in two]</code>
	 * @return tuples <code>[referent id in one, reference id in one, referent id in two,
	 *         reference id in two]</code>
	 * @since nil.ucm.indications2.ui 1.0.0
	 */
	private List<long[]> referencePairing(IStoryModel modelOne,IStoryModel modelTwo, List<long[]> pairedTokens) {
		
		List<long[]> pairedReferences = new ArrayList<long[]>();
		
		List<IReference> referencesOne = getAllReferences(modelOne);
		List<IReference> referencesTwo = getAllReferences(modelTwo);
		
		Collections.sort(referencesOne, new ReferenceComparator());
		Collections.sort(referencesTwo, new ReferenceComparator());
		
		List<IReference> notFoundInOne = new ArrayList<IReference>();
		
		for(IReference refOne : referencesOne) {
			
			// first reference of the second model, among those still free, equivalent to refOne
			IReference partner = null;
			for (IReference candidate : referencesTwo) {
				if (equivalentReferences(refOne,candidate,pairedTokens)) {
					partner = candidate;
					break;
				}
			}
			
			if (partner == null) {
				notFoundInOne.add(refOne);
				continue;
			}
			
			long[] tuple = {
				modelOne.getData().getDescriptions(ReferentRep.getInstance()).matchSet(refOne.getParent().calculatePosition()).first().getID(),
				refOne.getID(),
				modelTwo.getData().getDescriptions(ReferentRep.getInstance()).matchSet(partner.getParent().calculatePosition()).first().getID(),
				partner.getID()
			};
			pairedReferences.add(tuple);
			
			// a reference of the second model can be paired only once
			referencesTwo.remove(partner);
		}
		
		// whatever is left in referencesTwo found no partner
		System.out.println("References in text one without pairing: " + notFoundInOne.toString());
		System.out.println("References in text two without pairing: " + referencesTwo.toString());
		
		return pairedReferences;
	}

	/**
	 * Checks whether a reference of the first model and a reference of the second model are
	 * equivalent, i.e. whether their segments are made of equivalent tokens according to the
	 * token pairing.
	 * <p>
	 * This is the same segment-by-segment, token-by-token comparison applied to nuclei and
	 * modifiers, so it is delegated to
	 * {@link #equalsInTokens(IHasPositionSet, IHasPositionSet, List)} instead of being
	 * duplicated here.
	 *
	 * @param refOne the reference from the first model
	 * @param refTwo the reference from the second model
	 * @param pairedTokens pairs <code>[token id in one, token id in two]</code>
	 * @return <code>true</code> if both references have the same number of segments, each pair
	 *         of segments has the same number of tokens and every token of the first reference
	 *         is paired with the token at the same position in the second one;
	 *         <code>false</code> otherwise
	 * @since nil.ucm.indications2.ui 1.0.0
	 */
	private boolean equivalentReferences(IReference refOne, IReference refTwo,List<long[]> pairedTokens) {
		
		IHasPositionSet<ISegment> segmentsOne = refOne.getSegments();
		IHasPositionSet<ISegment> segmentsTwo = refTwo.getSegments();
		
		return equalsInTokens(segmentsOne, segmentsTwo, pairedTokens);
	}

	/**
	 * Translates the id of a token of the first model into the id of the token it is paired
	 * with in the second model.
	 *
	 * @param id the id of the token in the first model
	 * @param pairedTokens pairs <code>[token id in one, token id in two]</code>
	 * @return the id of the paired token in the second model, or <code>-1</code> if the token
	 *         has no pair; should the id appear in several pairs, the last one in the list wins
	 * @since nil.ucm.indications2.ui 1.0.0
	 */
	private long equivalentTokenId(long id, List<long[]> pairedTokens) {
		
		// scanned backwards so that the last pair holding the id is the one returned
		for (int i = pairedTokens.size() - 1; i >= 0; i--) {
			
			long[] pair = pairedTokens.get(i);
			if (pair[0] == id) return pair[1];
		}
		
		return -1;
	}

	/**
	 * Gathers the references of all the referents of a model, referent by referent.
	 *
	 * @param model the model to take the references from
	 * @return a new, modifiable list with all the references
	 * @since nil.ucm.indications2.ui 1.0.0
	 */
	private List<IReference> getAllReferences(IStoryModel model) {
		
		List<IReference> references = new ArrayList<IReference>();
		
		for(IDesc referentDesc : model.getData().getDescriptions(ReferentRep.getInstance())){
			IReferent referent = (IReferent)referentDesc.getData();
			for(IReference reference : referent.getReferences()){
				references.add(reference);
			}
		}
		
		return references;
	}

	/**
	 * Renders a list of id tuples in one line, for debugging purposes. The tuples are written
	 * as <code>Arrays.toString</code> does, each followed by a comma, and the whole list is
	 * enclosed in parentheses.
	 *
	 * @param pairs the tuples to render
	 * @return the rendered list; <code>"()"</code> if the list is empty
	 * @since nil.ucm.indications2.ui 1.0.0
	 */
	private String printPairs(List<long[]> pairs) {
		
		StringBuilder text = new StringBuilder("(");
		
		for (long[] tuple : pairs) {
			text.append(Arrays.toString(tuple)).append(",");
		}
		
		return text.append(")").toString();
	}

	//	private String printMatrix(short[][] matrix) {
	//		
	//		String result = "";
	//		
	//		for (int i=0; i<matrix.length; i++) {
	//			
	//			result = result + Arrays.toString(matrix[i]) + "\n";
	//		}
	//		
	//		return result;
	//	}
	
	/**
	 * Pairs the tokens of both models by their surface text, ignoring case.
	 * <p>
	 * The tokens of the first model are visited one by one. Each of them is looked up, in
	 * order, among the tokens of the second model still unpaired; the first one with the same
	 * surface is taken and removed from the candidates. Tokens left without partner in either
	 * model are reported on standard output.
	 * <p>
	 * NOTE(review): paired tokens are removed from the very set returned by
	 * <code>modelTwo.getData().getDescriptions(...)</code>; if that set is a live view, this
	 * alters the second model - confirm.
	 *
	 * @param modelOne the first annotator's model
	 * @param modelTwo the second annotator's model
	 * @return pairs <code>[token id in one, token id in two]</code>
	 * @since nil.ucm.indications2.ui 1.0.0
	 */
	private List<long[]> tokenPairing(IStoryModel modelOne,	IStoryModel modelTwo) {
		
		List<long[]> pairedTokens = new ArrayList<long[]>();
		
		IDescSet tokensInOne = modelOne.getData().getDescriptions(TokenRep.getInstance());
		IDescSet tokensInTwo = modelTwo.getData().getDescriptions(TokenRep.getInstance());
		
		IDescSet notFoundInOne = new DescSet(TokenRep.getInstance());
		
		for (Iterator<IDesc> itOne = tokensInOne.iterator(); itOne.hasNext(); ) {
			
			IDesc tokenDescOne = itOne.next();
			
			// first token of the second model, among those still free, with the same surface
			IDesc partner = null;
			for (Iterator<IDesc> itTwo = tokensInTwo.iterator(); itTwo.hasNext(); ) {
				
				IDesc candidate = itTwo.next();
				if (((IToken)tokenDescOne.getData()).getSurface().equalsIgnoreCase(((IToken)candidate.getData()).getSurface())) {
					partner = candidate;
					break;
				}
			}
			
			if (partner == null) {
				notFoundInOne.add(tokenDescOne);
				continue;
			}
			
			pairedTokens.add(new long[] { tokenDescOne.getID(), partner.getID() });
			
			// a token of the second model can be paired only once
			tokensInTwo.remove(partner);
		}
		
		// whatever is left in tokensInTwo found no partner
		System.out.println("Tokens in text one without pairing: " + notFoundInOne.toString());
		System.out.println("Tokens in text two without pairing: " + tokensInTwo.toString());
		
		return pairedTokens;
	}

	/**
	 * Orders resources by their name, following the natural (case-sensitive) order of strings.
	 * This is the same order <code>Arrays.binarySearch</code> assumes for an array of names.
	 */
	protected class ResourceFileNameComparator implements Comparator<IResource> {
		
		public int compare(IResource file1, IResource file2) {
			
			String nameOne = file1.getName();
			String nameTwo = file2.getName();
			return nameOne.compareTo(nameTwo);
		}
	}

	/**
	 * Orders references by their offset and, for references starting at the same offset, by
	 * their length (shorter first).
	 * <p>
	 * References with the same offset and length compare as equal (<code>0</code>), as the
	 * contract of {@link Comparator} demands: a comparator that never returns zero is not
	 * symmetric, and sorting with it may fail with "Comparison method violates its general
	 * contract!". The sort is stable, so such references keep their relative order.
	 */
	protected class ReferenceComparator implements Comparator<IReference> {
		
		public int compare(IReference ref1, IReference ref2) {
			
			if (ref1.getOffset() < ref2.getOffset()) return -1;
			if (ref1.getOffset() > ref2.getOffset()) return 1;
			
			// same offset: the shorter reference goes first
			if (ref1.getLength() < ref2.getLength()) return -1;
			if (ref1.getLength() > ref2.getLength()) return 1;
			
			return 0;
		}
	}
	

}
