package weka.filters.unsupervised.instance;

import java.util.Arrays;
import weka.filters.Filter;
import weka.filters.UnsupervisedFilter;
import weka.core.AttributeStats;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.neighboursearch.NearestNeighbourSearch;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.OptionHandler;
import weka.core.Option;
import weka.core.UnassignedDatasetException;
import weka.core.Utils;
import java.util.Enumeration;
import java.util.Vector;
/** 
 <!-- globalinfo-start -->
 * Removes examples for the imbalanced classes using Wilson Editing algorithm
 * (Edited Nearest Neighbours, ENN): an instance is dropped when some other
 * class is more frequent than its own class among its nearest neighbours.
 * <p/>
 <!-- globalinfo-end -->
 * 
 <!-- options-start -->
 * Valid options are: <p/>
 * 
 * <pre> -K &lt;num&gt;
 *  Specify the factor of nearest neighbors for
 *  each class sample (default 5)</pre>
 *  
 <!-- options-end -->
 *
 * @author 
 * @version $Revision: 1.0 $ 
 */

public class ENN 
  extends Filter 
  implements UnsupervisedFilter, OptionHandler {   

    /** for serialization */
    private static final long serialVersionUID = 11L;

    /** The random number generator seed */
    private int m_RandomSeed = 1;

    /**Factor for nearest neighbors generation*/
    private int m_kNeighborsFactor = 5;
    /**
     * Provides the short description of this filter.
     *
     * @return the description text, suitable for displaying in the
     * explorer/experimenter gui
     */
    public String globalInfo() {
        final String description =
            "Removes examples for the imbalanced classes using Wilson Editing algorithm";
        return description;
    }
    
    /**
     * Returns an enumeration describing the available options.
     * The only option is <code>-K &lt;num&gt;</code>, which takes exactly
     * one argument (the number of nearest neighbours, default 5).
     *
     * @return an enumeration of all the available options.
     */
    public Enumeration listOptions() {
        Vector<Option> options = new Vector<Option>();
        // -K consumes a single value, so the declared argument count is 1.
        options.addElement(new Option(
            "\tSpecify the factor of nearest neighbors for\n"
            + "\teach class sample (default 5)",
            "K", 1, "-K <num>"));
        return options.elements();
    }
    /**
     * Parses a given list of options. <p/>
     * 
     <!-- options-start -->
     * Valid options are: <p/>
     * 
     * <pre> -K &lt;num&gt;
     *  Specify the factor of nearest neighbors for
     *  each class sample (default 5)</pre>
     *    
     <!-- options-end -->
     *
     * When an input format has already been defined it is set again
     * after the option has been applied.
     *
     * @param options the list of options as an array of strings
     * @throws Exception if an option is not supported
     */
    public void setOptions(String[] options) throws Exception {
        String kText = Utils.getOption('K', options);
        // An absent -K falls back to the default of 5 neighbours.
        int neighbors = (kText.length() == 0) ? 5 : Integer.parseInt(kText);
        setKNeighborsFactor(neighbors);

        Instances currentFormat = getInputFormat();
        if (currentFormat != null) {
            setInputFormat(currentFormat);
        }
    }
    /**
     * Gets the current settings of the filter: the <code>-K</code> flag
     * followed by the current neighbour factor.
     *
     * @return an array of strings suitable for passing to setOptions
     */
    public String[] getOptions() {
        return new String[] { "-K", "" + getKNeighborsFactor() };
    }
   /**
    * Returns the tip text for the randomSeed property.
    * 
    * @return tip text for this property suitable for
    * displaying in the explorer/experimenter gui
    */
    public String randomSeedTipText() {
        final String tip = "The seed used for random sampling.";
        return tip;
    }
    /**
     * Gets the stored random number seed.
     *
     * @return the random number seed.
     */
    public int getRandomSeed() {
        return this.m_RandomSeed;
    }
    /**
     * Stores a new random number seed.
     *
     * @param newSeed the new random number seed.
     */
    public void setRandomSeed(int newSeed) {
        this.m_RandomSeed = newSeed;
    }
 
   /**
    * Returns the tip text for the KNeighborsFactor property.
    * 
    * @return tip text for this property suitable for
    * displaying in the explorer/experimenter gui
    */
    public String KNeighborsFactorTipText() {
        final String tip = "The factor of nearest neighbors for each minority class sample.";
        return tip;
    }    
    /**
     * Gets the number of nearest neighbours examined for each instance.
     *
     * @return the current neighbour factor
     */    
    public int getKNeighborsFactor(){
        return this.m_kNeighborsFactor;
    }
    /**
     * Sets the number of nearest neighbours examined for each instance.
     *
     * @param nNeighbors the new neighbour factor; must be at least 1
     * @throws IllegalArgumentException if nNeighbors is zero or negative
     */  
    public void setKNeighborsFactor(int nNeighbors){
        if (nNeighbors <= 0) {
            throw new IllegalArgumentException("k factor must be at least 1");
        }
        m_kNeighborsFactor = nNeighbors;
    }
 
     /** 
     * Returns the Capabilities of this filter: all attribute types
     * (including missing values), all class types, missing class values
     * and datasets without a class.
     *
     * NOTE(review): setInputFormat in this class rejects any input whose
     * class is not nominal, so the class capabilities declared here are
     * broader than what is actually accepted - confirm which is intended.
     *
     * @return            the capabilities of this object
     * @see               Capabilities
     */        
    public Capabilities getCapabilities() {
        Capabilities caps = super.getCapabilities();

        // attribute side
        caps.enableAllAttributes();
        caps.enable(Capability.MISSING_VALUES);

        // class side
        caps.enableAllClasses();
        caps.enable(Capability.MISSING_CLASS_VALUES);
        caps.enable(Capability.NO_CLASS);

        return caps;
    }

    /**
     * Sets the format of the input instances. The output format is the
     * same as the input format.
     *
     * @param instanceInfo an Instances object containing the input 
     * instance structure (any instances contained in the object are 
     * ignored - only the structure is required).
     * @return true if the outputFormat may be collected immediately
     * @throws IllegalArgumentException if no class is set or the class
     * attribute is not nominal
     * @throws Exception if the superclass cannot set the input format
     */
    public boolean setInputFormat(Instances instanceInfo) 
    throws Exception {	
        // The class index has to be tested first: asking for the class
        // attribute of a dataset without a class fails before the intended
        // IllegalArgumentException could be raised.
        if (instanceInfo.classIndex() < 0 || !instanceInfo.classAttribute().isNominal()) {
            throw new IllegalArgumentException("This filter requires nominal class");
        }
        m_FirstBatchDone = false;
        super.setInputFormat(instanceInfo);
        setOutputFormat(instanceInfo);        
        return true;
    }
    /**
     * Input an instance for filtering. During the first batch instances
     * are only buffered; once the first batch is done every instance is
     * passed straight to the output queue.
     *
     * @param instance the input instance
     * @return true if the filtered instance may now be
     * collected with output().
     * @throws IllegalStateException if no input structure has been defined
     */	
    public boolean input(Instance instance) {
        if (getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }
        if (m_NewBatch) {
            resetQueue();
            m_NewBatch = false;
        }
        if (!m_FirstBatchDone) {
            // first batch: collect everything, nothing to output yet
            bufferInput(instance);
            return false;
        }
        push(instance);
        return true;
    }
    /**
     * Signify that this batch of input to the filter is finished. 
     * For the first batch the buffered instances are run through Wilson
     * editing. If that fails with anything other than an
     * IllegalArgumentException, the stack trace is printed and all
     * buffered instances are output unchanged.
     *
     * @return true if there are instances pending output
     * @throws IllegalStateException if no input structure has been defined
     * @throws IllegalArgumentException if Wilson editing raises one
     */
    public boolean batchFinished() {
        if (getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }

        if (!isFirstBatchDone()) {
            try {
                applyWilsonEditing();
            } catch (IllegalArgumentException badArgument) {
                // passed on unchanged so that the caller can report it
                throw badArgument;
            } catch (Exception failure) {
                failure.printStackTrace();
                // best effort: hand back the buffered data unfiltered
                Instances buffered = getInputFormat();
                for (int i = 0; i < buffered.numInstances(); i++) {
                    push((Instance) buffered.instance(i));
                }
            }
        }
        flushInput();
        m_NewBatch = true;
        m_FirstBatchDone = true;
        return numPendingOutput() != 0;
    }  
	
    /**
     * Applies Wilson Editing algorithm to the current set of input instances.
     * For every buffered instance the classes of its nearest neighbours are
     * counted; the instance is dropped when some other class occurs strictly
     * more often among those neighbours than the instance's own class.
     * All remaining instances are pushed onto the output queue in their
     * original order.
     *
     * Instances whose class value is missing are never dropped, and
     * neighbours whose class value is missing do not take part in the vote.
     *
     * @throws Exception if the nearest neighbour search fails
     */    
    private void applyWilsonEditing() throws Exception {
        Instances sourceInstances = getInputFormat();
        int numSourceInstances = sourceInstances.numInstances();
        // One counter per declared class label. Counting only the labels that
        // occur in the data would make the table too small whenever a label
        // is absent, because class values are used directly as indexes.
        int numClassLabels = sourceInstances.numClasses();
        int[] neighborVotes = new int[numClassLabels];
        // removed[i] is true when instance i must not be output
        boolean[] removed = new boolean[numSourceInstances];

        LinearNNSearchENN search = new LinearNNSearchENN(sourceInstances);

        // 1. Decide for each source instance whether it is removed
        for (int current = 0; current < numSourceInstances; current++) {
            Instance instance = sourceInstances.instance(current);
            if (instance.classIsMissing()) {
                // without a class there is nothing to compare against
                continue;
            }

            // 1.1. Reset the class counters
            Arrays.fill(neighborVotes, 0);

            // 1.2. Search the neighbours and fetch them
            search.kNearestNeighbours(instance, m_kNeighborsFactor);
            Instance[] neighbors = search.getNearestInstances();

            // 1.3. Count the classes of the first k neighbours. The search
            //      may deliver fewer than k entries (small datasets) or more
            //      than k (it keeps distance ties), so the bound is clamped.
            int usable = Math.min(m_kNeighborsFactor, neighbors.length);
            for (int n = 0; n < usable; n++) {
                if (!neighbors[n].classIsMissing()) {
                    neighborVotes[(int) neighbors[n].classValue()]++;
                }
            }

            // 1.4. Remove when any other class outnumbers the own class
            int ownClass = (int) instance.classValue();
            int agreeing = neighborVotes[ownClass];
            for (int label = 0; label < numClassLabels && !removed[current]; label++) {
                if (label != ownClass && neighborVotes[label] > agreeing) {
                    removed[current] = true;
                }
            }
        }

        // 2. Output every instance that was not marked for removal
        for (int i = 0; i < numSourceInstances; i++) {
            if (!removed[i]) {
                push(sourceInstances.instance(i));
            }
        }
    }
   

    /**
     * Runs the given filter with the given options. With the
     * <code>-b</code> flag the batch variant of file filtering is used.
     * Exceptions whose text contains "Help requested" or "Filter options"
     * are reported by message only; any other exception gets a full stack
     * trace.
     * 
     * @param filter	the filter to run
     * @param options	the command line options
     */
    public static void runFilter(Filter filter, String[] options) {
        try {
            boolean batchMode = Utils.getFlag('b', options);
            if (!batchMode) {
                Filter.filterFile(filter, options);
            } else {
                Filter.batchFilterFile(filter, options);
            }
        } catch (Exception ex) {
            String text = ex.toString();
            boolean usageRequest = (text.indexOf("Help requested") != -1)
                || (text.indexOf("Filter options") != -1);
            if (usageRequest) {
                System.err.println(ex.getMessage());
            } else {
                ex.printStackTrace();
            }
        }
    }        
    /**
     * Main method for testing this class. The first argument names the
     * Filter class to instantiate; it is blanked out before the remaining
     * arguments are handed to runFilter.
     *
     * @param args should contain the filter class name followed by the
     * arguments to the filter: use -h for help
     */
    public static void main(String [] args) {
        try {
            if (args.length == 0) {
                throw new Exception("First argument must be the class name of a Filter");
            }
            String filterClassName = args[0];
            Filter filter = (Filter) Class.forName(filterClassName).newInstance();
            // the class name is not a filter option
            args[0] = "";
            runFilter(filter, args);
        } catch (Exception ex) {
            ex.printStackTrace();
            System.err.println(ex.getMessage());
        }
    }
    
    public class LinearNNSearchENN
    extends NearestNeighbourSearch {

    /** for serialization. */
    private static final long serialVersionUID = 1915484723703917241L;

    /** Array holding the distances of the nearest neighbours. It is filled up
     *  both by nearestNeighbour() and kNearestNeighbours(). 
     */
    protected double[] m_Distances;
    /** Array holding the nearest-neighbour instances themselves. It is
      * filled up both by nearestNeighbour() and kNearestNeighbours(). 
     */
    protected Instance[] m_nnInstances;
      
    /** Whether to skip instances from the neighbours that are identical to the query instance. */
    protected boolean m_SkipIdentical = false;
    /** Whether to skip instances from the neighbours that are of a different class. */
    protected boolean m_SkipDifferentClass = false;

    /**
     * Creates a search without any instances; setInstances(Instances)
     * has to be called before the object is usable.
     */
    public LinearNNSearchENN() {
      // nothing beyond the superclass initialisation
      super();
    }
    
    /**
     * Creates a search over the supplied set of instances and hands the
     * same set to the distance function.
     * 
     * @param insts	the instances to use
     */
    public LinearNNSearchENN(Instances insts) {
      super(insts);
      this.m_DistanceFunction.setInstances(insts);
    }
    
    /**
     * Describes this nearest neighbour search algorithm.
     * 
     * @return 		a description of the algorithm for displaying in the 
     * 			explorer/experimenter gui
     */
    public String globalInfo() {
      final String description =
          "Class implementing the brute force search algorithm for nearest "
        + "neighbour search.";
      return description;
    }
    
    /**
     * Returns an enumeration describing the available options; only the
     * <code>-S</code> option of this class is listed.
     *
     * @return 		an enumeration of all the available options.
     */
    public Enumeration listOptions() {
      Option skipIdentical = new Option(
  	"\tSkip identical instances (distances equal to zero).\n",
  	"S", 1,"-S");

      Vector<Option> listed = new Vector<Option>();
      listed.add(skipIdentical);
      return listed.elements();
    }
    
    /**
     * Parses a given list of options. The superclass options are parsed
     * first, then the <code>-S</code> flag. <p/>
     *
     <!-- options-start -->
     * Valid options are: <p/>
     * 
     * <pre> -S
     *  Skip identical instances (distances equal to zero).
     * </pre>
     * 
     <!-- options-end -->
     *
     * @param options 	the list of options as an array of strings
     * @throws Exception 	if an option is not supported
     */
    public void setOptions(String[] options) throws Exception {
      super.setOptions(options);

      boolean skip = Utils.getFlag('S', options);
      setSkipIdentical(skip);
    }

    /**
     * Gets the current settings: the superclass options, followed by
     * <code>-S</code> when identical instances are skipped.
     *
     * @return 		an array of strings suitable for passing to setOptions()
     */
    public String[] getOptions() {
      Vector<String> collected =
          new Vector<String>(Arrays.asList(super.getOptions()));

      if (getSkipIdentical()) {
        collected.add("-S");
      }

      return collected.toArray(new String[collected.size()]);
    }

    /**
     * Returns the tip text for the skipIdentical property.
     * 
     * @return 		tip text for this property suitable for
     * 			displaying in the explorer/experimenter gui
     */
    public String skipIdenticalTipText() {
      final String tip = "Whether to skip identical instances (with distance 0 to the target)";
      return tip;
    }
    
    /**
     * Sets whether identical instances (with distance zero from the
     * target) are left out of the set of neighbours returned.
     * 
     * @param skip 	if true, identical instances are skipped
     */
    public void setSkipIdentical(boolean skip) {
      this.m_SkipIdentical = skip;
    }
    
    /**
     * Gets whether identical instances are skipped from the neighbourhood.
     * 
     * @return 		true if identical instances are skipped
     */
    public boolean getSkipIdentical() {
      return this.m_SkipIdentical;
    }

    /**
     * Returns the tip text for the skipDifferentClass property.
     * 
     * @return 		tip text for this property suitable for
     * 			displaying in the explorer/experimenter gui
     */
    public String skipDifferentClassTipText() {
      final String tip = "Whether different instances are skipped ";
      return tip;
    }
    
    /**
     * Sets whether instances of a different class than the target are
     * left out of the neighbours.
     * 
     * @param skip 	if true, instances of a different class are skipped
     */
    public void setSkipDifferentClass(boolean skip) {
      this.m_SkipDifferentClass = skip;
    }
    
    /**
     * Gets whether instances of a different class than the target are
     * left out of the neighbours.
     * 
     * @return 		true if instances with different classes are skipped
     */
    public boolean getSkipDifferentClass() {
      return this.m_SkipDifferentClass;
    }  
    /** 
     * Returns the nearest instance in the current neighbourhood to the supplied
     * instance, i.e. the first element of the result of
     * kNearestNeighbours(target, 1).
     *  
     * @param target 	The instance to find the nearest neighbour for.
     * @return		the nearest instance
     * @throws Exception 	if the nearest neighbour could not be found.
     */
    public Instance nearestNeighbour(Instance target) throws Exception {
      Instances single = kNearestNeighbours(target, 1);
      return single.instance(0);
    }
    
    /**
     * Returns k nearest instances in the current neighbourhood to the supplied
     * instance, found by a linear scan over all instances.
     *
     * The target itself (same object reference) is never a neighbour.
     * When skipDifferentClass is set and the target's class value can be
     * read, only instances with the same class value are considered. When
     * skipIdentical is set, instances at distance zero are left out, except
     * that the last instance of the dataset is still accepted while fewer
     * than kNN neighbours have been collected. Instances that tie with the
     * current k-th distance are kept as well, so more than kNN neighbours
     * may be returned.
     *
     * As a side effect the neighbours and their distances are stored and
     * can be fetched with getNearestInstances() and getDistances(); the
     * returned set holds the same neighbours in the same order.
     *  
     * @param target 	The instance to find the k nearest neighbours for.
     * @param kNN		The number of nearest neighbours to find.
     * @return		the k nearest neighbors
     * @throws Exception  if the neighbours could not be found.
     */
    public Instances kNearestNeighbours(Instance target, int kNN) throws Exception {
      boolean bSkipDifferentClass = false;
      double targetClassValue = 0.0;
      if (m_Stats != null)
        m_Stats.searchStart();
      if (m_SkipDifferentClass) {
        try {
          targetClassValue = target.classValue();
          bSkipDifferentClass = true;
        }
        catch (UnassignedDatasetException ex) {
          // the class of the target cannot be read, so no class filtering
          bSkipDifferentClass = false;
        }
      }

      MyHeap heap = new MyHeap(kNN);
      double distance;
      int firstkNN = 0;   // number of instances put on the heap so far
      final int numInstances = m_Instances.numInstances();
      for (int i = 0; i < numInstances; i++) {
        Instance candidate = m_Instances.instance(i);
        if (target == candidate) //for hold-one-out cross-validation
          continue;
        if (bSkipDifferentClass) {
          try {
            if (targetClassValue != candidate.classValue())
              continue;
          }
          catch (UnassignedDatasetException ex) {
            continue;
          }
        }
        if (m_Stats != null)
          m_Stats.incrPointCount();
        if (firstkNN < kNN) {
          // heap not full yet: every admissible instance is taken
          distance = m_DistanceFunction.distance(target, candidate, Double.POSITIVE_INFINITY, m_Stats);
          if (distance == 0.0 && m_SkipIdentical && i < numInstances - 1)
            continue;
          // put exactly once; a second put of the same index would add a
          // duplicate neighbour
          heap.put(i, distance);
          firstkNN++;
        }
        else {
          // heap full: compare against the current k-th distance
          MyHeapElement temp = heap.peek();
          distance = m_DistanceFunction.distance(target, candidate, temp.distance, m_Stats);
          if (distance == 0.0 && m_SkipIdentical)
            continue;
          if (distance < temp.distance) {
            heap.putBySubstitute(i, distance);
          }
          else if (distance == temp.distance) {
            heap.putKthNearest(i, distance);
          }
        }
      }

      // The count is captured before the heap is emptied below; afterwards
      // both heap.size() and heap.noOfKthNearest() are zero.
      final int numNeighbours = heap.size() + heap.noOfKthNearest();
      m_Distances = new double[numNeighbours];
      m_nnInstances = new Instance[numNeighbours];
      // both arrays are filled from the back: ties first, then the heap
      int i = 1;
      MyHeapElement h;
      while (heap.noOfKthNearest() > 0) {
        h = heap.getKthNearest();
        m_nnInstances[numNeighbours - i] = m_Instances.instance(h.index);
        m_Distances[numNeighbours - i] = h.distance;
        i++;
      }
      while (heap.size() > 0) {
        h = heap.get();
        m_nnInstances[numNeighbours - i] = m_Instances.instance(h.index);
        m_Distances[numNeighbours - i] = h.distance;
        i++;
      }

      m_DistanceFunction.postProcessDistances(m_Distances);

      Instances neighbours = new Instances(m_Instances, numNeighbours);
      for (int k = 0; k < numNeighbours; k++) {
        neighbours.add(m_nnInstances[k]);
      }

      if (m_Stats != null)
        m_Stats.searchFinish();

      return neighbours;
    }
    
    /** 
     * Returns the distances of the neighbours found by the most recent
     * call of kNearestNeighbours or nearestNeighbour. One of those methods
     * must have been called at some point before; the distances returned
     * always belong to the last target instance that was searched for.
     *
     * @return 		array containing the distances of the 
     * 			nearestNeighbours. The length and ordering of the 
     * 			array is the same as that of the array returned by
     * 			getNearestInstances.
     * @throws Exception 	if called before calling kNearestNeighbours
     *            	or nearestNeighbours.
     */
    public double[] getDistances() throws Exception {
      if (m_Distances != null) {
        return m_Distances;
      }
      throw new Exception("No distances available. Please call either "+
                          "kNearestNeighbours or nearestNeighbours first.");
    }

    /** 
     * Returns the neighbour instances found by the most recent call of
     * kNearestNeighbours or nearestNeighbour. One of those methods must
     * have been called at some point before; the instances returned
     * always belong to the last target instance that was searched for.
     *
     * @return 		array containing the nearest neighbour instances.
     * 			The length and ordering of the array is the same
     * 			as that of the array returned by getDistances.
     * @throws Exception 	if called before calling kNearestNeighbours
     *            	or nearestNeighbours.
     */
    public Instance[] getNearestInstances() throws Exception {
      if (m_nnInstances != null) {
        return m_nnInstances;
      }
      throw new Exception("No indexes available. Please call either "+
                          "kNearestNeighbours or nearestNeighbours first.");
    }
    
    /** 
     * Sets the instances comprising the current neighbourhood and passes
     * the same set on to the distance function.
     * 
     * @param insts 	The set of instances on which the nearest neighbour 
     * 			search is carried out. Usually this set is the 
     * 			training set. 
     * @throws Exception	if setting of instances fails
     */
    public void setInstances(Instances insts) throws Exception {
      this.m_Instances = insts;
      this.m_DistanceFunction.setInstances(insts);
    }
    
    /** 
     * Updates the LinearNNSearchENN to cater for the new added instance. Only
     * the DistanceFunction is updated here; the instance is not added to the
     * stored set of instances by this method.
     * 
     * @param ins 	The instance to add. Usually this is the instance that 
     * 			is added to our neighbourhood i.e. the training 
     * 			instances.
     * @throws Exception	if no instances have been supplied yet
     */
    public void update(Instance ins) throws Exception {
      boolean noInstances = (m_Instances == null);
      if (noInstances) {
        throw new Exception("No instances supplied yet. Cannot update without"+
                            "supplying a set of instances first.");
      }
      m_DistanceFunction.update(ins);
    }
    
    /** 
     * Adds the given instance info by delegating to update(Instance).
     * Nothing happens when no instances have been set; a failure of the
     * update is printed as a stack trace and not propagated.
     * 
     * @param ins 	The instance to add the information of. Usually this is
     * 			the test instance supplied to update the range of 
     * 			attributes in the  distance function.
     */
    public void addInstanceInfo(Instance ins) {
      if (m_Instances == null) {
        return;
      }
      try {
        update(ins);
      } catch (Exception ex) {
        ex.printStackTrace();
      }
    }

    /**
     * Returns the revision string. No revision is provided by this
     * class; the method is an unimplemented stub and yields null.
     *
     * @return always null
     */
    @Override
    public String getRevision() {
      // TODO stub: no revision information available
      return null;
    }
  }
    
}