/* $Id: hdrl_sigclip.c,v 1.2 2013-10-16 11:31:07 cgarcia Exp $
 *
 * This file is part of the HDRL
 * Copyright (C) 2012,2013 European Southern Observatory
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

/*
 * $Author: cgarcia $
 * $Date: 2013-10-16 11:31:07 $
 * $Revision: 1.2 $
 * $Name: not supported by cvs2svn $
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

/*-----------------------------------------------------------------------------
                                   Includes
-----------------------------------------------------------------------------*/

#include "hdrl_sigclip.h"
#include "hdrl_utils.h"
#include "hdrl_collapse.h"

#include <cpl.h>
#include <string.h>
#include <math.h>


/*-----------------------------------------------------------------------------
                                   Static
 -----------------------------------------------------------------------------*/

static double hdrl_iqr(const cpl_vector *) ;
static cpl_error_code hdrl_sort_double_pairs(double *, double *, cpl_size) ;

/*----------------------------------------------------------------------------*/
/**
  @defgroup hdrl_sigclip   SIGMA-CLIPPING Module
 
  This module provides an iterative sigma-clipping functionality. 
 */
/*----------------------------------------------------------------------------*/

/**@{*/

/* ---------------------------------------------------------------------------*/
/**
  @brief Create parameter list for sigma-clip parameters
  @param full_prefix    prefix for parameter name
  @param alias_prefix   prefix for cli alias 
  @param context        context of parameter
  @param defaults       default parameters values
  @return cpl_parameterlist
 
  Creates a parameterlist out of all parameters of the sigma-clip algorithm.
 */
/* ---------------------------------------------------------------------------*/
cpl_parameterlist * hdrl_sigclip_parameter_create_parlist(
        const char              *   full_prefix,
        const char              *   alias_prefix,
        const char              *   context,
        const hdrl_parameter    *   defaults)
{
    cpl_ensure(full_prefix && context && alias_prefix && defaults,
            CPL_ERROR_NULL_INPUT, NULL);
    const char          *   full_sep = strlen(full_prefix) > 0 ? "." : "";
    const char          *   alias_sep = strlen(alias_prefix) > 0 ? "." : "";
    cpl_parameterlist   *   parlist = cpl_parameterlist_new();

    /* --prefix.kappa-low */
    hdrl_setup_vparameter(parlist, full_prefix, full_sep, "",
            alias_prefix, alias_sep, "kappa-low", context,
            "Low kappa factor for kappa-sigma clipping algorithm",
            CPL_TYPE_DOUBLE,
            hdrl_collapse_sigclip_parameter_get_kappa_low(defaults));
    
     /* --prefix.kappa-high */
    hdrl_setup_vparameter(parlist, full_prefix, full_sep, "",
            alias_prefix, alias_sep, "kappa-high", context,
            "High kappa factor for kappa-sigma clipping algorithm",
            CPL_TYPE_DOUBLE,
            hdrl_collapse_sigclip_parameter_get_kappa_high(defaults));
 
    /* --prefix.niter */
    hdrl_setup_vparameter(parlist, full_prefix, full_sep, "",
            alias_prefix, alias_sep, "niter", context,
            "Maximum number of clipping iterations for kappa-sigma clipping",
            CPL_TYPE_INT,
            hdrl_collapse_sigclip_parameter_get_niter(defaults));
    
    if (cpl_error_get_code()) {
        cpl_parameterlist_delete(parlist);
        return NULL;
    }

    return parlist;
}

/* ---------------------------------------------------------------------------*/
/**
 * @brief parse parameterlist for sigclip parameters to init corresponding hdrl
 * structure parameters
 *
 * @param parlist    parameter list to parse
 * @param prefix     prefix of parameter name
 * @param kappa_low  pointer to storage to save kappa_low or NULL
 * @param kappa_high pointer to storage to save kappa_high or NULL
 * @param niter      pointer to storage to save niter or NULL
 * @see   hdrl_kappa_sigma_clip_get_parlist()
 * @return cpl_error_code
 *
 * parameterlist should have been created with
 * hdrl_kappa_sigma_clip_get_parlist or have the same name hierachy
 */
/* ---------------------------------------------------------------------------*/
cpl_error_code hdrl_sigclip_parameter_parse_parlist(
        const cpl_parameterlist *   parlist,
        const char              *   prefix,
        double                  *   kappa_low,
        double                  *   kappa_high,
        int                     *   niter)
{
    cpl_ensure_code(prefix && parlist, CPL_ERROR_NULL_INPUT);
    const char * sep = strlen(prefix) > 0 ? "." : "";
    char * name;

    if (kappa_low) {
        name = cpl_sprintf("%s%ssigclip.kappa-low", prefix, sep);
        const cpl_parameter * par = cpl_parameterlist_find_const(parlist, name);
        *kappa_low = cpl_parameter_get_double(par);
        cpl_free(name);
    }

    if (kappa_high) {
        name = cpl_sprintf("%s%ssigclip.kappa-high", prefix, sep);
        const cpl_parameter * par = cpl_parameterlist_find_const(parlist, name);
        *kappa_high = cpl_parameter_get_double(par);
        cpl_free(name);
    }

    if (niter) {
        name = cpl_sprintf("%s%ssigclip.niter", prefix, sep);
        const cpl_parameter * par = cpl_parameterlist_find_const(parlist, name);
        *niter = cpl_parameter_get_int(par);
        cpl_free(name);
    }
    return cpl_error_get_code();
}
 
/*----------------------------------------------------------------------------*/
/**
  @brief    Compute mean image value using kappa-sigma clipping method
  @param source         Input image
  @param error          Input error image
  @param kappa_low      Number of sigmas for lower threshold
  @param kappa_high     Number of sigmas for upper threshold
  @param iter           Number of iterations
  @param mean_ks        The kappa-sigma clipped mean (must not be NULL)
  @param mean_ks_err    The propagated error of the kappa-sigma clipped mean,
                        or NULL
  @param naccepted      Number of accepted values, or NULL
  @param reject_low     Values lower than this have been rejected, or NULL
  @param reject_high    Values higher than this have been rejected, or NULL
  @return   @c CPL_ERROR_NONE or the appropriate error code.
  @see hdrl_kappa_sigma_clip()

  This function converts the image inputs into the proper data types in
  order to call the hdrl_kappa_sigma_clip() function. Both images are
  converted with hdrl_image_to_vector(); the error image is converted using
  the bad pixel map of the source image.

  If either conversion yields no vector, the clipping is skipped: the
  floating point outputs are set to NAN and @a naccepted to 0.

  CPL_ERROR_NULL_INPUT is returned if @a source, @a error or @a mean_ks is
  NULL, CPL_ERROR_INCOMPATIBLE_INPUT if the two images differ in size.
 */
/*----------------------------------------------------------------------------*/
cpl_error_code hdrl_kappa_sigma_clip_image(
        const cpl_image   * source,
        const cpl_image   * error,
        const double        kappa_low,
        const double        kappa_high,
        const int           iter,
        double            * mean_ks,
        double            * mean_ks_err,
        cpl_size          * naccepted,
        double            * reject_low,
        double            * reject_high)
{
    cpl_vector * vec_source = NULL;
    cpl_vector * vec_error = NULL;

    /* Check Entries */
    cpl_error_ensure(source != NULL, CPL_ERROR_NULL_INPUT,
            return CPL_ERROR_NULL_INPUT, "Null input source image!");
    cpl_error_ensure(error != NULL, CPL_ERROR_NULL_INPUT,
            return CPL_ERROR_NULL_INPUT, "Null input error image!");
    cpl_error_ensure(mean_ks != NULL, CPL_ERROR_NULL_INPUT,
            return CPL_ERROR_NULL_INPUT, "Null input mean storage");
    cpl_error_ensure(cpl_image_get_size_x(source)==cpl_image_get_size_x(error),
            CPL_ERROR_INCOMPATIBLE_INPUT, return CPL_ERROR_INCOMPATIBLE_INPUT,
            "source and error image must have same X size");
    cpl_error_ensure(cpl_image_get_size_y(source)==cpl_image_get_size_y(error),
            CPL_ERROR_INCOMPATIBLE_INPUT, return CPL_ERROR_INCOMPATIBLE_INPUT,
            "source and error image must have same Y size");

    /* compress images to vectors excluding the bad pixels */
    vec_source = hdrl_image_to_vector(source, NULL);
    vec_error = hdrl_image_to_vector(error, cpl_image_get_bpm_const(source));

    if (vec_source != NULL && vec_error != NULL) {
        /* Call here the real sigma-clipping function; it accepts NULL for
           every output except mean_ks */
        hdrl_kappa_sigma_clip(vec_source, vec_error, kappa_low, kappa_high,
                             iter, mean_ks, mean_ks_err, naccepted, reject_low,
                             reject_high);
    }
    /* no good pixels */
    else {
        *mean_ks = NAN;
        if (mean_ks_err) *mean_ks_err = NAN;
        if (naccepted) *naccepted = 0;
        if (reject_low) *reject_low = NAN;
        if (reject_high) *reject_high = NAN;
    }

    /* optional outputs that were not requested are logged as NAN / -1 */
    cpl_msg_debug(cpl_func, "mean_ks, mean_ks_err, naccepted:  %g, %g, %ld",
                  *mean_ks,
                  mean_ks_err ? *mean_ks_err : NAN,
                  naccepted ? (long)*naccepted : -1L);

    cpl_vector_delete(vec_source);
    cpl_vector_delete(vec_error);
    return cpl_error_get_code();
}

/* ---------------------------------------------------------------------------*/
/**
 * @brief get first index whose element compares greater than a value
 * @param vec vector to search; the bisection assumes ascending order
 * @param val upper bound to check
 * @return index of the first element greater than val, or the vector size
 *         if no element is greater
 */
/* ---------------------------------------------------------------------------*/
static long get_upper_bound(cpl_vector * vec, double val)
{
    const double * data = cpl_vector_get_data(vec);
    long lo = 0;
    long hi = cpl_vector_get_size(vec);

    /* bisect the half-open range [lo, hi) */
    while (lo < hi) {
        const long mid = lo + (hi - lo) / 2;

        if (val < data[mid]) {
            /* data[mid] is already greater: the answer is at or before mid */
            hi = mid;
        }
        else {
            lo = mid + 1;
        }
    }

    return lo;
}

/* ---------------------------------------------------------------------------*/
/**
 * @brief get first index whose element does not compare less than a value
 * @param vec vector to search; the bisection assumes ascending order
 * @param val lower bound to check
 * @return index of the first element not less than val, or the vector size
 *         if all elements are less
 */
/* ---------------------------------------------------------------------------*/
static long get_lower_bound(cpl_vector * vec, double val)
{
    const double * data = cpl_vector_get_data(vec);
    long lo = 0;
    long hi = cpl_vector_get_size(vec);

    /* bisect the half-open range [lo, hi) */
    while (lo < hi) {
        const long mid = lo + (hi - lo) / 2;

        if (data[mid] < val) {
            /* data[mid] is still too small: the answer is after mid */
            lo = mid + 1;
        }
        else {
            hi = mid;
        }
    }

    return lo;
}

/*----------------------------------------------------------------------------*/
/**
  @brief   Compute mean using kappa-sigma clipping.
  @param   vec         The vector for which mean is to be computed.
  @param   vec_err     The error of vec.
  @param   kappa_low   Number of sigmas for lower threshold.
  @param   kappa_high  Number of sigmas for upper threshold.
  @param   iter        Maximum number of iterations, must be larger than 0.
  @param   mean_ks     The kappa-sigma clipped mean (must not be NULL).
  @param   mean_ks_err The propagated error of the kappa-sigma clipped mean,
                       or NULL.
  @param   naccepted   Number of accepted values, or NULL.
  @param   reject_low  Values lower than this have been rejected, or NULL.
  @param   reject_high Values higher than this have been rejected, or NULL.
  @return   @c CPL_ERROR_NONE or the appropriate error code.

  The function computes the arithmetic mean of a vector after rejecting
  outliers using kappa-sigma clipping. Robust estimates of the mean and
  standard deviation are used to derive the interval within which values in
  the vector are considered good.

  The sigma-clipping is applied on the vec vector data. The vec_err vector
  is used for the error computation. Neither input is modified; the function
  works on sorted copies.

  An iterative process of rejection of the outlier elements of vec is
  applied. iter specifies the maximum number of iterations.

  At each iteration, the median and sigma values of the vector are computed and
  used to derive low and high thresholds (\f$median-kappa\_low \times sigma\f$
  and \f$median+kappa\_high \times sigma\f$). The values of vec outside those
  bounds are rejected and the remaining values are passed to the next
  iteration. The iteration stops early when no value is rejected or when
  only one value is left.

  The mean value of the remaining elements is stored into mean_ks.
  mean_ks_err contains \f$\frac{\sqrt{\sum_i{val_i^{2}}}}{N}\f$ where
  \f$val_i\f$ are the remaining elements of vec_err and N the number of those
  elements. The N value is stored in naccepted.

  reject_low and reject_high are the final thresholds differentiating the
  rejected pixels from the others.

  The iterative process is illustrated here:
  \image html sigclip_algorithm.png

  Note that the \f$\sigma\f$ used for the thresholding in the different
  iterations is not the standard deviation but the scaled interquartile range
  (IQR) of the distribution. The scaling is \f$\sigma = \frac{IQR}{1.349}\f$.

  As illustrated below, the IQR is the distance between the upper and
  the lower quartiles of a distribution.
  \image html iqr.svg

  The IQR is a more robust estimate of the scale of the distribution than the
  standard deviation but only has 61% of the asymptotic statistical efficiency
  for normal distributed data. This higher error in scale parameter only has
  limited influence on the result as it is only used for the determination of
  the clipping thresholds.

  Errors: CPL_ERROR_NULL_INPUT if vec, vec_err or mean_ks is NULL,
  CPL_ERROR_INCOMPATIBLE_INPUT if vec and vec_err differ in size,
  CPL_ERROR_ILLEGAL_INPUT if iter is not larger than 0. If the thresholds of
  an iteration leave no value inside the accepted range, the extraction of
  the accepted values fails; the iteration then stops, the outputs are
  computed from the values accepted so far and the error code set by the
  failed extraction is returned.
 */
/*----------------------------------------------------------------------------*/
cpl_error_code hdrl_kappa_sigma_clip(
        const cpl_vector  * vec,
        const cpl_vector  * vec_err,
        const double        kappa_low,
        const double        kappa_high,
        const int           iter,
        double            * mean_ks,
        double            * mean_ks_err,
        cpl_size          * naccepted,
        double            * reject_low,
        double            * reject_high)
{
    /* sorted working copies of the inputs; shrunk at every clipping step */
    cpl_vector   * vec_image = NULL;
    cpl_vector   * vec_image_err = NULL;

    /* thresholds of the last iteration; the first loop pass always sets them */
    double lower_bound = NAN;
    double upper_bound = NAN;

    cpl_error_code sort_status;

    cpl_error_ensure(vec != NULL, CPL_ERROR_NULL_INPUT,
                     return CPL_ERROR_NULL_INPUT, "Null input vector data");
    cpl_error_ensure(vec_err != NULL, CPL_ERROR_NULL_INPUT,
                     return CPL_ERROR_NULL_INPUT, "Null input vector errors");
    cpl_error_ensure(cpl_vector_get_size(vec) == cpl_vector_get_size(vec_err),
                     CPL_ERROR_INCOMPATIBLE_INPUT,
                     return CPL_ERROR_INCOMPATIBLE_INPUT,
                     "input data and error vectors must have same sizes");
    cpl_error_ensure(mean_ks != NULL, CPL_ERROR_NULL_INPUT,
                     return CPL_ERROR_NULL_INPUT, "Null input mean storage");
    cpl_error_ensure(iter > 0, CPL_ERROR_ILLEGAL_INPUT,
                     return CPL_ERROR_ILLEGAL_INPUT,
                     "iter must be larger than 0");

    vec_image = cpl_vector_duplicate(vec);
    vec_image_err = cpl_vector_duplicate(vec_err);

    /* Sort the data once, permuting the errors alongside. Every later step
       extracts a contiguous range, which keeps the vectors sorted. */
    sort_status = hdrl_sort_double_pairs(cpl_vector_get_data(vec_image),
                                         cpl_vector_get_data(vec_image_err),
                                         cpl_vector_get_size(vec_image));
    if (sort_status != CPL_ERROR_NONE) {
        cpl_vector_delete(vec_image);
        cpl_vector_delete(vec_image_err);
        return sort_status;
    }

    /*    BEGIN ITERATION OF KAPPA-SIGMA CLIP */
    for (int it = 0; it < iter; it++) {
        const cpl_size vec_size = cpl_vector_get_size(vec_image);
        double median, sigma;
        cpl_size lower_index, upper_index;
        cpl_vector * vec_temp, * vec_temp_err;

        /* Nothing to do if only one data point */
        if (vec_size == 1) {
            lower_bound = cpl_vector_get(vec_image, 0);
            upper_bound = lower_bound;
            break;
        }

        /* Use median as a robust estimator of the mean */
        median = cpl_vector_get_median_const(vec_image);

        /* standard deviation from inter-quartile range as appropriate
           for a Gaussian distribution */
        sigma = hdrl_iqr(vec_image) / 1.349;

        lower_bound = median - kappa_low * sigma;
        upper_bound = median + kappa_high * sigma;

        /* first index not below the lower threshold and last index not
           above the upper threshold (clamped to 0) */
        lower_index = get_lower_bound(vec_image, lower_bound);
        upper_index = get_upper_bound(vec_image, upper_bound);
        upper_index = CX_MAX(upper_index - 1, 0);

        /* Stop if no outliers were found */
        if ((lower_index == 0) && (upper_index == vec_size - 1))
            break;

        vec_temp = cpl_vector_extract(vec_image, lower_index, upper_index, 1);
        vec_temp_err = cpl_vector_extract(vec_image_err,
                                          lower_index, upper_index, 1);

        /* The extraction fails when no value lies inside the thresholds
           (lower_index > upper_index). Keep the current vectors and stop
           instead of iterating on with NULL temporaries. */
        if (vec_temp == NULL || vec_temp_err == NULL) {
            cpl_vector_delete(vec_temp);
            cpl_vector_delete(vec_temp_err);
            break;
        }

        /* The accepted values become the working vectors */
        cpl_vector_delete(vec_image);
        cpl_vector_delete(vec_image_err);
        vec_image = vec_temp;
        vec_image_err = vec_temp_err;
    }

    /*    COMPUTE THE KAPPA-SIGMA CLIP MEAN */
    *mean_ks  = cpl_vector_get_mean(vec_image);

    if (naccepted) *naccepted = cpl_vector_get_size(vec_image);

    if (mean_ks_err) {
        /*Propagate the errors (cpl_vector_power is very slow PIPE-4330) */
        cpl_vector_multiply(vec_image_err, vec_image_err);
        /* sqrt(mean(err^2) / N) equals sqrt(sum(err^2)) / N
           (workaround for missing function cpl_vector_get_sum()) */
        *mean_ks_err = sqrt(cpl_vector_get_mean(vec_image_err) /
                            (double)cpl_vector_get_size(vec_image_err));
    }

    if (reject_low) *reject_low = lower_bound;
    if (reject_high) *reject_high = upper_bound;

    /* CLEAN, AND RETURN */
    cpl_vector_delete(vec_image);
    cpl_vector_delete(vec_image_err);
    return cpl_error_get_code();
}

/**@}*/

/*----------------------------------------------------------------------------*/
/**
  @internal
  @brief   Compute inter-quartile range (iqr) of a vector. 
  @param   vec a sorted vector for which the iqr is to be computed.
  @return  The iqr

  The function computes the iqr of a vector. In case of an error,
  @f$-1.0@f$ is returned.
  For efficiency reasons the vector is required to be sorted before being
  passed into this function. If this is the case is not verified.

  As an estimate for the scale the IQR / 1.349 has an asymptotic statistical
  efficiency of 61% for normal distributed (or thin tailed) data. The
  efficiency is higher than the standard deviation for thick tailed data (e.g.
  outliers).
 */
/*----------------------------------------------------------------------------*/
static double hdrl_iqr(const cpl_vector * sort_vec)
{
    double       iqr = -1.0;
    long         N, N25;

    N = cpl_vector_get_size(sort_vec);

    /* 4 cases depending on the number of elements */
    if (N % 4 == 0) {
        N25 = N / 4;
        iqr =   0.75 * cpl_vector_get(sort_vec, 3 * N25 - 1)
              + 0.25 * cpl_vector_get(sort_vec, 3 * N25)
              - 0.25 * cpl_vector_get(sort_vec, N25 - 1)
              - 0.75 * cpl_vector_get(sort_vec, N25);
        return fabs(iqr);
    }
    else if (N % 4 == 1) {
        N25 = (N - 1) / 4;
        iqr =   cpl_vector_get(sort_vec, 3 * N25)
              - cpl_vector_get(sort_vec, N25);
        return fabs(iqr);
    }
    else if (N % 4 == 2) {
        N25 = (N - 2) / 4;
        iqr =   0.25 * cpl_vector_get(sort_vec, 3 * N25)
              + 0.75 * cpl_vector_get(sort_vec, 3 * N25 + 1)
              - 0.75 * cpl_vector_get(sort_vec, N25)
              - 0.25 * cpl_vector_get(sort_vec, N25 + 1);
        return fabs(iqr);
    }
    else {
        N25 = (N - 3) / 4;
        iqr =   0.50 * cpl_vector_get(sort_vec, 3 * N25 + 1)
              + 0.50 * cpl_vector_get(sort_vec, 3 * N25 + 2)
              - 0.50 * cpl_vector_get(sort_vec, N25)
              - 0.50 * cpl_vector_get(sort_vec, N25 + 1);
        return fabs(iqr);
    }
}

/*---------------------------------------------------------------------------*/
/**
  @brief   Sort an array @a u1 of doubles, and permute an array @a u2
           in the same way as @a u1 is permuted.
  @param   u1   Pointer to the first array.
  @param   u2   Pointer to the second array.
  @param   n    The common length of both arrays.
  @return   @c CPL_ERROR_NONE or the appropriate error code.

  Both arrays are wrapped (not copied) into a cpl_bivector which is sorted
  in place in ascending order of @a u1. The wrappers are released afterwards;
  the arrays themselves remain owned by the caller.

  CPL_ERROR_ILLEGAL_INPUT is returned if @a n is smaller than 1,
  CPL_ERROR_NULL_INPUT if @a u1 or @a u2 is NULL. A failure of
  cpl_bivector_sort() is returned to the caller.
 */
/*---------------------------------------------------------------------------*/
static cpl_error_code hdrl_sort_double_pairs(double *u1, double *u2, cpl_size n)
{
    cpl_ensure_code(n >= 1, CPL_ERROR_ILLEGAL_INPUT);
    cpl_error_ensure(u1 != NULL, CPL_ERROR_NULL_INPUT,
                     return CPL_ERROR_NULL_INPUT, "NULL pointer to 1st array");
    cpl_error_ensure(u2 != NULL, CPL_ERROR_NULL_INPUT,
                     return CPL_ERROR_NULL_INPUT, "NULL pointer to 2nd array");

    cpl_vector * biu1 = cpl_vector_wrap(n, u1);
    cpl_vector * biu2 = cpl_vector_wrap(n, u2);
    cpl_bivector * bi_all = cpl_bivector_wrap_vectors(biu1, biu2);

    /* same bivector as input and output: sort in place */
    const cpl_error_code status = cpl_bivector_sort(bi_all, bi_all,
                                                    CPL_SORT_ASCENDING,
                                                    CPL_SORT_BY_X);

    /* cleaning up: drop the wrappers only, the data belongs to the caller */
    cpl_bivector_unwrap_vectors(bi_all);
    cpl_vector_unwrap(biu1);
    cpl_vector_unwrap(biu2);

    return status;
}
