/* Copyright (C) 2003, 2004 Peter J. Verveer
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met: 
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials provided
 *    with the distribution.
 *
 * 3. The name of the author may not be used to endorse or promote
 *    products derived from this software without specific prior
 *    written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.      
 */

#include "ni_support.h"
#include "ni_measure.h"
#include <stdlib.h>
#include <math.h>
#include <float.h>
#include <assert.h>

/* Node of a singly linked list of label pairs. NI_Label pushes one node
   for every two provisional labels that turn out to touch each other and
   therefore have to be merged; it stores the smaller label in index1. */
typedef struct {
  Int32 index1;  /* smaller label of the pair */
  Int32 index2;  /* larger label of the pair */
  void *next;    /* next node of the list, NULL for the last node */
} _index_pair;

/* Label the connected non-zero regions of an array.
 *
 * Every non-zero element of `input` receives a positive Int32 label in
 * the output array; zero elements stay zero. The work is done in two
 * passes: a scan that hands out provisional labels and records which of
 * them touch, followed by a merge/renumber step that is only executed
 * when touching labels were found.
 *
 * input     : array to label; complex types are rejected.
 * strct     : structuring element; must be of type Bool, have the same
 *             rank as input and a size of 3 along every dimension. Only
 *             the first half of its flat data is used for the scan.
 * max_label : receives the largest label present in the output.
 * output    : receives the Int32 label array obtained from
 *             NI_OutputArray().
 * output_in : forwarded unchanged to NI_OutputArray().
 *
 * Returns 1 on success and 0 if a Python error is pending on exit.
 *
 * NOTE(review): on a failure after NI_OutputArray() succeeded, *output
 * is not released here - confirm that the caller takes care of it.
 */
int NI_Label(PyArrayObject* input, PyArrayObject* strct, int *max_label,
             PyArrayObject** output, PyObject* output_in)
{
  int kk, jj, ssize, size, filter_size, shifts[NI_MAXDIM], *offsets = NULL;
  int mask_value, *oo, irank, itype, idims[NI_MAXDIM], sdims[NI_MAXDIM];
  PyArrayObject *tmp = NULL;
  Bool *ps;
  char *pi, *po;
  Int32 index = 0, *index_map = NULL;
  NI_FilterIterator fi;
  NI_Iterator ii, io;
  _index_pair *pairs = NULL;

  assert(input != NULL);
  assert(strct != NULL);
  assert(max_label != NULL);

  /* complex type not supported: */
  itype = NI_GetArrayType(input);
  if (itype == tComplex32 || itype == tComplex64) {
    PyErr_SetString(PyExc_RuntimeError, "complex arrays not supported");
    goto exit;
  }
  /* structuring element must be of bool type: */
  if (NI_GetArrayType(strct) != tBool) {
    PyErr_SetString(PyExc_RuntimeError, "structure type must be Bool");
    goto exit;
  }

  irank = NI_GetArrayRank(input);
  /* input and structure must have equal rank: */
  if (NI_GetArrayRank(strct) != irank) {
    PyErr_SetString(PyExc_RuntimeError, 
                    "structure rank must be equal to input rank");
    goto exit;
  }
  /* ssize accumulates the total number of structure elements; every
     dimension of the structure must have size 3: */
  ssize = 1;
  NI_GetArrayDimensions(strct, sdims);
  for(kk = 0; kk < irank; kk++) {
    ssize *= sdims[kk];
    if (sdims[kk] != 3) {
    PyErr_SetString(PyExc_RuntimeError, "structure dimensions must "
                    "equal to 3");
      goto exit;
    }
  }

  /* we only use the first half of the structure data, so we make a
     temporary structure for use with the filter functions: */
  tmp = NI_NewArray(tBool, irank, sdims);
  if (!tmp) {
    PyErr_NoMemory();
    goto exit;
  }
  NI_CopyArray(tmp, strct);
  ps = (Bool*)NI_GetArrayData(tmp);
  /* filter_size counts the set elements in the first half; the second
     half (including the middle element) is cleared in the copy: */
  filter_size = 0;
  for(kk = 0; kk < ssize / 2; kk++)
    if (ps[kk])
      ++filter_size;
  for(kk = ssize / 2; kk < ssize; kk++)
    ps[kk] = 0;
  
  /* get output array */
  NI_GetArrayDimensions(input, idims);
  if (!NI_OutputArray(tInt32, irank, idims, output_in, output))
    goto exit;

  /* get data and size */
  pi = NI_GetArrayData(input);
  po = NI_GetArrayData(*output);
  size = NI_Elements(*output);
  if (!NI_InitPointIterator(input, &ii))
    goto exit;
  if (!NI_InitPointIterator(*output, &io))
    goto exit;

  /* set all elements in the output corresponding to non-zero elements
     in input to -1: */
  for(kk = 0; kk < size; kk++) {
    Int32 *p = (Int32*)po;
    switch (itype) {
    case tBool:
      *p = *(Bool*)pi ? -1 : 0;
      break;
    case tUInt8:
      *p = *(UInt8*)pi ? -1 : 0;
      break;
    case tUInt16:
      *p = *(UInt16*)pi ? -1 : 0;
      break;
    case tUInt32:
      *p = *(UInt32*)pi ? -1 : 0;
      break;
#if HAS_UINT64
    case tUInt64:
      *p = *(UInt64*)pi ? -1 : 0;
      break;
#endif
    case tInt8:
      *p = *(Int8*)pi ? -1 : 0;
      break;
    case tInt16:
      *p = *(Int16*)pi ? -1 : 0;
      break;
    case tInt32:
      *p = *(Int32*)pi ? -1 : 0;
      break;
    case tInt64:
      *p = *(Int64*)pi ? -1 : 0;
      break;
    case tFloat32:
      *p = *(Float32*)pi ? -1 : 0;
      break;
    case tFloat64:
      *p = *(Float64*)pi ? -1 : 0;
      break;
    default:
      PyErr_SetString(PyExc_RuntimeError, "data type not supported");
      goto exit;
    }
    NI_ITERATOR_NEXT2(ii, io, pi, po);
  }

  /* we don't use shifts: */
  for(kk = 0; kk < irank; kk++)
    shifts[kk] = 0;
  /* calculate the filter offsets: */
  if (!NI_InitFilterOffsetsFromArray(*output, tmp, shifts,
                                     NI_EXTEND_CONSTANT, &offsets,
                                     &mask_value, NULL))
    goto exit;
  /* initialize filter iterator: */
  if (!NI_InitFilterIterator(irank, sdims, filter_size,  idims, shifts,
                             &fi))
    goto exit;
  
  /* reset output iterator: */
  NI_ITERATOR_RESET(io);
  po = NI_GetArrayData(*output);

  /* iterator over the elements: */
  oo = offsets;
  for(jj = 0; jj < size; jj++) {
    /* negative means: part of an object, but not labeled yet */
    if (*(Int32*)po < 0) {
      /* label of an already labeled neighbor, 0 while none was found */
      Int32 neighbor = 0;
      /* iterate over structuring element: */
      for(kk = 0; kk < filter_size; kk++) {
        int offset = oo[kk];
        /* offsets equal to mask_value are skipped */
        if (offset != mask_value) {
          Int32 tt = *(Int32*)(po + offset);
          if (tt > 0) {
            /* this element is next to an already found object: */
            if (neighbor && neighbor != tt) {
              /* we have two objects that must be merged later: */
              _index_pair* tp = (_index_pair*)malloc(sizeof(_index_pair));
              if (!tp) {
                PyErr_NoMemory();
                goto exit;
              }
              /* push the new pair on the front of the list */
              tp->next = pairs;
              /* the pairs must be ordered: */
              if (neighbor < tt) {
                tp->index1 = neighbor;
                tp->index2 = tt;
              } else {
                tp->index1 = tt;
                tp->index2 = neighbor;
              }
              pairs = tp;
            } else {
              neighbor = tt;
            }
          }
        }
      }
      if (neighbor) {
        /* this point belongs to an existing object */
        *(Int32*)po = neighbor;
      } else {
        /* this may be a new object: */
        *(Int32*)po = ++index;
      }
    }
    NI_FILTER_NEXT(fi, io, oo, po);
  }

  /* number of provisional labels; replaced further down if labels had
     to be merged: */
  *max_label = index;

  /* merge any touching objects: */
  if (pairs) {
    Int32 counter;
    /* index_map[k] holds, zero based, the label that label k + 1 is
       mapped to; initially every label maps to itself: */
    index_map = (Int32*)malloc(index * sizeof(Int32));
    if (!index_map) {
      PyErr_NoMemory();
      goto exit;
    }
    for(jj = 0; jj < index; jj++)
      index_map[jj] = (Int32)jj;
    while (pairs) {
      /* labels are 1-based, index_map is 0-based */
      Int32 idx1 = pairs->index1 - 1;
      Int32 idx2 = pairs->index2 - 1;
      if (index_map[idx2] == idx1 || index_map[idx2] == idx2) {
        /* if this pair was already processed, or if idx2 was not
           mapped yet, we delete this pair and map idx2 to idx1: */
        _index_pair *tp = pairs;
        pairs = tp->next;
        free(tp);
        index_map[idx2] = idx1;
      } else {
        /* idx2 was already mapped, therefore we find what it was
           mapped to and change the current pair to the result of that
           and idx1. Since the pair is not destroyed, it will be
           re-processed with the adapted values.  */
        idx2 = index_map[idx2];
        /* keep the pairs ordered: */
        if (idx1 < idx2) {
          pairs->index1 = idx1 + 1;
          pairs->index2 = idx2 + 1;
        } else {
          pairs->index1 = idx2 + 1;
          pairs->index2 = idx1 + 1;
        }
      }
    }
    for(jj = 0; jj < index; jj++) {
      /* if the current index maps to a index that is also mapped,
         change it to map to that index. Since an index always maps to
         a lower index or to itself, this will make sure that at the
         end all indices map to an unmapped index. */
      if (index_map[index_map[jj]] < index_map[jj])
        index_map[jj] = index_map[index_map[jj]];
    }
    /* renumber the indices that are not mapped: */
    /* an unmapped index gets the next consecutive 1-based label; a
       mapped index copies the final label of its (lower, hence already
       renumbered) target: */
    counter = 0;
    for(jj = 0; jj < index; jj++)
      if (index_map[jj] == jj)
        index_map[jj] = ++counter;
      else
        index_map[jj] = index_map[index_map[jj]];
  }

  /* relabel the output if we merged some objects: */
  if (index_map) {
    /* max_label is recomputed as the maximum of the final labels */
    *max_label = 0;
    NI_ITERATOR_RESET(io);
    po = NI_GetArrayData(*output);
    for(jj = 0; jj < size; jj++) {
      Int32 p = *(Int32*)po;
      if (p > 0 )
        *(Int32*)po = index_map[p - 1];
      if (*(Int32*)po > *max_label)
        *max_label = *(Int32*)po;
      NI_ITERATOR_NEXT(io, po);
    }
  }

 exit:
  /* release all temporaries; pairs is only non-empty here if the
     function was left early because of an error: */
  if (offsets) free(offsets);
  if (index_map) free(index_map);
  while (pairs) {
    _index_pair *tp = pairs;
    pairs = (_index_pair*)pairs->next;
    free(tp);
  }
  Py_XDECREF(tmp);
  return PyErr_Occurred() ? 0 : 1;
}

/* Account for one array element in the bounding boxes of the labeled
 * objects.
 *
 * _pi         : pointer to the current element, read as a _type.
 * _regions    : array of NI_ObjectRegion pointers, indexed by label - 1;
 *               an entry that is NULL is allocated on first use.
 * _rank       : number of dimensions.
 * _dimensions : array sizes, used as initial "start" of a new region.
 * _max_label  : largest label that is recorded.
 * _ii         : iterator whose coordinates[] give the current position.
 * _type       : C type of the element.
 *
 * Elements whose value lies outside 1.._max_label are ignored. The value
 * is range-checked in 64 bits before it is narrowed to an int index, so
 * that a large 64-bit label cannot wrap around into the valid range.
 * On allocation failure a MemoryError is set and control jumps to the
 * label `exit` of the enclosing function.
 */
#define NI_FIND_OBJECT_POINT(_pi, _regions, _rank,  _dimensions, \
                             _max_label, _ii, _type)             \
{                                                                \
  Int64 _label = (Int64)(*(_type*)(_pi));                        \
  if (_label >= 1 && _label <= (Int64)(_max_label)) {            \
    int _kk, _sindex = (int)(_label - 1);                        \
    if (!(_regions)[_sindex]) {                                  \
      /* this is a new object: */                                \
      (_regions)[_sindex] =                                      \
        (NI_ObjectRegion*)malloc(sizeof(NI_ObjectRegion));       \
      if (!(_regions)[_sindex]) {                                \
        PyErr_NoMemory();                                        \
        goto exit;                                               \
      }                                                          \
      /* start with an empty (inverted) box: */                  \
      for(_kk = 0; _kk < (_rank); _kk++) {                       \
        (_regions)[_sindex]->start[_kk] = (_dimensions)[_kk];    \
        (_regions)[_sindex]->end[_kk] = 0;                       \
      }                                                          \
    }                                                            \
    /* adapt the object region size and position: */             \
    for(_kk = 0; _kk < (_rank); _kk++) {                         \
      int _cc = (_ii).coordinates[_kk];                          \
      if (_cc < (_regions)[_sindex]->start[_kk])                 \
        (_regions)[_sindex]->start[_kk] = _cc;                   \
      if (_cc + 1 > (_regions)[_sindex]->end[_kk])               \
        (_regions)[_sindex]->end[_kk] = _cc + 1;                 \
    }                                                            \
  }                                                              \
}

/* Determine the bounding boxes of the labeled objects in an array.
 *
 * input     : integer (or Bool) label array; any other element type
 *             raises a RuntimeError.
 * max_label : labels 1..max_label are recorded, other values are
 *             ignored.
 * regions   : array with max_label entries; an entry is allocated the
 *             first time its label is seen. Entries are tested against
 *             NULL before allocation, so the caller presumably passes
 *             them NULL-initialized - TODO confirm.
 *
 * Returns 1 on success. If a Python error is pending, all entries of
 * regions are freed and reset to NULL and 0 is returned.
 */
int NI_FindObjects(PyArrayObject* input, int max_label, 
                   NI_ObjectRegion** regions)
{
  NI_Iterator point_iter;
  char *data;
  int n_points, remaining, slot;
  int rank, type, dims[NI_MAXDIM];

  assert(input != NULL);
  assert(max_label >= 0);

  /* data pointer, element count and point iterator of the input: */
  data = NI_GetArrayData(input);
  n_points = NI_Elements(input);
  if (!NI_InitPointIterator(input, &point_iter))
    goto exit;

  rank = NI_GetArrayRank(input);
  type = NI_GetArrayType(input);
  NI_GetArrayDimensions(input, dims);

  /* visit every element once, dispatching on the element type: */
  for(remaining = n_points; remaining > 0; remaining--) {
    switch (type) {
    case tBool:
      NI_FIND_OBJECT_POINT(data, regions, rank, dims, max_label,
                           point_iter, Bool);
      break;
    case tUInt8:
      NI_FIND_OBJECT_POINT(data, regions, rank, dims, max_label,
                           point_iter, UInt8);
      break;
    case tUInt16:
      NI_FIND_OBJECT_POINT(data, regions, rank, dims, max_label,
                           point_iter, UInt16);
      break;
    case tUInt32:
      NI_FIND_OBJECT_POINT(data, regions, rank, dims, max_label,
                           point_iter, UInt32);
      break;
#if HAS_UINT64
    case tUInt64:
      NI_FIND_OBJECT_POINT(data, regions, rank, dims, max_label,
                           point_iter, UInt64);
      break;
#endif
    case tInt8:
      NI_FIND_OBJECT_POINT(data, regions, rank, dims, max_label,
                           point_iter, Int8);
      break;
    case tInt16:
      NI_FIND_OBJECT_POINT(data, regions, rank, dims, max_label,
                           point_iter, Int16);
      break;
    case tInt32:
      NI_FIND_OBJECT_POINT(data, regions, rank, dims, max_label,
                           point_iter, Int32);
      break;
    case tInt64:
      NI_FIND_OBJECT_POINT(data, regions, rank, dims, max_label,
                           point_iter, Int64);
      break;
    default:
      PyErr_SetString(PyExc_RuntimeError, "data type not supported");
      goto exit;
    }
    NI_ITERATOR_NEXT(point_iter, data);
  }

 exit:
  if (!PyErr_Occurred())
    return 1;

  /* failure: do not hand back partially filled results */
  slot = 0;
  while (slot < max_label) {
    if (regions[slot]) {
      free(regions[slot]);
      regions[slot] = NULL;
    }
    slot++;
  }
  return 0;
}


/* NI_GET_VALUE(_pi, _v, _type): read the array element that _pi points
   to, interpreted according to the type code _type, and assign it to _v.
   A Bool element is normalized to 0 or 1. For an unsupported type code a
   RuntimeError is set and the ENCLOSING FUNCTION returns 0. */

/* the UInt64 case only exists when the type is available: */
#if HAS_UINT64
#define NI_GET_VALUE_CASE_UINT64(_pi, _v)                             \
  case tUInt64:  _v = *(UInt64*)_pi;        break;
#else
#define NI_GET_VALUE_CASE_UINT64(_pi, _v)
#endif

#define NI_GET_VALUE(_pi, _v, _type)                                  \
{                                                                     \
  switch(_type) {                                                     \
  case tBool:    _v = (*(Bool*)_pi) != 0;   break;                    \
  case tUInt8:   _v = *(UInt8*)_pi;         break;                    \
  case tUInt16:  _v = *(UInt16*)_pi;        break;                    \
  case tUInt32:  _v = *(UInt32*)_pi;        break;                    \
  NI_GET_VALUE_CASE_UINT64(_pi, _v)                                   \
  case tInt8:    _v = *(Int8*)_pi;          break;                    \
  case tInt16:   _v = *(Int16*)_pi;         break;                    \
  case tInt32:   _v = *(Int32*)_pi;         break;                    \
  case tInt64:   _v = *(Int64*)_pi;         break;                    \
  case tFloat32: _v = *(Float32*)_pi;       break;                    \
  case tFloat64: _v = *(Float64*)_pi;       break;                    \
  default:                                                            \
    PyErr_SetString(PyExc_RuntimeError, "data type not supported");   \
    return 0;                                                         \
  }                                                                   \
}

/* NI_GET_LABEL(_pm, _label, _type): if _pm is not NULL, read the element
   it points to, interpreted according to the type code _type, and assign
   it to _label. If _pm is NULL, _label is left untouched. For an
   unsupported type code a RuntimeError is set and the ENCLOSING FUNCTION
   returns 0. */

/* the UInt64 case only exists when the type is available: */
#if HAS_UINT64
#define NI_GET_LABEL_CASE_UINT64(_pm, _label)                         \
    case tUInt64:  _label = *(UInt64*)_pm;   break;
#else
#define NI_GET_LABEL_CASE_UINT64(_pm, _label)
#endif

#define NI_GET_LABEL(_pm, _label, _type)                              \
{                                                                     \
  if (_pm) {                                                          \
    switch(_type) {                                                   \
    case tBool:    _label = *(Bool*)_pm;     break;                   \
    case tUInt8:   _label = *(UInt8*)_pm;    break;                   \
    case tUInt16:  _label = *(UInt16*)_pm;   break;                   \
    case tUInt32:  _label = *(UInt32*)_pm;   break;                   \
    NI_GET_LABEL_CASE_UINT64(_pm, _label)                             \
    case tInt8:    _label = *(Int8*)_pm;     break;                   \
    case tInt16:   _label = *(Int16*)_pm;    break;                   \
    case tInt32:   _label = *(Int32*)_pm;    break;                   \
    case tInt64:   _label = *(Int64*)_pm;    break;                   \
    case tFloat32: _label = *(Float32*)_pm;  break;                   \
    case tFloat64: _label = *(Float64*)_pm;  break;                   \
    default:                                                          \
      PyErr_SetString(PyExc_RuntimeError, "data type not supported"); \
      return 0;                                                       \
    }                                                                 \
  }                                                                   \
}

/* Calculate statistics of the elements of an array, optionally per
 * labeled object.
 *
 * input     : array with the values.
 * labels    : optional label array (may be NULL); must have the same
 *             shape as input. Without it every element has label 1.
 * min_label,
 * max_label,
 * indices   : if min_label >= 0, an element is used only if its label
 *             lies in [min_label, max_label] and
 *             indices[label - min_label] is non-negative; that value is
 *             the result slot the element contributes to. If
 *             min_label < 0, every element with a non-zero label
 *             contributes to slot 0 and indices is not read.
 * n_results : number of slots in each of the result arrays.
 * sum, total, variance, minimum, maximum, min_pos, max_pos :
 *             result arrays; each may be NULL if that statistic is not
 *             wanted. variance needs both sum and total. min_pos and
 *             max_pos are only updated together with minimum and
 *             maximum and hold the sequence number of the element in
 *             iteration order.
 *
 * Slots that received no element report 0.0 as minimum and maximum. The
 * extrema are tracked with the sentinels DBL_MAX and -DBL_MAX, so an
 * extremum that is exactly equal to its sentinel is also reported as 0.0.
 *
 * Returns 1 on success and 0, with a Python error set, on failure.
 */
int NI_Statistics(PyArrayObject *input, PyArrayObject *labels, 
                  int min_label, int max_label, int *indices, 
                  int n_results, double *sum, int *total,
                  double *variance, double *minimum, double *maximum,
                  int* min_pos, int* max_pos)
{
  char *pi = NULL, *pm = NULL;
  NI_Iterator ii, mi;
  int jj, size, mtype = 0, itype = 0, idx = 0, label = 1, doit = 1;

  assert(input != NULL);

  /* the variance pass below reads sum[] and total[]: */
  if (variance && (!sum || !total)) {
    PyErr_SetString(PyExc_RuntimeError,
                    "variance calculation requires sum and total");
    return 0;
  }

  /* input iterator: */
  if (!NI_InitPointIterator(input, &ii))
    return 0;
  /* input data: */
  pi = NI_GetArrayData(input);
  /* input type: */
  itype = NI_GetArrayType(input);
  if (labels) {
    /* input and labels must have equal size: */
    if (!NI_ShapeEqual(input, labels)) {
      PyErr_SetString(PyExc_RuntimeError, 
                      "input and labels dimensions must be equal");
      return 0;
    }
    /* iterator, data pointer and type of labels array: */
    if (!NI_InitPointIterator(labels, &mi))
      return 0;
    pm = NI_GetArrayData(labels);
    mtype = NI_GetArrayType(labels);
  }

  /* input size: */
  size = NI_Elements(input);

  /* initialize the requested results: */
  for(jj = 0; jj < n_results; jj++) {
    if (sum)
      sum[jj] = 0.0;
    if (total)
      total[jj] = 0;
    if (variance)
      variance[jj] = 0;
    if (minimum)
      minimum[jj] = DBL_MAX;
    /* DBL_MIN is the smallest POSITIVE double and would hide all
       non-positive values; the lowest finite value is -DBL_MAX: */
    if (maximum)
      maximum[jj] = -DBL_MAX;
    if (min_pos)
      min_pos[jj] = 0;
    if (max_pos)
      max_pos[jj] = 0;
  }
  /* iterate over array: */
  for(jj = 0; jj < size; jj++) {
    /* leaves label at 1 if there is no labels array; returns 0 from
       this function for an unsupported label type: */
    NI_GET_LABEL(pm, label, mtype);
    if (min_label >= 0) {
      if (label >= min_label && label <= max_label) {
        idx = indices[label - min_label];
        doit = idx >= 0;
      } else {
        doit = 0;
      } 
    } else {
      doit = label != 0;
    }
    if (doit) {
      double val;
      /* returns 0 from this function for an unsupported input type: */
      NI_GET_VALUE(pi, val, itype);
      if (sum)
        sum[idx] += val;
      if (total)
        total[idx]++;
      if (minimum && val < minimum[idx]) {
        minimum[idx] = val;
        if (min_pos)
          min_pos[idx] = jj;
      }
      if (maximum && val > maximum[idx]) {
        maximum[idx] = val;
        if (max_pos)
          max_pos[idx] = jj;
      }
    }
    if (labels) {
      NI_ITERATOR_NEXT2(ii, mi, pi, pm);
    } else {
      NI_ITERATOR_NEXT(ii, pi);
    }
  }

  /* results still at their sentinel were never updated; report 0.0: */
  if (minimum) {
    for(jj = 0; jj < n_results; jj++) {
      if (!(minimum[jj] < DBL_MAX))
        minimum[jj] = 0.0;
    }
  }
  if (maximum) {
    for(jj = 0; jj < n_results; jj++) {
      if (!(maximum[jj] > -DBL_MAX))
        maximum[jj] = 0.0;
    }
  }

  if (variance) {
    /* a second pass is only needed if some slot has more than one
       element, otherwise all variances stay zero: */
    int do_var = 0;
    for(jj = 0; jj < n_results; jj++)
      if (total[jj] > 1) {
        do_var = 1;
        break;
      }
    if (do_var) {
      /* reset input iterator: */
      NI_ITERATOR_RESET(ii);
      pi = NI_GetArrayData(input);
      if (labels) {
        /* reset label iterator: */
        NI_ITERATOR_RESET(mi);
        pm = NI_GetArrayData(labels);
      }
      /* accumulate the squared deviations from the mean: */
      for(jj = 0; jj < size; jj++) {
        NI_GET_LABEL(pm, label, mtype);
        if (min_label >= 0) {
          if (label >= min_label && label <= max_label) {
            idx = indices[label - min_label];
            doit = idx >= 0;
          } else {
            doit = 0;
          } 
        } else {
          doit = label != 0;
        }
        if (doit) {
          double val;
          NI_GET_VALUE(pi, val, itype);
          val = val - sum[idx] / total[idx];
          variance[idx] += val * val;
        }
        if (labels) {
          NI_ITERATOR_NEXT2(ii, mi, pi, pm);
        } else {
          NI_ITERATOR_NEXT(ii, pi);
        }
      }
      /* normalize by (n - 1); slots with fewer than two elements get a
         variance of zero: */
      for(jj = 0; jj < n_results; jj++)
        variance[jj] = (total[jj] > 1 ? 
                        variance[jj] / (total[jj] - 1) : 0.0);
    }
  }
  
  return 1;
}


/* Wrapper around NI_GET_LABEL for NI_CenterOfMass. The macro leaves the
   calling function with `return 0` on an unsupported type; confining
   that to this helper lets the caller run its cleanup code. *label is
   left unchanged if pm is NULL. Returns 1 on success, 0 with a Python
   error set otherwise. */
static int ni_center_of_mass_get_label(char *pm, int mtype, int *label)
{
  int value = *label;
  NI_GET_LABEL(pm, value, mtype);
  *label = value;
  return 1;
}

/* Wrapper around NI_GET_VALUE for NI_CenterOfMass, for the same reason
   as above. Returns 1 on success, 0 with a Python error set
   otherwise. */
static int ni_center_of_mass_get_value(char *pi, int itype, double *value)
{
  double v = 0.0;
  NI_GET_VALUE(pi, v, itype);
  *value = v;
  return 1;
}

/* Calculate the center of mass of an array, optionally per labeled
 * object.
 *
 * input          : array with the values (the "masses").
 * labels         : optional label array (may be NULL); must have the
 *                  same shape as input. Without it every element has
 *                  label 1.
 * min_label,
 * max_label,
 * indices        : if min_label >= 0, an element is used only if its
 *                  label lies in [min_label, max_label] and
 *                  indices[label - min_label] is non-negative; that
 *                  value is the result slot the element contributes to.
 *                  If min_label < 0, every element with a non-zero label
 *                  contributes to slot 0 and indices is not read.
 * n_results      : number of result slots.
 * center_of_mass : output of n_results * rank doubles; slot j occupies
 *                  elements j * rank .. j * rank + rank - 1.
 *
 * A slot whose values sum to zero is divided by zero and therefore holds
 * non-finite coordinates.
 *
 * Returns 1 on success and 0 if a Python error is pending on exit.
 */
int NI_CenterOfMass(PyArrayObject *input, PyArrayObject *labels, 
                    int min_label, int max_label, int *indices, 
                    int n_results, double *center_of_mass)
{
  char *pi = NULL, *pm = NULL;
  NI_Iterator ii, mi;
  int jj, kk, size, mtype = 0, itype = 0, idx = 0, label = 1, doit = 1;
  int irank;
  double *sum = NULL;

  assert(input != NULL);

  /* input iterator: */
  if (!NI_InitPointIterator(input, &ii))
    goto exit;
  /* input data: */
  pi = NI_GetArrayData(input);
  /* input type: */
  itype = NI_GetArrayType(input);
  if (labels) {
    /* input and labels must have equal size: */
    if (!NI_ShapeEqual(input, labels)) {
      PyErr_SetString(PyExc_RuntimeError, 
                      "input and labels dimensions must be equal");
      goto exit;
    }
    /* iterator, data pointer and type of labels array: */
    if (!NI_InitPointIterator(labels, &mi))
      goto exit;
    pm = NI_GetArrayData(labels);
    mtype = NI_GetArrayType(labels);
  }

  /* input size: */
  size = NI_Elements(input);

  /* per-slot sum of the values, the denominator of the result: */
  sum = (double*)malloc(n_results * sizeof(double));
  if (!sum) {
    PyErr_NoMemory();
    goto exit;
  }
  
  irank = NI_GetArrayRank(input);
  for(jj = 0; jj < n_results; jj++) {
    sum[jj] = 0.0;
    for(kk = 0; kk < irank; kk++)
      center_of_mass[jj * irank + kk] = 0.0;
  }
  /* iterate over array: */
  for(jj = 0; jj < size; jj++) {
    /* an unsupported type must pass through exit so sum is freed: */
    if (!ni_center_of_mass_get_label(pm, mtype, &label))
      goto exit;
    if (min_label >= 0) {
      if (label >= min_label && label <= max_label) {
        idx = indices[label - min_label];
        doit = idx >= 0;
      } else {
        doit = 0;
      } 
    } else {
      doit = label != 0;
    }
    if (doit) {
      double val;
      if (!ni_center_of_mass_get_value(pi, itype, &val))
        goto exit;
      /* accumulate the value and the value-weighted coordinates: */
      sum[idx] += val;
      for(kk = 0; kk < irank; kk++)
        center_of_mass[idx * irank + kk] += val * ii.coordinates[kk];
    }
    if (labels) {
      NI_ITERATOR_NEXT2(ii, mi, pi, pm);
    } else {
      NI_ITERATOR_NEXT(ii, pi);
    }
  }

  /* weighted coordinate sums divided by the total weight: */
  for(jj = 0; jj < n_results; jj++)
    for(kk = 0; kk < irank; kk++)
      center_of_mass[jj * irank + kk] /= sum[jj];

 exit:
  if (sum)
    free(sum);
  return  PyErr_Occurred() == NULL;
}


/* Compute one histogram of the input values per selected label.
 *
 * input      - array whose values are binned (must not be NULL).
 * labels     - optional label array; when given it must have the same
 *              shape as input. The label of each element is read with
 *              NI_GET_LABEL (defined elsewhere; presumably it leaves
 *              `label` at its initial value of 1 when labels is NULL -
 *              TODO confirm against the macro).
 * min_label,
 * max_label,
 * indices    - when min_label >= 0, an element is counted only if its
 *              label lies in [min_label, max_label] and
 *              indices[label - min_label] is non-negative; that value
 *              selects the histogram. When min_label < 0 every element
 *              with a non-zero label is counted in histogram 0.
 * n_results  - number of histograms to create.
 * histograms - receives n_results newly created 1-D Int32 arrays of
 *              length nbins.
 * min, max   - only values with min <= value < max are counted.
 * nbins      - number of equally wide bins covering [min, max).
 *
 * Returns 1 if no Python error is pending on exit and 0 otherwise.
 */
int NI_Histogram(PyArrayObject *input, PyArrayObject *labels, 
                 int min_label, int max_label, int *indices, 
                 int n_results, PyArrayObject **histograms,
                 double min, double max, int nbins)
{
  char *pi = NULL, *pm = NULL;
  NI_Iterator ii, mi;
  int jj, kk, size, mtype = 0, itype = 0, idx = 0, label = 1, doit = 1;
  Int32 **ph = NULL;
  double bsize;
  
  assert(input != NULL);
  
  if (nbins < 1) {
    PyErr_SetString(PyExc_RuntimeError,
                    "number of bins must be at least one");
    goto exit;
  }
 
  if (min >= max) {
    PyErr_SetString(PyExc_RuntimeError,
                    "histogram maximum must be larger than its minimum");
    goto exit;
  }
 
  /* input iterator: */
  if (!NI_InitPointIterator(input, &ii))
    goto exit;
  /* input data: */
  pi = NI_GetArrayData(input);
  /* input type: */
  itype = NI_GetArrayType(input);
  if (labels) {
    /* input and labels must have equal size: */
    if (!NI_ShapeEqual(input, labels)) {
      PyErr_SetString(PyExc_RuntimeError, 
                      "input and labels dimensions must be equal");
      goto exit;
    }
    /* iterator, data pointer and type of labels array: */
    if (!NI_InitPointIterator(labels, &mi))
      goto exit;
    pm = NI_GetArrayData(labels);
    mtype = NI_GetArrayType(labels);
  }

  /* ph[jj] caches the data pointer of histogram jj: */
  ph = (Int32**)malloc(n_results * sizeof(Int32*));
  if (!ph) {
    PyErr_NoMemory();
    goto exit;
  }
  /* create the output histograms and clear them: */
  for(jj = 0; jj < n_results; jj++) {
    histograms[jj] = NI_NewArray(tInt32, 1, &nbins);
    if (!histograms[jj]) {
      PyErr_NoMemory();
      goto exit;
    }
    ph[jj] = (Int32*)NI_GetArrayData(histograms[jj]);
    for(kk = 0; kk < nbins; kk++)
      ph[jj][kk] = 0;
  }

  /* width of a single bin: */
  bsize = (max - min) / (double)nbins;

  /* input size: */
  size = NI_Elements(input);
  /* iterate over array: */
  for(jj = 0; jj < size; jj++) {
    NI_GET_LABEL(pm, label, mtype);
    if (min_label >= 0) {
      /* map the label to a histogram index; a negative index means
         that the label was not requested: */
      if (label >= min_label && label <= max_label) {
        idx = indices[label - min_label];
        doit = idx >= 0;
      } else {
        doit = 0;
      } 
    } else {
      /* no label selection: count everything that is not background;
         idx keeps its initial value of 0. */
      doit = label != 0;
    }
    if (doit) {
      int bin;
      double val;
      NI_GET_VALUE(pi, val, itype);
      if (val >= min && val < max) {
        bin = (int)((val - min) / bsize);
        /* Floating point rounding can make the quotient equal to nbins
           for a value just below max; keep such values in the last
           bin instead of writing past the end of the histogram. */
        if (bin >= nbins)
          bin = nbins - 1;
        ++(ph[idx][bin]);
      }
    }
    if (labels) {
      NI_ITERATOR_NEXT2(ii, mi, pi, pm);
    } else {
      NI_ITERATOR_NEXT(ii, pi);
    }
  }

 exit:
  if (ph)
    free(ph);
  return  PyErr_Occurred() == NULL;
}

/* Types and limits used by NI_WatershedIFT.
 *
 * DONE_TYPE - type of the "already processed" flag of a node.
 * COST_TYPE - type of the cost of a node. NI_WatershedIFT stores
 *             maxval + 1 as the cost of every unmarked node, where
 *             maxval is the largest input value. The input may be
 *             UInt16, so the cost must be able to hold 65536; a 16 bit
 *             cost would wrap to zero in that case. The field sits
 *             between an int and a pointer, so widening it normally
 *             does not change the size of the structure.
 * WS_MAXDIM - maximum array rank accepted by NI_WatershedIFT.
 */
#define DONE_TYPE UInt8
#define COST_TYPE int
#define WS_MAXDIM 7

/* One node per array element; nodes are linked into doubly linked
   queues through next and prev. */
typedef struct {
  int index;          /* flat index of the element in the array */
  COST_TYPE cost;     /* current cost assigned to the element */
  void *next, *prev;  /* neighbours in the queue, NULL at the ends */
  DONE_TYPE done;     /* non-zero once the element was dequeued */
} NI_WatershedElement;

/* Watershed segmentation from markers.
 *
 * Labels are propagated from the marker elements to their neighbors
 * in order of increasing cost. The cost of reaching a neighbor p from
 * an element v is the maximum of the cost of v and the absolute
 * difference of the input values at v and p. Elements are kept in one
 * doubly linked queue per cost value (0 .. maximum input value).
 * (The "IFT" in the name presumably refers to the image foresting
 * transform.)
 *
 * input     - UInt8 or UInt16 array.
 * markers   - integer array with the same shape as input; non-zero
 *             elements are the seeds. Positive labels are queued
 *             before negative ones at equal cost.
 * strct     - contiguous Bool array of the same rank as input with
 *             every dimension equal to 3; its non-zero elements (the
 *             center excluded) define the neighborhood.
 * output    - receives the result array, obtained from NI_OutputArray
 *             with the type of markers and the shape of input.
 * output_in - passed on to NI_OutputArray.
 *
 * Returns 1 if no Python error is pending on exit and 0 otherwise.
 */
int NI_WatershedIFT(PyArrayObject* input, PyArrayObject* markers, 
                    PyArrayObject* strct, PyArrayObject** output, 
                    PyObject* output_in)
{
  char *pv, *pl;
  int irank, itype, mtype, idims[NI_MAXDIM], sdims[NI_MAXDIM], ssize;
  int size, jj, hh, kk, maxval, strides[WS_MAXDIM], coordinates[WS_MAXDIM];
  int *nstrides = NULL, *ncoords = NULL, nneigh;
  NI_WatershedElement *temp = NULL, **first = NULL, **last = NULL;
  PyArrayObject *values = NULL;
  Bool *ps = NULL;
  
  /* get input rank, type and shape */
  irank = NI_GetArrayRank(input);
  itype = NI_GetArrayType(input);
  NI_GetArrayDimensions(input, idims);

  /* structuring element must be of bool type: */
  if (NI_GetArrayType(strct) != tBool) {
    PyErr_SetString(PyExc_RuntimeError, "structure type must be Bool");
    goto exit;
  }
  /* the structure array must be contiguous: */
  if (!PyArray_ISCONTIGUOUS(strct)) {
    PyErr_SetString(PyExc_RuntimeError,
                    "structure array must be contiguous");
    goto exit;
  }
  /* input and structure must have equal rank: */
  if (NI_GetArrayRank(strct) != irank) {
    PyErr_SetString(PyExc_RuntimeError, 
                    "structure rank must be equal to input rank");
    goto exit;
  }
  /* every structure dimension must be 3; ssize becomes the number of 
     structure elements: */
  ssize = 1;
  NI_GetArrayDimensions(strct, sdims);
  for(kk = 0; kk < irank; kk++) {
    ssize *= sdims[kk];
    if (sdims[kk] != 3) {
      PyErr_SetString(PyExc_RuntimeError, "structure dimensions must "
                      "equal to 3");
      goto exit;
    }
  }

  /* strides and coordinates only have room for WS_MAXDIM entries: */
  if (irank > WS_MAXDIM) {
    PyErr_SetString(PyExc_RuntimeError, "too many dimensions");
    goto exit;
  }

  /* The algorithm works only for integer data types. 32 bit is not 
     practical, so only 8 and 16 bit are supported. */
  if (!(itype == tUInt8 || itype == tUInt16)) {
    PyErr_SetString(PyExc_RuntimeError,
                    "input type must be UInt8 or UInt16");
    goto exit;
  }
  
  mtype = NI_GetArrayType(markers);
  if (!(mtype == tInt8 || mtype == tInt16 || mtype == tInt32 ||
        mtype == tUInt8 || mtype == tUInt16 || mtype == tUInt32)) {
    PyErr_SetString(PyExc_RuntimeError, 
                    "marker array must be of an integer type");
    goto exit;
  }

  if (!NI_ShapeEqual(input, markers)) {
    PyErr_SetString(PyExc_RuntimeError,  
                    "input and markers must have equal shape");
    goto exit;
  }
  
  size = NI_Elements(input);
  /* Storage for the temporary queue data, one node per element. At
     least one node is requested, because malloc(0) may return NULL,
     which would be reported as a memory error for an empty input. */
  temp = (NI_WatershedElement*)malloc((size > 0 ? (size_t)size : 1) *
                                      sizeof(NI_WatershedElement));
  if (!temp) {
    PyErr_NoMemory();
    goto exit;
  }

  /* Make sure that the input is contiguous in memory. */
  values = NI_ContiguousArray(input);
  if (!values) {
    PyErr_SetString(PyExc_RuntimeError, "cannot convert input");
    goto exit;
  }

  /* allocate output and start from a copy of the markers: */
  if (!NI_OutputArray(mtype, irank, idims, output_in, output))
    goto exit;
  NI_CopyArray(*output, markers);

  pv = NI_GetArrayData(values);

  /* Initialization and find the maximum of the input. */
  maxval = 0;
  for(jj = 0; jj < size; jj++) {
    int ival = 0;
    switch(itype) {
    case tUInt8:
      ival = ((UInt8*)pv)[jj];
      break;
    case tUInt16:
      ival = ((UInt16*)pv)[jj];
      break;
    default:
      PyErr_SetString(PyExc_RuntimeError, "data type not supported");
      goto exit;
    }
    temp[jj].index = jj;
    temp[jj].done = 0;
    if (ival > maxval)
      maxval = ival;
  }

  /* Allocate and initialize the storage for the queue: one list per
     possible cost value 0 .. maxval. */
  first = (NI_WatershedElement**)malloc(((size_t)maxval + 1) * 
                                        sizeof(NI_WatershedElement*));
  last = (NI_WatershedElement**)malloc(((size_t)maxval + 1) * 
                                       sizeof(NI_WatershedElement*));
  if (!first || !last) {
    PyErr_NoMemory();
    goto exit;
  }
  for(hh = 0; hh <= maxval; hh++) {
    first[hh] = NULL;
    last[hh] = NULL;
  }

  pl = NI_GetArrayData(*output);

  /* initialize all nodes */
  for(jj = 0; jj < size; jj++) {
    /* get marker */
    int label = 0;
    switch(mtype) {
    case tInt8:
      label = ((Int8*)pl)[jj];
      break;
    case tInt16:
      label = ((Int16*)pl)[jj];
      break;
    case tInt32:
      label = ((Int32*)pl)[jj];
      break;
    case tUInt8:
      label = ((UInt8*)pl)[jj];
      break;
    case tUInt16:
      label = ((UInt16*)pl)[jj];
      break;
    case tUInt32:
      label = ((UInt32*)pl)[jj];
      break;
    default:
      PyErr_SetString(PyExc_RuntimeError, "data type not supported");
      goto exit;
    }

    if (label != 0) {
      /* This node is a marker: it starts in the queue of cost zero. */
      temp[jj].cost = 0;
      if (!first[0]) {
        first[0] = &(temp[jj]);
        first[0]->next = NULL;
        first[0]->prev = NULL;
        last[0] = first[0];
      } else {
        if (label > 0) {
          /* object markers are enqueued at the beginning, so they are
             processed first. */
          temp[jj].next = first[0];
          temp[jj].prev = NULL;
          first[0]->prev = &(temp[jj]);
          first[0] = &(temp[jj]);
        } else {
          /* background markers are enqueued at the end, so they are
             processed after the object markers. */
          temp[jj].next = NULL;
          temp[jj].prev = last[0];
          last[0]->next = &(temp[jj]);
          last[0] = &(temp[jj]);
        }
      }
    } else {
      /* This node is not a marker: it is in no queue and gets a cost
         that is larger than any cost that can be propagated. */
      temp[jj].cost = maxval + 1;
      temp[jj].next = NULL;
      temp[jj].prev = NULL;
    }
  }

  /* count the neighbors defined by the structure; its center element
     is the node itself: */
  ps = (Bool*)NI_GetArrayData(strct);
  nneigh = 0;
  for (kk = 0; kk < ssize; kk++)
    if (ps[kk] && kk != (ssize / 2))
      ++nneigh;

  /* For each neighbor: its offset in the flat array (nstrides) and its
     offset along every axis (ncoords, irank entries per neighbor). At
     least one element is requested, see the allocation of temp. */
  nstrides = (int*)malloc((nneigh > 0 ? (size_t)nneigh : 1) * sizeof(int));
  ncoords = (int*)malloc((nneigh > 0 && irank > 0 ? 
                          (size_t)nneigh * (size_t)irank : 1) * sizeof(int));
  if (!nstrides || !ncoords) {
    PyErr_NoMemory();
    goto exit;
  }

  /* element strides of the contiguous arrays (guarded for rank 0): */
  if (irank > 0)
    strides[irank - 1] = 1;
  for(hh = irank - 2; hh >= 0; hh--)
    strides[hh] = idims[hh + 1] * strides[hh + 1];

  /* walk over the structure; coordinates runs from -1 to 1 along 
     each axis: */
  for(kk = 0; kk < irank; kk++)
    coordinates[kk] = -1;

  jj = 0;
  for(kk = 0; kk < ssize; kk++) {
    if (ps[kk] && kk != (ssize / 2)) {
      int offset = 0;
      for(hh = 0; hh < irank; hh++) {
        offset += coordinates[hh] * strides[hh];
        ncoords[jj * irank + hh] = coordinates[hh];
      }
      nstrides[jj++] = offset;
    }
    for(hh = irank - 1; hh >= 0; hh--)
      if (coordinates[hh] < 1) {
        coordinates[hh]++;                        
        break;                                                
      } else {                                                
        coordinates[hh] = -1;                        
      }        
  }

  /* Propagation phase: process the queues in order of increasing
     cost. */
  for(jj = 0; jj <= maxval; jj++) {
    while (first[jj]) {
      int vcoor[WS_MAXDIM], rest;
      /* dequeue first element: */
      NI_WatershedElement *v = first[jj];
      first[jj] = first[jj]->next;
      if (first[jj])
        first[jj]->prev = NULL;
      else
        last[jj] = NULL; /* do not keep a pointer to a dequeued node */
      v->prev = NULL;
      v->next = NULL;
      /* Mark element as done: */
      v->done = 1;
      /* Coordinates of the element along every axis: */
      rest = v->index;
      for(kk = 0; kk < irank; kk++) {
        vcoor[kk] = rest / strides[kk];
        rest -= vcoor[kk] * strides[kk];
      }
      /* Iterate over the neighbors of the element: */
      for(hh = 0; hh < nneigh; hh++) {
        int v_index = v->index, p_index = v->index, outside = 0;
        int qq, cc;
        p_index += nstrides[hh];
        /* Check if the neighbor is within the extent of the array.
           This must be done per axis: a test on the flat index alone
           would accept an element at the start of the next row as the
           neighbor of an element at the end of a row. */
        for (qq = 0; qq < irank; qq++) {
          cc = vcoor[qq] + ncoords[hh * irank + qq];
          if (cc < 0 || cc >= idims[qq]) {
            outside = 1;
            break;
          }
        }
        if (!outside) {
          NI_WatershedElement *p = &(temp[p_index]);
          if (!(p->done)) {
            /* If the neighbor was not processed yet: */
            int max, pval, vval, wvp, pcost, label;
            switch(itype) {
            case tUInt8:
              vval = ((UInt8*)pv)[v_index];
              pval = ((UInt8*)pv)[p_index];
              break;
            case tUInt16:
              vval = ((UInt16*)pv)[v_index];
              pval = ((UInt16*)pv)[p_index];
              break;
            default:
              PyErr_SetString(PyExc_RuntimeError,
                              "data type not supported");
              goto exit;
            }
            /* Calculate cost: */
            wvp = pval - vval;
            if (wvp < 0)
              wvp = -wvp;
            /* Find the maximum of this cost and the current 
               element cost: */
            pcost = p->cost;
            max = v->cost > wvp ? v->cost : wvp;
            if (max < pcost) {
              /* If this maximum is less than the neighbors cost,
                 adapt the cost and the label of the neighbor: */
              p->cost = max;
              switch(mtype) {
              case tInt8:
                label = ((Int8*)pl)[v_index];
                ((Int8*)pl)[p_index] = label;
                break;
              case tInt16:
                label = ((Int16*)pl)[v_index];
                ((Int16*)pl)[p_index] = label;
                break;
              case tInt32:
                label = ((Int32*)pl)[v_index];
                ((Int32*)pl)[p_index] = label;
                break;
              case tUInt8:
                label = ((UInt8*)pl)[v_index];
                ((UInt8*)pl)[p_index] = label;
                break;
              case tUInt16:
                label = ((UInt16*)pl)[v_index];
                ((UInt16*)pl)[p_index] = label;
                break;
              case tUInt32:
                label = ((UInt32*)pl)[v_index];
                ((UInt32*)pl)[p_index] = label;
                break;
              default:
                PyErr_SetString(PyExc_RuntimeError,
                                "data type not supported");
                goto exit;
              }
              /* If the neighbor is in a queue, remove it. A node that
                 is the only element of its queue has no next and no
                 prev, so the head of the queue is tested as well.
                 Unmarked nodes have a cost of maxval + 1, for which
                 no queue exists. */
              if (pcost <= maxval &&
                  (p->next || p->prev || first[pcost] == p)) {
                NI_WatershedElement *prev = p->prev, *next = p->next;
                if (first[pcost] == p)
                  first[pcost] = next;
                if (last[pcost] == p)
                  last[pcost] = prev;
                if (prev)
                  prev->next = next;
                if (next) 
                  next->prev = prev;
              }
              /* Insert the neighbor in the appropriate queue: at the
                 end for a negative label, at the beginning 
                 otherwise. */
              if (label < 0) {
                p->prev = last[max];
                p->next = NULL;
                if (last[max])
                  last[max]->next = p;
                last[max] = p;
                if (!first[max])
                  first[max] = p;
              } else {
                p->next = first[max];
                p->prev = NULL;
                if (first[max])
                  first[max]->prev = p;
                first[max] = p;
                if (!last[max])
                  last[max] = p;
              }
            }
          }
        }
      }
    }
  }

 exit:
  if (temp)
    free(temp);
  if (first)
    free(first);
  if (last)
    free(last);
  if (nstrides)
    free(nstrides);
  if (ncoords)
    free(ncoords);
  Py_XDECREF(values);
  return PyErr_Occurred() ? 0 : 1;
}
