/* Copyright (C) 2003, 2004 Peter J. Verveer
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met: 
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials provided
 *    with the distribution.
 *
 * 3. The name of the author may not be used to endorse or promote
 *    products derived from this software without specific prior
 *    written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.      
 */

#include "ni_support.h"
#include "ni_interpolation.h"
#include <stdlib.h>
#include <math.h>
#include <assert.h>

/* calculate the B-spline interpolation coefficients for given x: */
static void
spline_coefficients(double x, int order, double *result) 
{
  int hh;
  double y, start;

  assert(result != NULL);

  if (order & 1) {
    start = (int)floor(x) - order / 2;
  } else {
    start = (int)floor(x + 0.5) - order / 2;
  }

  for(hh = 0; hh <= order; hh++)  {
    y = fabs(start - x + hh);

    switch(order) {
    case 1:
      result[hh] = y > 1.0 ? 0.0 : 1.0 - y;
      break;
    case 2:
      if (y < 0.5) {
        result[hh] = 0.75 - y * y;
      } else if (y < 1.5) {
        y = 1.5 - y;
        result[hh] = 0.5 * y * y;
      } else {
        result[hh] = 0.0;
      }
      break;
    case 3:
      if (y < 1.0) {
        result[hh] = 
          (y * y * (y - 2.0) * 3.0 + 4.0) / 6.0;
      } else if (y < 2.0) {
        y = 2.0 - y;
        result[hh] = y * y * y / 6.0;
      } else {
        result[hh] = 0.0;
      }        
      break;
    case 4:
      if (y < 0.5) {
        y *= y;
        result[hh] = y * (y * 0.25 - 0.625) + 115.0 / 192.0;
      } else if (y < 1.5) {
        result[hh] = y * (y * (y * (5.0 / 6.0 - y / 6.0) - 1.25) + 
                          5.0 / 24.0) + 55.0 / 96.0;
      } else if (y < 2.5) {
        y -= 2.5;
        y *= y;
        result[hh] = y * y / 24.0;
      } else {
        result[hh] = 0.0;
      }
      break;
    case 5:
      if (y < 1.0) {
        double f = y * y;
        result[hh] = 
          f * (f * (0.25 - y / 12.0) - 0.5) + 0.55;
      } else if (y < 2.0) {
        result[hh] = y * (y * (y * (y * (y / 24.0 - 0.375) 
                                    + 1.25) -  1.75) + 0.625) + 0.425;
      } else if (y < 3.0) {
        double f = 3.0 - y;
        y = f * f;
        result[hh] = f * y * y / 120.0;
      } else {
        result[hh] = 0.0;
      }
      break;
    }
  }
}

/* Map a coordinate that lies outside [0, len) back according to the
   requested boundary condition.  Coordinates already inside the range,
   and coordinates for a mode not handled here, are returned unchanged.
   For NI_EXTEND_CONSTANT the value border_flag_value is returned, so the
   caller can recognize that the constant border value must be used: */
static double 
map_coordinate(double in, int len, int mode, int border_flag_value)
{
  const int below = in < 0;
  const int above = !below && in >= len;

  /* nothing to do for coordinates within the array: */
  if (!below && !above)
    return in;

  /* the modes that do not fold the coordinate back: */
  switch (mode) {
  case NI_EXTEND_NEAREST:
    /* clamp to the closest border sample: */
    return below ? 0 : len - 1;
  case NI_EXTEND_CONSTANT:
    return border_flag_value;
  case NI_EXTEND_MIRROR:
  case NI_EXTEND_REFLECT:
  case NI_EXTEND_WRAP:
    break;
  default:
    return in;
  }

  /* a single sample leaves only one place to map to: */
  if (len <= 1)
    return 0;

  if (mode == NI_EXTEND_WRAP) {
    /* periodic with period len: */
    if (below) {
      in += len * (int)(-in / len);
      if (in < 0)
        in += len;
    } else {
      in -= len * (int)(in / len);
    }
  } else if (mode == NI_EXTEND_MIRROR) {
    /* periodic with period 2 * len - 2, the second half runs backwards: */
    const int period = 2 * len - 2;
    if (below) {
      in = period * (int)(-in / period) + in;
      in = in <= 1 - len ? in + period : -in;
    } else {
      in -= period * (int)(in / period);
      if (in >= len)
        in = period - in;
    }
  } else {
    /* NI_EXTEND_REFLECT: as above, but with period 2 * len: */
    const int period = 2 * len;
    if (below) {
      if (in < -period)
        in = period * (int)(-in / period) + in;
      in = in < -len ? in + period : -in - 1;
    } else {
      in -= period * (int)(in / period);
      if (in >= len)
        in = period - in - 1;
    }
  }

  return in;
}

/* size handed to NI_AllocateLineBuffer() when the line buffer of the
   spline filter is allocated (NOTE(review): presumably an upper limit on
   the buffer size -- confirm against ni_support): */
#define BUFFER_SIZE 256000
/* the initialization sum of the spline filter is truncated after the
   smallest number of terms n for which |pole|^n drops to this value: */
#define TOLERANCE 1e-15

/* One-dimensional spline filter: filters every line of the input along
   the given axis and stores the result in the output array.

   input        array to filter; complex types are rejected.
   order        spline order, only 2 to 5 are accepted.
   axis         axis to filter along; negative values count from the end.
   output       receives the output array obtained from NI_OutputArray().
   output_in    forwarded to NI_OutputArray().
   output_type  requested output type; tAny selects the input type.

   Returns 1 on success and 0 if a Python error is set on exit. */
int NI_SplineFilter1D(PyArrayObject *input, int order, int axis, 
                      PyArrayObject **output, PyObject *output_in, 
                      NumarrayType output_type)
{
  int rank, in_type, length, n_lines, have_more, n_poles = 0;
  int ii, jj, pp;
  int dims[NI_MAXDIM];
  double gain, poles[2];
  double *work = NULL;
  NI_LineBuffer src_lines, dst_lines;

  assert(input != NULL);
  assert(output != NULL);

  /* orders below two need no filtering, orders above five are not
     supported: */
  if (order < 2 || order > 5) {
    PyErr_SetString(PyExc_RuntimeError, "spline order not supported");
    goto exit;
  }

  rank = NI_GetArrayRank(input);

  /* a negative axis counts from the last dimension: */
  if (axis < 0)
    axis += rank;

  /* the axis must exist: */
  if (rank > 0 && (axis < 0 || axis >= rank)) {
    PyErr_SetString(PyExc_RuntimeError, "invalid axis specified");
    goto exit;
  }

  /* neither complex input nor complex output is supported: */
  in_type = NI_GetArrayType(input);
  if (in_type == tComplex32 || in_type == tComplex64 ||
      output_type == tComplex32 || output_type == tComplex64) {
    PyErr_SetString(PyExc_RuntimeError, "complex arrays not supported");
    goto exit;
  }

  /* without an explicit output type the input type is used: */
  if (output_type == tAny)
    output_type = (NumarrayType)in_type;

  /* the output has the shape of the input: */
  NI_GetArrayDimensions(input, dims);
  if (!NI_OutputArray(output_type, rank, dims, output_in, output))
    goto exit;

  /* length of the lines that are filtered: */
  length = rank > 0 ? dims[axis] : 1;
  if (length < 1)
    goto exit;

  /* poles of the recursive filter for the requested order: */
  if (order == 2) {
    n_poles = 1;
    poles[0] = sqrt(8.0) - 3.0;
  } else if (order == 3) {
    n_poles = 1;
    poles[0] = sqrt(3.0) - 2.0;
  } else if (order == 4) {
    n_poles = 2;
    poles[0] = sqrt(664.0 - sqrt(438976.0)) + sqrt(304.0) - 19.0;
    poles[1] = sqrt(664.0 + sqrt(438976.0)) - sqrt(304.0) - 19.0;
  } else if (order == 5) {
    n_poles = 2;
    poles[0] = sqrt(67.5 - sqrt(4436.25)) + sqrt(26.25) - 6.5;
    poles[1] = sqrt(67.5 + sqrt(4436.25)) - sqrt(26.25) - 6.5;
  }

  /* overall gain, applied to each line before the recursions: */
  gain = 1.0;
  for(pp = 0; pp < n_poles; pp++)
    gain *= (1.0 - poles[pp]) * (1.0 - 1.0 / poles[pp]);

  /* the lines are filtered in place, so the input and the output line
     buffer are both set up on top of the same work array: */
  n_lines = -1;
  if (!NI_AllocateLineBuffer(input, axis, 0, 0, &n_lines, BUFFER_SIZE,
                             &work))
    goto exit;
  if (!NI_InitLineBuffer(input, axis, 0, 0, n_lines, work,
                         NI_EXTEND_DEFAULT, 0.0, &src_lines))
    goto exit;
  if (!NI_InitLineBuffer(*output, axis, 0, 0, n_lines, work,
                         NI_EXTEND_DEFAULT, 0.0, &dst_lines))
    goto exit;

  /* process the array one buffer of lines at a time: */
  do {
    /* fetch the next lines from the input array: */
    if (!NI_ArrayToLineBuffer(&src_lines, &n_lines, &have_more))
      goto exit;

    for(jj = 0; jj < n_lines; jj++) {
      double *ln = NI_GET_LINE(src_lines, jj);

      /* a line of a single sample is passed on unfiltered: */
      if (length <= 1)
        continue;

      for(ii = 0; ii < length; ii++)
        ln[ii] *= gain;

      for(pp = 0; pp < n_poles; pp++) {
        const double z = poles[pp];
        /* number of terms after which powers of the pole are dropped: */
        const int n_terms = (int)ceil(log(TOLERANCE) / log(fabs(z)));

        /* starting value of the forward recursion: */
        if (n_terms < length) {
          /* truncated sum over the first samples of the line: */
          double zn = z;
          double acc = ln[0];
          for(ii = 1; ii < n_terms; ii++) {
            acc += zn * ln[ii];
            zn *= z;
          }
          ln[0] = acc;
        } else {
          /* the line is short, sum over all of its samples: */
          double zn = z;
          double inv_z = 1.0 / z;
          double z2n = pow(z, (double)(length - 1));
          double acc = ln[0] + z2n * ln[length - 1];
          z2n *= z2n * inv_z;
          for(ii = 1; ii < length - 1; ii++) {
            acc += (zn + z2n) * ln[ii];
            zn *= z;
            z2n *= inv_z;
          }
          ln[0] = acc / (1.0 - zn * zn);
        }

        /* forward recursion: */
        for(ii = 1; ii < length; ii++)
          ln[ii] += z * ln[ii - 1];

        /* starting value of the backward recursion: */
        ln[length - 1] = (z / (z * z - 1.0)) *
                         (ln[length - 1] + z * ln[length - 2]);

        /* backward recursion: */
        for(ii = length - 2; ii >= 0; ii--)
          ln[ii] = z * (ln[ii + 1] - ln[ii]);
      }
    }

    /* store the filtered lines in the output array: */
    if (!NI_LineBufferToArray(&dst_lines))
      goto exit;
  } while(have_more);

 exit:
  free(work);
  return PyErr_Occurred() ? 0 : 1;
}


/* Resample the input array at positions given by a coordinate mapping,
   using spline interpolation of the requested order.

   For every output element the corresponding position in the input is
   obtained from the first of these that is given:
     map          callback, called as map(output coordinates, input
                  coordinates, output_rank, input rank, map_data); it
                  must return non-zero on success.
     matrix/shift affine transform; matrix holds one row of output_rank
                  entries for each input axis, shift holds one entry for
                  each input axis.
     coordinates  array of rank output_rank + 1 whose first dimension
                  equals the input rank and whose remaining dimensions
                  equal output_dimensions; complex types are rejected.

   input              array to sample from; rank must be > 0, complex
                      types are rejected.
   output_dimensions  shape of the output (output_rank entries).
   output_type        type of the output, complex types are rejected.
   output, output_in  forwarded to NI_OutputArray().
   order              spline order, 0 to 5.
   mode               boundary mode, NI_EXTEND_FIRST to NI_EXTEND_LAST.
   cval               value stored for positions that map_coordinate()
                      flags as falling on the constant border.

   Values stored in signed integer outputs are rounded to the nearest
   integer, halves away from zero.

   Returns 1 on success and 0 if a Python error is set on exit. */
static int 
NI_GeometricTransformLow(PyArrayObject *input, 
                         int (*map)(int*, double*, int, int, void*),
                         void* map_data, double* matrix, double* shift,
                         PyArrayObject *coordinates, 
                         int *output_dimensions,
                         int output_rank, NumarrayType output_type, 
                         PyArrayObject **output, PyObject *output_in,
                         int order, int mode, double cval)
{
  char *po, *pi, *pc = NULL;
  int **edge_offsets = NULL, **data_offsets = NULL, filter_size, size;
  int ftmp[NI_MAXDIM], *fcoordinates = NULL, *foffsets = NULL, jj, kk, hh;
  int cstride = 0, irank, itype, coordinates_type = tAny, idims[NI_MAXDIM];
  int coordinates_dims[NI_MAXDIM], coordinates_strides[NI_MAXDIM];
  int istrides[NI_MAXDIM], ll;
  double **splvals = NULL, icoor[NI_MAXDIM];
  NI_Iterator io, ic;

  assert(input != NULL);
  assert(output != NULL);

  /* irank is set before the first jump to exit, the cleanup code there
     relies on it: */
  irank = NI_GetArrayRank(input);
  NI_GetArrayDimensions(input, idims);

  /* check rank: */
  if (irank < 1 || output_rank < 1) {
    PyErr_SetString(PyExc_RuntimeError,
                    "input and output ranks must be > 0");
    goto exit;
  }

  /* check spline order: */
  if (order < 0 || order > 5) {
    PyErr_SetString(PyExc_RuntimeError, "spline order not supported");
    goto exit;
  }

  /* complex types are not supported: */
  itype = NI_GetArrayType(input);
  if (itype == tComplex32 || itype == tComplex64 ||
      output_type == tComplex32 || output_type == tComplex64) {
    PyErr_SetString(PyExc_RuntimeError, "complex arrays not supported");
    goto exit;
  }

  /* if the mapping is from array coordinates: */
  if (coordinates) {
    /* check coordinates rank: */
    if (output_rank != NI_GetArrayRank(coordinates) - 1) {
      PyErr_SetString(PyExc_RuntimeError, 
                      "coordinate array rank incorrect");
      goto exit;
    }
    /* check coordinates shape: */
    NI_GetArrayDimensions(coordinates, coordinates_dims);
    for(jj = 0; jj < output_rank; jj++)
      if (output_dimensions[jj] != coordinates_dims[jj + 1]) {
        PyErr_SetString(PyExc_RuntimeError, 
                        "coordinate array shape incorrect");
        goto exit;
      }
    if (coordinates_dims[0] != irank) {
      PyErr_SetString(PyExc_RuntimeError, 
                      "coordinate array shape incorrect");
      goto exit;
    }
    /* complex type not supported: */
    coordinates_type = NI_GetArrayType(coordinates);
    if (coordinates_type == tComplex32 || coordinates_type == tComplex64) {
      PyErr_SetString(PyExc_RuntimeError, 
                      "complex coordinate arrays not supported");
      goto exit;
    }
    /* initialize a line iterator along the first axis: */
    if (!NI_InitLineIterator(coordinates, &ic, 0))
      goto exit;
    pc = NI_GetArrayData(coordinates);   
    NI_GetArrayStrides(coordinates, coordinates_strides);
    /* step between the coordinates that belong to one output element: */
    cstride = coordinates_strides[0];
  }

  /* offsets used at the borders; the entries of edge_offsets are either
     NULL or point into data_offsets, which owns the storage: */
  edge_offsets = (int**)malloc(irank * sizeof(int*));
  data_offsets = (int**)malloc(irank * sizeof(int*));
  if (!edge_offsets || !data_offsets) {
    PyErr_NoMemory();
    goto exit;
  }
  /* clear all entries first, so that the cleanup code can free them
     even if a later allocation fails: */
  for(jj = 0; jj < irank; jj++)
    data_offsets[jj] = NULL;
  for(jj = 0; jj < irank; jj++) {
    data_offsets[jj] = (int*)malloc((order + 1) * sizeof(int));
    if (!data_offsets[jj]) {
      PyErr_NoMemory();
      goto exit;
    }
  }
  /* will hold the spline coefficients: */
  splvals = (double**)malloc(irank * sizeof(double*));
  if (!splvals) {
    PyErr_NoMemory();
    goto exit;
  }
  for(jj = 0; jj < irank; jj++)
    splvals[jj] = NULL;
  for(jj = 0; jj < irank; jj++) {
    splvals[jj] = (double*)malloc((order + 1) * sizeof(double));
    if (!splvals[jj]) {
      PyErr_NoMemory();
      goto exit;
    }
  }

  /* check mode: */
  if (mode < NI_EXTEND_FIRST || mode > NI_EXTEND_LAST) {
    PyErr_SetString(PyExc_RuntimeError, "mode not supported");
    goto exit;
  }
  
  /* number of input samples under the filter: (order + 1) ^ irank */
  filter_size = 1;
  for(jj = 0; jj < irank; jj++)
    filter_size *= order + 1;

  /* allocate output: */
  if (!NI_OutputArray(output_type, output_rank, output_dimensions, 
                      output_in, output))
    goto exit;

  /* initialize output iterator: */
  if (!NI_InitPointIterator(*output, &io))
    goto exit;

  /* get data pointers: */
  pi = NI_GetArrayData(input);
  po = NI_GetArrayData(*output);
    

  /* make a table of all possible coordinates within the spline filter: */
  fcoordinates = (int*)malloc(irank * filter_size * sizeof(int));
  /* make a table of all offsets within the spline filter: */
  foffsets = (int*)malloc(filter_size * sizeof(int));
  if (!fcoordinates || !foffsets) {
    PyErr_NoMemory();
    goto exit;
  }
  NI_GetArrayStrides(input, istrides);
  for(jj = 0; jj < irank; jj++)
    ftmp[jj] = 0;
  kk = 0;
  for(hh = 0; hh < filter_size; hh++) {    
    for(jj = 0; jj < irank; jj++)
      fcoordinates[jj + hh * irank] = ftmp[jj];
    foffsets[hh] = kk;
    /* advance ftmp like an odometer, the last axis runs fastest; kk
       follows as the offset of the position relative to the start of
       the filter: */
    for(jj = irank - 1; jj >= 0; jj--) {
      if (ftmp[jj] < order) {
        ftmp[jj]++;
        kk += istrides[jj];
        break;
      } else {
        ftmp[jj] = 0;
        kk -= istrides[jj] * order;
      }
    }
  }

  size = NI_Elements(*output);

  /* iterate over all output elements: */
  for(kk = 0; kk < size; kk++) {
    double t = 0.0;
    int constant = 0, edge = 0, offset = 0;
    if (map) {
      /* call the mapping function: */
      if (!map(io.coordinates, icoor, output_rank, irank, map_data)) {
        if (!PyErr_Occurred())
          PyErr_SetString(PyExc_RuntimeError,
                          "unknown error in mapping function");
        goto exit;
      }
    } else if (matrix) {
      /* do an affine transformation: */
      double *p = matrix;
      for(hh = 0; hh < irank; hh++) {
        icoor[hh] = 0.0;
        for(ll = 0; ll < output_rank; ll++)
          icoor[hh] += io.coordinates[ll] * *p++;
        icoor[hh] += shift[hh];
      }
    } else if (coordinates) {
      /* mapping is from a coordinates array: */
      char *p = pc;
      switch(coordinates_type) {
      case tBool:
        for(hh = 0; hh < irank; hh++) {
          icoor[hh] = *(Bool*)p;
          p += cstride;
        }
        break;
      case tUInt8:
        for(hh = 0; hh < irank; hh++) {
          icoor[hh] = *(UInt8*)p;
          p += cstride;
        }
        break;
      case tUInt16:
        for(hh = 0; hh < irank; hh++) {
          icoor[hh] = *(UInt16*)p;
          p += cstride;
        }
        break;
      case tUInt32:
        for(hh = 0; hh < irank; hh++) {
          icoor[hh] = *(UInt32*)p;
          p += cstride;
        }
        break;
#if HAS_UINT64
      case tUInt64:
        for(hh = 0; hh < irank; hh++) {
          icoor[hh] = *(UInt64*)p;
          p += cstride;
        }
        break;
#endif
      case tInt8:
        for(hh = 0; hh < irank; hh++) {
          icoor[hh] = *(Int8*)p;
          p += cstride;
        }
        break;
      case tInt16:
        for(hh = 0; hh < irank; hh++) {
          icoor[hh] = *(Int16*)p;
          p += cstride;
        }
        break;
      case tInt32:
        for(hh = 0; hh < irank; hh++) {
          icoor[hh] = *(Int32*)p;
          p += cstride;
        }
        break;
      case tInt64:
        for(hh = 0; hh < irank; hh++) {
          icoor[hh] = *(Int64*)p;
          p += cstride;
        }
        break;
      case tFloat32:
        for(hh = 0; hh < irank; hh++) {
          icoor[hh] = *(Float32*)p;
          p += cstride;
        }
        break;
      case tFloat64:
        for(hh = 0; hh < irank; hh++) {
          icoor[hh] = *(Float64*)p;
          p += cstride;
        }
        break;
      default:
        PyErr_SetString(PyExc_RuntimeError, 
                        "coordinate array data type not supported");
        goto exit;
      }
    }
    /* iterate over axes: */
    for(hh = 0; hh < irank; hh++) {
      /* if the input coordinate is outside the borders, map it; -1 is
         passed as the flag value that marks the constant border: */
      double cc =  map_coordinate(icoor[hh], idims[hh], mode, -1);
      if (cc > -1.0) {
        /* find the filter location along this axis: */
        int start;
        if (order & 1) {
          start = (int)floor(cc) - order / 2;
        } else {
          start = (int)floor(cc + 0.5) - order / 2;
        }
        /* get the offset to the start of the filter: */
        offset += istrides[hh] * start;
        if (start < 0 || start + order >= idims[hh]) {
          /* the filter extends beyond the array along this axis; every
             filter position is folded back into the array with a mirror
             rule of period 2 * len - 2, whatever the value of mode: */
          edge = 1;
          edge_offsets[hh] = data_offsets[hh];
          for(ll = 0; ll <= order; ll++) {
            int idx = start + ll;
            int len = idims[hh];
            if (len <= 1) {
              idx = 0;
            } else {
              int s2 = 2 * len - 2;
              if (idx < 0) {
                idx = s2 * (int)(-idx / s2) + idx;
                idx = idx <= 1 - len ? idx + s2 : -idx;
              } else if (idx >= len) {
                idx -= s2 * (int)(idx / s2);
                if (idx >= len)
                  idx = s2 - idx;
              }
            }
            /* calculate and store the offsets at this edge, relative to
               the start of the filter: */
            edge_offsets[hh][ll] = istrides[hh] * (idx - start);
          }
        } else {
          /* we are not at the border, use precalculated offsets: */
          edge_offsets[hh] = NULL;
        }
        spline_coefficients(cc, order, splvals[hh]);
      } else {
        /* we use the constant border condition: */
        constant = 1;
        break;
      }
    }

    if (!constant) {                                                         
      int *ff = fcoordinates;               
      /* sum the weighted input samples under the filter: */
      for(hh = 0; hh < filter_size; hh++) {                           
        double coeff = 0.0;
        int idx = 0;
        if (edge) {                                                         
          /* at least one axis needs the folded offsets: */
          for(ll = 0; ll < irank; ll++) { 
            if (edge_offsets[ll])                 
              idx += edge_offsets[ll][ff[ll]];         
            else                                                         
              idx += ff[ll] * istrides[ll];
          }        
        } else {
          idx = foffsets[hh];
        }
        idx += offset;
        switch(itype) {
        case tBool:
          coeff = *(Bool*)(pi + idx);
          break;
        case tUInt8:
          coeff = *(UInt8*)(pi + idx);
          break;
        case tUInt16:
          coeff = *(UInt16*)(pi + idx);
          break;
        case tUInt32:
          coeff = *(UInt32*)(pi + idx);
          break;
#if HAS_UINT64
        case tUInt64:
          coeff = *(UInt64*)(pi + idx);
          break;
#endif
        case tInt8:
          coeff = *(Int8*)(pi + idx);
          break;
        case tInt16:
          coeff = *(Int16*)(pi + idx);
          break;
        case tInt32:
          coeff = *(Int32*)(pi + idx);
          break;
        case tInt64:
          coeff = *(Int64*)(pi + idx);
          break;
        case tFloat32:
          coeff = *(Float32*)(pi + idx);
          break;
        case tFloat64:
          coeff = *(Float64*)(pi + idx);
          break;
        default:
          PyErr_SetString(PyExc_RuntimeError, "data type not supported");
          goto exit;
        }
        /* calculate the interpolated value; for order zero the sample
           is used as it is: */
        for(ll = 0; ll < irank; ll++)
          if (order > 0)                                                 
            coeff *= splvals[ll][ff[ll]];
        t += coeff;                                                         
        ff += irank;
      }                                                                 
    } else {
      t = cval;
    }
    /* store output value. The conversion to an integer type truncates
       toward zero, so rounding to nearest needs an offset of 0.5 in the
       direction of the sign; adding 0.5 to a negative value would round
       it wrongly (-1.0 would be stored as 0). Unsigned types have no
       valid negative values and keep the plain + 0.5: */
    switch (output_type) {
    case tBool:
      *(Bool*)po = (Bool)t;
      break;
    case tUInt8:
      *(UInt8*)po = (UInt8)(t + 0.5);
      break;
    case tUInt16:
      *(UInt16*)po = (UInt16)(t + 0.5);
      break;
    case tUInt32:
      *(UInt32*)po = (UInt32)(t + 0.5);
      break;
#if HAS_UINT64
    case tUInt64:
      *(UInt64*)po = (UInt64)(t + 0.5);
      break;
#endif
    case tInt8:
      *(Int8*)po = (Int8)(t < 0.0 ? t - 0.5 : t + 0.5);
      break;
    case tInt16:
      *(Int16*)po = (Int16)(t < 0.0 ? t - 0.5 : t + 0.5);
      break;
    case tInt32:
      *(Int32*)po = (Int32)(t < 0.0 ? t - 0.5 : t + 0.5);
      break;
    case tInt64:
      *(Int64*)po = (Int64)(t < 0.0 ? t - 0.5 : t + 0.5);
      break;
    case tFloat32:
      *(Float32*)po = (Float32)t;
      break;
    case tFloat64:
      *(Float64*)po = (Float64)t;
      break;
    default:
      PyErr_SetString(PyExc_RuntimeError, "data type not supported");
      goto exit;
    }
    /* move to the next output element (and, if a coordinates array is
       used, to its coordinates): */
    if (coordinates) {
      NI_ITERATOR_NEXT2(io, ic, po, pc);
    } else {
      NI_ITERATOR_NEXT(io, po);
    }
  }

 exit:
  /* the entries of edge_offsets alias data_offsets, only the table
     itself is freed here: */
  if (edge_offsets) 
    free(edge_offsets);
  if (data_offsets) {
    for(jj = 0; jj < irank; jj++)
      free(data_offsets[jj]);
    free(data_offsets);
  }
  if (splvals) {
    for(jj = 0; jj < irank; jj++)
      free(splvals[jj]);
    free(splvals);
  }
  if (foffsets)
    free(foffsets);
  if (fcoordinates)
    free(fcoordinates);
  return PyErr_Occurred() ? 0 : 1;
}

/* Geometric transform with the coordinate mapping supplied by the
   callback map (which receives map_data): forwards to
   NI_GeometricTransformLow() without a matrix, a shift or a coordinates
   array and returns its result. */
int NI_GeometricTransform(PyArrayObject *input, 
                          int (*map)(int*, double*, int, int, void*),
                          void* map_data, int *output_dimensions,
                          int output_rank, NumarrayType output_type, 
                          PyArrayObject **output, PyObject *output_in,
                          int order, int mode, double cval)
{
  int status;

  assert(map != NULL);

  status = NI_GeometricTransformLow(input, map, map_data,
                                    NULL, NULL, NULL,
                                    output_dimensions, output_rank,
                                    output_type, output, output_in,
                                    order, mode, cval);
  return status;
}

/* Geometric transform with the input positions taken from the
   coordinates array: forwards to NI_GeometricTransformLow() without a
   mapping callback, a matrix or a shift and returns its result. */
int NI_MapCoordinates(PyArrayObject *input, PyArrayObject *coordinates,
                      int *output_dimensions, int output_rank, 
                      NumarrayType output_type, PyArrayObject **output, 
                      PyObject *output_in, int order, int mode, 
                      double cval)
{
  int status;

  assert(coordinates != NULL);

  status = NI_GeometricTransformLow(input, NULL, NULL, NULL, NULL,
                                    coordinates,
                                    output_dimensions, output_rank,
                                    output_type, output, output_in,
                                    order, mode, cval);
  return status;
}

/* Geometric transform with the input positions calculated from matrix
   and shift: forwards to NI_GeometricTransformLow() without a mapping
   callback or a coordinates array and returns its result. */
int NI_AffineTransform(PyArrayObject *input, double *matrix, double* shift,
                       int *output_dimensions, int output_rank, 
                       NumarrayType output_type, PyArrayObject **output, 
                       PyObject *output_in, int order, int mode, 
                       double cval)
{
  int status;

  assert(matrix != NULL && shift != NULL);

  status = NI_GeometricTransformLow(input, NULL, NULL, matrix, shift,
                                    NULL,
                                    output_dimensions, output_rank,
                                    output_type, output, output_in,
                                    order, mode, cval);
  return status;
}

int NI_ZoomShift(PyArrayObject *input, double* zooms, double* shifts,
                   int *output_dimensions, int output_rank, 
                   NumarrayType output_type, PyArrayObject **output, 
                   PyObject *output_in, int order, int mode, double cval)
{
  char *pi, *po;
  int **zeros = NULL, **offsets = NULL, ***edge_offsets = NULL, jj, hh, kk;
  int ftmp[NI_MAXDIM], *fcoordinates = NULL, *foffsets = NULL, filter_size;
  int size, irank, itype, idims[NI_MAXDIM], istrides[NI_MAXDIM];
  double ***splvals = NULL;
  NI_Iterator io;

  assert(input != NULL);
  assert(zooms != NULL || shifts != NULL);
  assert(output_dimensions != NULL);
  assert(output != NULL);

  irank = NI_GetArrayRank(input);
  NI_GetArrayDimensions(input, idims);

  /* check ranks: */
  if (irank < 1 || output_rank < 1) {
    PyErr_SetString(PyExc_RuntimeError, 
                    "input and output ranks must be > 0");
    goto exit;
  }

  /* check spline order: */
  if (order < 0 || order > 5) {
    PyErr_SetString(PyExc_RuntimeError, "spline order not supported");
    goto exit;
  }

  /* complex types not supported: */
  itype = NI_GetArrayType(input);
  if (itype == tComplex32 || itype == tComplex64 ||
      output_type == tComplex32 || output_type == tComplex64) {
    PyErr_SetString(PyExc_RuntimeError, "complex arrays not supported");
    goto exit;
  }

  /* check boundary mode: */
  if (mode < NI_EXTEND_FIRST || mode > NI_EXTEND_LAST) {
    PyErr_SetString(PyExc_RuntimeError, "mode not supported");
    goto exit;
  }
  
  /* if the mode is 'constant' we need some temps later: */
  if (mode == NI_EXTEND_CONSTANT) {
    zeros = (int**)malloc(irank * sizeof(int*));
    if (!zeros) {
      PyErr_NoMemory();
      goto exit;
    }
    for(jj = 0; jj < irank; jj++)
      zeros[jj] = NULL;
    for(jj = 0; jj < irank; jj++) {
      zeros[jj] = (int*)malloc(output_dimensions[jj] * sizeof(int));
      if(!zeros[jj]) {
        PyErr_NoMemory();
        goto exit;
      }    
    }
  }
  
  /* store offsets, along each axis: */
  offsets = (int**)malloc(irank * sizeof(int*));
  /* store spline coefficients, along each axis: */
  splvals = (double***)malloc(irank * sizeof(double**));
  /* store offsets at all edges: */
  edge_offsets = (int***)malloc(irank * sizeof(int**));
  if (!offsets || !splvals || !edge_offsets) {
    PyErr_NoMemory();
    goto exit;
  }
  for(jj = 0; jj < irank; jj++) {
    offsets[jj] = NULL;
    splvals[jj] = NULL;
    edge_offsets[jj] = NULL;
  }
  for(jj = 0; jj < irank; jj++) {
    offsets[jj] = (int*)malloc(output_dimensions[jj] * sizeof(int));
    splvals[jj] = (double**)malloc(output_dimensions[jj] *
                                   sizeof(double*));
    edge_offsets[jj] = (int**)malloc(output_dimensions[jj] * 
                                     sizeof(double*));
    if (!offsets[jj] || !splvals[jj] || !edge_offsets[jj]) {
      PyErr_NoMemory();
      goto exit;
    }  
    for(hh = 0; hh < output_dimensions[jj]; hh++) {
      splvals[jj][hh] = NULL;
      edge_offsets[jj][hh] = NULL;
    }
  }

  NI_GetArrayStrides(input, istrides);

  /* precalculate offsets, and offsets at the edge: */
  for(jj = 0; jj < irank; jj++) {
    double shift = 0.0, zoom = 0.0;
    if (shifts)
      shift = shifts[jj];
    if (zooms)
      zoom = zooms[jj];
    for(kk = 0; kk < output_dimensions[jj]; kk++) {
      double cc = (double)kk;
      if (shifts)
        cc += shift;
      if (zooms)
        cc *= zoom;
      cc = map_coordinate(cc, idims[jj], mode, -1);
      if (cc > -1.0) {
        int start;
        if (zeros && zeros[jj])
          zeros[jj][kk] = 0;
        if (order & 1) {
          start = (int)floor(cc) - order / 2;
        } else {
          start = (int)floor(cc + 0.5) - order / 2;
        }
        offsets[jj][kk] = istrides[jj] * start;
        if (start < 0 || start + order >= idims[jj]) {
          edge_offsets[jj][kk] = (int*)malloc((order + 1) * sizeof(int));
          if (!edge_offsets[jj][kk]) {
            PyErr_NoMemory();
            goto exit;
          }
          for(hh = 0; hh <= order; hh++) {
            int idx = start + hh;
             int len = idims[jj];
            if (len <= 1) {
              idx = 0;
            } else {
              int s2 = 2 * len - 2;
              if (idx < 0) {
                idx = s2 * (int)(-idx / s2) + idx;
                idx = idx <= 1 - len ? idx + s2 : -idx;
              } else if (idx >= len) {
                idx -= s2 * (int)(idx / s2);
                if (idx >= len)
                  idx = s2 - idx;
              }
            }
            edge_offsets[jj][kk][hh] = istrides[jj] * (idx - start);
          }
        }
        if (order > 0) {
          splvals[jj][kk] = (double*)malloc((order + 1) * sizeof(double));
          if (!splvals[jj][kk]) {
            PyErr_NoMemory();
            goto exit;
          }        
          spline_coefficients(cc, order, splvals[jj][kk]);
        }        
      } else {
        zeros[jj][kk] = 1;
      }
    }
  }

  filter_size = 1;
  for(jj = 0; jj < irank; jj++)
    filter_size *= order + 1;

  /* allocate output: */
  if (!NI_OutputArray(output_type, output_rank, output_dimensions, 
                      output_in, output))
    goto exit;

  if (!NI_InitPointIterator(*output, &io))
    goto exit;

  pi = NI_GetArrayData(input);
  po = NI_GetArrayData(*output);
    
  /* store all coordinates and offsets with filter: */
  fcoordinates = (int*)malloc(irank * filter_size * sizeof(int));
  foffsets = (int*)malloc(filter_size * sizeof(int));
  if (!fcoordinates || !foffsets) {
    PyErr_NoMemory();
    goto exit;
  }

  for(jj = 0; jj < irank; jj++)
    ftmp[jj] = 0;
  kk = 0;
  for(hh = 0; hh < filter_size; hh++) {    
    for(jj = 0; jj < irank; jj++)
      fcoordinates[jj + hh * irank] = ftmp[jj];
    foffsets[hh] = kk;
    for(jj = irank - 1; jj >= 0; jj--) {
      if (ftmp[jj] < order) {
        ftmp[jj]++;
        kk += istrides[jj];
        break;
      } else {
        ftmp[jj] = 0;
        kk -= istrides[jj] * order;
      }
    }
  }

  size = NI_Elements(*output);

  for(kk = 0; kk < size; kk++) {                                         
    double t = 0.0;                                                         
    int edge = 0, oo = 0, zero = 0;
                                                                         
    for(hh = 0; hh < irank; hh++) {                                         
      if (zeros && zeros[hh][io.coordinates[hh]]) { 
        /* we use constant border condition */
        zero = 1;                                                         
        break;                                                         
      }                                                                 
      oo += offsets[hh][io.coordinates[hh]];                                 
      if (edge_offsets[hh][io.coordinates[hh]])                         
        edge = 1;                                                         
    }                                                                         
      
    if (!zero) {                                                         
      int *ff = fcoordinates;
      for(hh = 0; hh < filter_size; hh++) {
        double coeff = 0.0;
        int idx = 0;        
        if (edge) {
            /* use precalculated edge offsets: */
          for(jj = 0; jj < irank; jj++) {                                 
            if (edge_offsets[jj][io.coordinates[jj]])                 
              idx += edge_offsets[jj][io.coordinates[jj]][ff[jj]];         
            else                                                         
              idx += ff[jj] * istrides[jj];
          }
          idx += oo;
        } else {
          /* use normal offsets: */
          idx += oo + foffsets[hh];
        }
        switch(itype) {
        case tBool:
          coeff = *(Bool*)(pi + idx);
          break;
        case tUInt8:
          coeff = *(UInt8*)(pi + idx);
          break;
        case tUInt16:
          coeff = *(UInt16*)(pi + idx);
          break;
        case tUInt32:
          coeff = *(UInt32*)(pi + idx);
          break;
#if HAS_UINT64
        case tUInt64:
          coeff = *(UInt64*)(pi + idx);
          break;
#endif
        case tInt8:
          coeff = *(Int8*)(pi + idx);
          break;
        case tInt16:
          coeff = *(Int16*)(pi + idx);
          break;
        case tInt32:
          coeff = *(Int32*)(pi + idx);
          break;
        case tInt64:
          coeff = *(Int64*)(pi + idx);
          break;
        case tFloat32:
          coeff = *(Float32*)(pi + idx);
          break;
        case tFloat64:
          coeff = *(Float64*)(pi + idx);
          break;
        default:
          PyErr_SetString(PyExc_RuntimeError, "data type not supported");
          goto exit;
        }
        /* calculate interpolated value: */
        for(jj = 0; jj < irank; jj++)                 
          if (order > 0)                                                 
            coeff *= splvals[jj][io.coordinates[jj]][ff[jj]];         
        t += coeff;                                                         
        ff += irank;
      }                                                 
    }        else {
      t = cval;
    }                
    /* store output: */
    switch (output_type) {
    case tBool:
      *(Bool*)po = (Bool)t;
      break;
    case tUInt8:
      *(UInt8*)po = (UInt8)(t + 0.5);
      break;
    case tUInt16:
      *(UInt16*)po = (UInt16)(t + 0.5);
      break;
    case tUInt32:
      *(UInt32*)po = (UInt32)(t + 0.5);
      break;
#if HAS_UINT64
    case tUInt64:
      *(UInt64*)po = (UInt64)(t + 0.5);
      break;
#endif
    case tInt8:
      *(Int8*)po = (Int8)(t + 0.5);
      break;
    case tInt16:
      *(Int16*)po = (Int16)(t + 0.5);
      break;
    case tInt32:
      *(Int32*)po = (Int32)(t + 0.5);
      break;
    case tInt64:
      *(Int64*)po = (Int64)(t + 0.5);
      break;
    case tFloat32:
      *(Float32*)po = (Float32)t;
      break;
    case tFloat64:
      *(Float64*)po = (Float64)t;
      break;
    default:
      PyErr_SetString(PyExc_RuntimeError, "data type not supported");
      goto exit;
    }
    NI_ITERATOR_NEXT(io, po);
  }                                                                         

 exit:
  if (zeros) {
    for(jj = 0; jj < irank; jj++)
      if (zeros[jj])
        free(zeros[jj]);
    free(zeros);
  }
  if (offsets) {
    for(jj = 0; jj < irank; jj++)
      if (offsets[jj])
        free(offsets[jj]);
    free(offsets);
  }
  if (splvals) {
    for(jj = 0; jj < irank; jj++) {
      if (splvals[jj]) {
        for(hh = 0; hh < output_dimensions[jj]; hh++)
          if (splvals[jj][hh])
            free(splvals[jj][hh]);
        free(splvals[jj]);
      }
    }
    free(splvals);
  }
  if (edge_offsets) {
    for(jj = 0; jj < irank; jj++) {
      if (edge_offsets[jj]) {
        for(hh = 0; hh < output_dimensions[jj]; hh++)
          if (edge_offsets[jj][hh])
            free(edge_offsets[jj][hh]);
        free(edge_offsets[jj]);
      }
    }
    free(edge_offsets);
  }
  if (foffsets)
    free(foffsets);
  if (fcoordinates)
    free(fcoordinates);
  return PyErr_Occurred() ? 0 : 1;
}
