/*************************************************************************/
/*                                                                       */
/* Licensed Materials - Property of IBM                                  */
/*                                                                       */
/* (C) Copyright IBM Corp. 2009, 2010                                    */
/* All Rights Reserved                                                   */
/*                                                                       */
/* US Government Users Restricted Rights - Use, duplication or           */
/* disclosure restricted by GSA ADP Schedule Contract with IBM Corp.     */
/*                                                                       */
/* Inspired by Caltech's Java Applet Fluid Solver at                     */
/* www.multires.caltech.edu/teaching/demos/java/FluidSolver.java         */
/*                                                                       */
/* References:  Visual Simulation of Smoke                               */
/*              R. Fedkiw, J. Stam, H. W. Jensen                         */
/*              SIGGRAPH 2001 Annual Proceedings                         */
/*                                                                       */
/*************************************************************************/


#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <sys/stat.h>
#include <CL/opencl.h>
#include "clu.h"

#include "solver.h"
#include "clock.h"


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  clear_host
 *  Description:  zero-fill the eight work buffers of the host solver
 *                (uh, vh, dh, uOldh, vOldh, dOldh, curlh, temph).
 *                Every buffer is cleared over s->size * sizeof (float) bytes.
 *                imgDatah is not touched by this routine.
 * =====================================================================================
 */
void
clear_host (struct fluid_solver_host *s)
{
  const size_t nbytes = (size_t) s->size * sizeof (float);
  /* Same buffers, same order as they are cleared one by one. */
  void *const buffers[] = {
    s->uh, s->vh, s->dh,
    s->uOldh, s->vOldh, s->dOldh,
    s->curlh, s->temph
  };
  size_t k;

  for (k = 0; k < sizeof buffers / sizeof buffers[0]; k++)
  {
    memset (buffers[k], 0, nbytes);
  }
}

/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  setup_host
 *  Description:  
 *        _ initializes the fluid_solver_host structure: stores n and dt, sets
 *          size to (n + 2 * _PAD) * (n + 2 * _PAD) and zeroes decay, diff and visc
 *        _ allocates the eight float work buffers (size floats each, contents
 *          left uninitialized by malloc) and the n * n unsigned int image buffer
 *          (zero-filled by calloc)
 *        _ prints a message to stderr and exits with EXIT_FAILURE when any of
 *          the allocations fails
 * =====================================================================================
 */
void
setup_host (struct fluid_solver_host *s, int n, float dt)
{
  s->n = n;
  s->dt = dt;
  s->size = (n + (2 * _PAD)) * (n + (2 * _PAD));
  s->decay = 0.0f;
  s->diff = 0.0f;
  s->visc = 0.0f;

  size_t sz = (size_t)s->size * sizeof (float);
  size_t si = (size_t)s->n * (size_t)s->n * sizeof (unsigned int);

  /* Allocate memory for the buffers needed for the host computation. */
  s->dh = malloc (sz);
  s->uh = malloc (sz);
  s->vh = malloc (sz);
  s->dOldh = malloc (sz);
  s->uOldh = malloc (sz);
  s->vOldh = malloc (sz);
  s->temph = malloc (sz);
  s->curlh = malloc (sz);
  s->imgDatah = calloc (1, si);

  /* Every buffer allocated above must be checked, curlh included:
   * clear_host passes all of them to memset. */
  if ((!s->dh) || (!s->uh) || (!s->vh) || (!s->dOldh) || (!s->uOldh)
      || (!s->vOldh) || (!s->temph) || (!s->curlh) || (!s->imgDatah))
  {
    fprintf (stderr,
	     "Fluid workload - cannot allocate memory for host buffers in file %s, at line %d\n",
	     __FILE__, __LINE__);
    exit (EXIT_FAILURE);
  }
}


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  cleanup_buffers_host
 *  Description:  free the nine buffers owned by the structure (dh, uh, vh, dOldh,
 *                uOldh, vOldh, temph, curlh, imgDatah) and reset each pointer to
 *                NULL, so that a second call is harmless (free (NULL) is a no-op)
 *                and no dangling pointer is left behind.
 *                The structure itself is not freed.
 * =====================================================================================
 */
void cleanup_buffers_host (struct fluid_solver_host *s)
{
  free (s->dh);
  s->dh = NULL;
  free (s->uh);
  s->uh = NULL;
  free (s->vh);
  s->vh = NULL;
  free (s->dOldh);
  s->dOldh = NULL;
  free (s->uOldh);
  s->uOldh = NULL;
  free (s->vOldh);
  s->vOldh = NULL;
  free (s->temph);
  s->temph = NULL;
  free (s->curlh);
  s->curlh = NULL;
  free (s->imgDatah);
  s->imgDatah = NULL;
}

/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  insert_force_host
 *  Description:  write INIT_SIZE consecutive floats, starting at cell
 *                Ih (img_size / 2, img_size / 2), into the three "old" buffers:
 *                2000.0 into dOldh, 1000.0 into uOldh and 750.0 into vOldh.
 *                The start cell is derived from the global img_size, not from s->n.
 *                NOTE(review): presumably this seeds the density / velocity sources
 *                consumed by update_host - confirm with the caller.
 * =====================================================================================
 */
void
insert_force_host (struct fluid_solver_host *s)
{
  /* n is not read directly below; presumably the Ih() index macro expands
   * to an expression that uses it - confirm in solver.h. */
  int n = s->n;
  int ci = img_size / 2;
  int cj = img_size / 2;
  float *dens_src = &s->dOldh[Ih (ci, cj)];
  float *u_src = &s->uOldh[Ih (ci, cj)];
  float *v_src = &s->vOldh[Ih (ci, cj)];
  int k;

  for (k = 0; k < INIT_SIZE; k++)
  {
    dens_src[k] = 2000.0f;
    u_src[k] = 1000.0f;
    v_src[k] = 750.0f;
  }
}

/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  add_source_host
 *  Description:  accumulate a scaled source into a field, element by element:
 *                x[k] += dt * x0[k] for the first `size` elements.
 *                Nothing is written when size is zero or negative.
 * =====================================================================================
 */
static void
add_source_host (float *x, float *x0, float dt, int size)
{
  int remaining;

  for (remaining = size; remaining > 0; remaining--)
  {
    *x += dt * *x0;
    x++;
    x0++;
  }
}

/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  set_boundary_host
 *  Description:  fill the layer of cells just outside the interior range
 *                [_PAD, n + _PAD) from the adjacent interior cells.
 *                  b == 1 : cells outside the range of the first Ih index receive
 *                           the negated neighbour, the others a plain copy
 *                  b == 2 : cells outside the range of the second Ih index receive
 *                           the negated neighbour, the others a plain copy
 *                  other  : plain copy on all four sides
 *                The four corner cells are not written.
 * =====================================================================================
 */
static void
set_boundary_host (int b, float *x, int n)
{
  const int before = _PAD - 1;		/* layer preceding the interior */
  const int first = _PAD;		/* first interior index */
  const int last = (n - 1) + _PAD;	/* last interior index */
  const int after = (n - 1) + _PAD + 1;	/* layer following the interior */
  int k;

  for (k = first; k <= last; k++)
  {
    /* Outer layers along the first index. */
    if (b == 1)
    {
      x[Ih (before, k)] = -x[Ih (first, k)];
      x[Ih (after, k)] = -x[Ih (last, k)];
    }
    else
    {
      x[Ih (before, k)] = x[Ih (first, k)];
      x[Ih (after, k)] = x[Ih (last, k)];
    }

    /* Outer layers along the second index. */
    if (b == 2)
    {
      x[Ih (k, before)] = -x[Ih (k, first)];
      x[Ih (k, after)] = -x[Ih (k, last)];
    }
    else
    {
      x[Ih (k, before)] = x[Ih (k, first)];
      x[Ih (k, after)] = x[Ih (k, last)];
    }
  }
}


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  relax_sweep_host
 *  Description:  one relaxation sweep over the interior cells: every dst cell is
 *                set to (a * (sum of the four src neighbours) + x0) / c.
 *                Values are only read from src and x0 and only written to dst.
 * =====================================================================================
 */
static void
relax_sweep_host (float *dst, const float *src, const float *x0, float a, float c, int n)
{
  int row, col;

  for (row = _PAD; row < n + _PAD; row++)
  {
    for (col = _PAD; col < n + _PAD; col++)
    {
      dst[Ih (col, row)] = (a * (src[Ih (col - 1, row)] + src[Ih (col + 1, row)]
				 + src[Ih (col, row - 1)] + src[Ih (col, row + 1)])
			    + x0[Ih (col, row)]) / c;
    }
  }
}

/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  linear_solver_host
 *  Description:  run 5 passes of two relaxation sweeps each.  In every pass the
 *                first sweep reads x and writes xlast, the second reads xlast and
 *                writes x; set_boundary_host (b, ...) is applied to the buffer
 *                just written after each sweep.  The final result is left in x,
 *                xlast serves as scratch storage and x0 is only read.
 * =====================================================================================
 */
static void
linear_solver_host (int b, float *x, float *xlast, float *x0, float a, float c, int n)
{
  int pass;

  for (pass = 0; pass < 5; pass++)
  {
    relax_sweep_host (xlast, x, x0, a, c, n);
    set_boundary_host (b, xlast, n);

    relax_sweep_host (x, xlast, x0, a, c, n);
    set_boundary_host (b, x, n);
  }
}


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  diffuse_host
 *  Description:  hand the field over to linear_solver_host with the coefficient
 *                a = dt * diff * n * n and the divisor 1 + 4 * a.
 *                c receives the result, c0 is the right-hand side and temp is the
 *                scratch buffer of the solver; b is forwarded as boundary mode.
 * =====================================================================================
 */
static void
diffuse_host (int b, float *c, float *c0, float *temp, float diff, float dt, int n)
{
  const float rate = dt * diff * n * n;
  const float divisor = 1 + 4 * rate;

  linear_solver_host (b, c, temp, c0, rate, divisor, n);
}


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  project_host
 *  Description:  correct the field (x, y) using a scalar field p obtained from
 *                the linear solver:
 *        _ div receives the central difference of x along the first index plus
 *          that of y along the second index, scaled by -0.5 / n; p is zeroed
 *        _ linear_solver_host computes p from div (a = 1, c = 4, mode 0)
 *        _ 0.5 * n times the central difference of p is subtracted from x
 *          (first index) and from y (second index)
 *        _ boundaries are refreshed with mode 1 for x and mode 2 for y
 *                p, div and temp are overwritten.
 * =====================================================================================
 */
static void
project_host (float *x, float *y, float *p, float *div, float *temp, int n)
{
  const int first = _PAD;
  const int end = n + _PAD;
  const float half_n = 0.5f * n;
  int a, c;

  /* Pass 1: build the right-hand side and reset the unknown. */
  for (a = first; a < end; a++)
  {
    for (c = first; c < end; c++)
    {
      div[Ih (a, c)] = (x[Ih (a + 1, c)] - x[Ih (a - 1, c)]
			+ y[Ih (a, c + 1)] - y[Ih (a, c - 1)]) * -0.5f / n;
      p[Ih (a, c)] = 0;
    }
  }

  set_boundary_host (0, div, n);
  set_boundary_host (0, p, n);

  linear_solver_host (0, p, temp, div, 1, 4, n);

  /* Pass 2: subtract the scaled central differences of p. */
  for (c = first; c < end; c++)
  {
    for (a = first; a < end; a++)
    {
      x[Ih (a, c)] -= half_n * (p[Ih (a + 1, c)] - p[Ih (a - 1, c)]);
      y[Ih (a, c)] -= half_n * (p[Ih (a, c + 1)] - p[Ih (a, c - 1)]);
    }
  }

  set_boundary_host (1, x, n);
  set_boundary_host (2, y, n);
}


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  clamp_trace_host
 *  Description:  limit a traced-back coordinate to the range [0.5, n + 0.5];
 *                the upper limit is applied first, then the lower one.
 * =====================================================================================
 */
static inline float
clamp_trace_host (float pos, int n)
{
  if (pos > n + 0.5f)
  {
    pos = n + 0.5f;
  }
  if (pos < 0.5f)
  {
    pos = 0.5f;
  }
  return pos;
}

/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  advect_host
 *  Description:  for every interior cell (i, j), step back from the cell by
 *                dt * n times the local (du, dv) value, clamp the resulting
 *                position, and store in d the bilinear interpolation of d0 over
 *                the four cells surrounding that position.  Boundaries of d are
 *                then refreshed with mode b.
 *                NOTE(review): the clamp limits 0.5 and n + 0.5 do not include
 *                _PAD although the cell indices do; they coincide with the
 *                interior range only when _PAD is 1 - confirm against solver.h
 *                and the device kernel before changing anything.
 * =====================================================================================
 */
static void
advect_host (int b, float *d, float *d0, float *du, float *dv, float dt, int n)
{
  const float dt0 = dt * n;
  int i, j;

  for (j = _PAD; j < n + _PAD; j++)
  {
    for (i = _PAD; i < n + _PAD; i++)
    {
      int ci0, ci1, cj0, cj1;
      float wi0, wi1, wj0, wj1;
      float px = i - dt0 * du[Ih (i, j)];
      float py = j - dt0 * dv[Ih (i, j)];

      px = clamp_trace_host (px, n);
      py = clamp_trace_host (py, n);

      /* Integer cell below the position and its successor. */
      ci0 = (int) px;
      ci1 = ci0 + 1;
      cj0 = (int) py;
      cj1 = cj0 + 1;

      /* Interpolation weights from the fractional parts. */
      wi1 = px - ci0;
      wi0 = 1 - wi1;
      wj1 = py - cj0;
      wj0 = 1 - wj1;

      d[Ih (i, j)] = wi0 * (wj0 * d0[Ih (ci0, cj0)] + wj1 * d0[Ih (ci0, cj1)])
	+ wi1 * (wj0 * d0[Ih (ci1, cj0)] + wj1 * d0[Ih (ci1, cj1)]);
    }
  }

  set_boundary_host (b, d, n);
}


static void
HSVtoRGB (float *r, float *g, float *b, float h, float s, float v)
{
  int i;
  float f, p, q, t;

  if (s == 0)
  {
    /*  achromatic (grey) */
    *r = *g = *b = v;
    return;
  }

  h *= 6.0f;			/*  sector 0 to 5 */
  i = floor (h);
  f = h - i;			/*  factorial part of h */
  p = v * (1 - s);
  q = v * (1 - s * f);
  t = v * (1 - s * (1 - f));

  switch (i)
  {
  case 0:
    *r = v;
    *g = t;
    *b = p;
    break;
  case 1:
    *r = q;
    *g = v;
    *b = p;
    break;
  case 2:
    *r = p;
    *g = v;
    *b = t;
    break;
  case 3:
    *r = p;
    *g = q;
    *b = v;
    break;
  case 4:
    *r = t;
    *g = p;
    *b = v;
    break;
  default:			/*  case 5: */
    *r = v;
    *g = p;
    *b = q;
    break;
  }
}


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  clamp01
 *  Description:  return v limited to the closed range [0, 1]: values below 0
 *                give 0, values above 1 give 1, everything else is returned
 *                unchanged.
 * =====================================================================================
 */
static inline float
clamp01 (float v)
{
  return (v < 0.0) ? 0.0f : ((v > 1.0) ? 1.0f : v);
}


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  pack_img_host
 *  Description:  convert the dh / uh / vh fields into one packed pixel per
 *                interior cell and store it in imgDatah at Imh (i, j).
 *        _ value      : dh clamped to [0, 1]
 *        _ saturation : fixed at 0.9
 *        _ hue        : derived from |10 * uh| + |10 * vh|, capped at 0.8375
 *        _ the pixel bytes are written in memory order red, green, blue, 0xFF;
 *          the numeric value of the resulting unsigned int therefore depends on
 *          the byte order of the host
 * =====================================================================================
 */
void
pack_img_host (struct fluid_solver_host *s)
{
  int i, j;
  float vel, hue;
  float df, uf, vf;
  float r, g, b;
  float *d = s->dh;
  float *u = s->uh;
  float *v = s->vh;
  unsigned int *img = s->imgDatah;
  /* n is also kept in scope for the Ih() / Imh() index macros, which
   * presumably reference it - confirm in solver.h. */
  int n = s->n;
  union {
    unsigned char c[4];
    unsigned int rgba;
  } pixel;

  for (j = _PAD; j < n + _PAD; j++)
  {
    for (i = _PAD; i < n + _PAD; i++)
    {
      df = clamp01 (d[Ih (i, j)]);
      uf = fabsf (u[Ih (i, j)] * 10.0f);
      vf = fabsf (v[Ih (i, j)] * 10.0f);

      /* Sum of the scaled speeds, capped so that the hue computed below
       * never wraps further than 1.667 - 0.8375. */
      vel = uf + vf;
      if (vel > 0.8375f)
      {
	vel = 0.8375f;
      }

      /* Compute HSV and convert to RGB. The hue is encoded from the 
       * sum of the horizontal and vertical velocities where low velocities
       * are colored blue and high velocities are magenta: the hue starts at
       * 0.667 for vel == 0, decreases with vel, and wraps around by adding 1
       * once vel exceeds 0.667.
       */
      hue = ((vel > 0.667f) ? 1.667f : 0.667f) - vel;
      HSVtoRGB (&r, &g, &b, hue, 0.9f, df);

      pixel.c[0] = (unsigned char)(255.0f * r);
      pixel.c[1] = (unsigned char)(255.0f * g);
      pixel.c[2] = (unsigned char)(255.0f * b);
      pixel.c[3] = (unsigned char)(0xFF);

      img[Imh (i, j)] = pixel.rgba;
    }
  }
}


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  velocity_step_host
 *  Description:  advance the uh / vh fields by one time step; nbytes is the size
 *                in bytes of one work buffer.  uOldh and vOldh are zeroed on exit.
 * =====================================================================================
 */
static void
velocity_step_host (struct fluid_solver_host *s, size_t nbytes)
{
  const int n = s->n;
  const float dt = s->dt;

  /* uh += dt * uOldh, vh += dt * vOldh (accumulate the pending input). */
  add_source_host (s->uh, s->uOldh, dt, s->size);
  add_source_host (s->vh, s->vOldh, dt, s->size);

  /* Diffuse with coefficient visc: the results land in uOldh / vOldh,
   * uh / vh are the right-hand sides and temph is scratch. */
  diffuse_host (0, s->uOldh, s->uh, s->temph, s->visc, dt, n);
  diffuse_host (0, s->vOldh, s->vh, s->temph, s->visc, dt, n);

  /* First projection of (uOldh, vOldh); uh and vh are overwritten here
   * because they are handed over as the p and div work arrays. */
  project_host (s->uOldh, s->vOldh, s->uh, s->vh, s->temph, n);

  /* Advect uOldh and vOldh along (uOldh, vOldh) into uh and vh. */
  advect_host (1, s->uh, s->uOldh, s->uOldh, s->vOldh, dt, n);
  advect_host (2, s->vh, s->vOldh, s->uOldh, s->vOldh, dt, n);

  /* Second projection, this time of (uh, vh), with uOldh / vOldh as the
   * p and div work arrays. */
  project_host (s->uh, s->vh, s->uOldh, s->vOldh, s->temph, n);

  /* Clear the input arrays for the next frame. */
  memset (s->uOldh, 0, nbytes);
  memset (s->vOldh, 0, nbytes);
}

/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  density_step_host
 *  Description:  advance the dh field by one time step using the current uh / vh;
 *                nbytes is the size in bytes of one work buffer.  dOldh is zeroed
 *                on exit.
 * =====================================================================================
 */
static void
density_step_host (struct fluid_solver_host *s, size_t nbytes)
{
  const int n = s->n;
  const float dt = s->dt;

  /* dh += dt * dOldh (accumulate the pending input). */
  add_source_host (s->dh, s->dOldh, dt, s->size);

  /* Diffuse with coefficient diff: the result lands in dOldh. */
  diffuse_host (0, s->dOldh, s->dh, s->temph, s->diff, dt, n);

  /* Advect dOldh along (uh, vh) back into dh. */
  advect_host (0, s->dh, s->dOldh, s->uh, s->vh, dt, n);

  /* Clear the input array for the next frame. */
  memset (s->dOldh, 0, nbytes);
}

/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  update_host
 *  Description:  advance the simulation by one time step: first the uh / vh
 *                fields, then the dh field, which is advected with the freshly
 *                updated uh / vh.
 * =====================================================================================
 */
void
update_host (struct fluid_solver_host *s)
{
  const size_t nbytes = (size_t) s->size * sizeof (float);

  velocity_step_host (s, nbytes);
  density_step_host (s, nbytes);
}


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  run_sim_host
 *  Description:  clear the buffers, insert the initial sources, then run
 *                num_frames iterations of update_host followed by pack_img_host.
 *                Only the frame loop is bracketed by startclock () / stopclock ();
 *                the frame count, the value returned by stopclock () and their
 *                ratio are printed on stdout.
 * =====================================================================================
 */
void
run_sim_host (struct fluid_solver_host *s, int num_frames)
{
  int frame = 0;
  float elapsed;
  double rate;

  clear_host (s);
  insert_force_host (s);

  startclock ();

  while (frame < num_frames)
  {
    /*  Advance the simulation by one time step. */
    update_host (s);

    /*  Build the image from the density / velocity fields. */
    pack_img_host (s);

    frame++;
  }

  elapsed = stopclock ();
  rate = (double) num_frames / (double) elapsed;

  printf ("Host rendered %d frames in  %f seconds. Rate = %f frames/sec\n",
	  num_frames, elapsed, rate);
}
