#ifndef ACOUSTIC_SOLVER_WITH_MESH_HPP
#define ACOUSTIC_SOLVER_WITH_MESH_HPP

#include <Kokkos_Core.hpp>

#include <rang.hpp>
#include <BlockPerfectGas.hpp>

#include <TinyVector.hpp>
#include <TinyMatrix.hpp>
#include <Mesh.hpp>
#include <MeshData.hpp>
#include <FiniteVolumesEulerUnknowns.hpp>

template<typename MeshData>
class AcousticSolverWithMesh 
{
  typedef typename MeshData::MeshType MeshType;
  typedef FiniteVolumesEulerUnknowns<MeshData> UnknownsType;

  MeshData& m_mesh_data;
  const MeshType& m_mesh;
  const typename MeshType::Connectivity& m_connectivity;

  constexpr static size_t dimension = MeshType::dimension;

  typedef TinyVector<dimension> Rd;
  typedef TinyMatrix<dimension> Rdd;

private:
  struct ReduceMin
  {
  private:
    const Kokkos::View<const double*> x_;

  public:
    typedef Kokkos::View<const double*>::non_const_value_type value_type;

    ReduceMin(const Kokkos::View<const double*>& x) : x_ (x) {}

    typedef Kokkos::View<const double*>::size_type size_type;
    
    KOKKOS_INLINE_FUNCTION void
    operator() (const size_type i, value_type& update) const
    {
      if (x_(i) < update) {
	update = x_(i);
      }
    }

    KOKKOS_INLINE_FUNCTION void
    join (volatile value_type& dst,
	  const volatile value_type& src) const
    {
      if (src < dst) {
	dst = src;
      }
    }

    KOKKOS_INLINE_FUNCTION void
    init (value_type& dst) const
    { // The identity under max is -Inf.
      dst= Kokkos::reduction_identity<value_type>::min();
    }
  };

  
  KOKKOS_INLINE_FUNCTION
  const Kokkos::View<const double*>
  computeRhoCj(const Kokkos::View<const double*>& rhoj,
	       const Kokkos::View<const double*>& cj)
  {
    Kokkos::parallel_for(m_mesh.numberOfCells(), KOKKOS_LAMBDA(const int& j) {
	m_rhocj[j] = rhoj[j]*cj[j];
      });
    return m_rhocj;
  }

  KOKKOS_INLINE_FUNCTION
  const Kokkos::View<const Rdd**>
  computeAjr(const Kokkos::View<const double*>& rhocj,
	     const Kokkos::View<const Rd**>& Cjr) {
    const Kokkos::View<const unsigned short*> cell_nb_nodes
      = m_connectivity.cellNbNodes();

    Kokkos::parallel_for(m_mesh.numberOfCells(), KOKKOS_LAMBDA(const int& j) {
	for (int r=0; r<cell_nb_nodes[j]; ++r) {
	  m_Ajr(j,r) = tensorProduct(rhocj(j)*Cjr(j,r), Cjr(j,r));
	}
      });

    return m_Ajr;
  }

  KOKKOS_INLINE_FUNCTION
  const Kokkos::View<const Rdd*>
  computeAr(const Kokkos::View<const Rdd**>& Ajr) {
    const Kokkos::View<const unsigned int**> node_cells = m_connectivity.nodeCells();
    const Kokkos::View<const unsigned short**> node_cell_local_node = m_connectivity.nodeCellLocalNode();
    const Kokkos::View<const unsigned short*> node_nb_cells = m_connectivity.nodeNbCells();

    Kokkos::parallel_for(m_mesh.numberOfNodes(), KOKKOS_LAMBDA(const int& r) {
	Rdd sum = zero;
	for (int j=0; j<node_nb_cells(r); ++j) {
	  const int J = node_cells(r,j);
	  const int R = node_cell_local_node(r,j);
	  sum += Ajr(J,R);
	}
	m_Ar(r) = sum;
      });

    return m_Ar;
  }

  KOKKOS_INLINE_FUNCTION
  const Kokkos::View<const Rd*>
  computeBr(const Kokkos::View<const Rdd**>& Ajr,
	    const Kokkos::View<const Rd**>& Cjr,
	    const Kokkos::View<const Rd*>& uj,
	    const Kokkos::View<const double*>& pj) {
    const Kokkos::View<const unsigned int**>& node_cells = m_connectivity.nodeCells();
    const Kokkos::View<const unsigned short**>& node_cell_local_node = m_connectivity.nodeCellLocalNode();
    const Kokkos::View<const unsigned short*>& node_nb_cells = m_connectivity.nodeNbCells();

    Kokkos::parallel_for(m_mesh.numberOfNodes(), KOKKOS_LAMBDA(const int& r) {
	Rd& br = m_br(r);
	br = zero;
	for (int j=0; j<node_nb_cells(r); ++j) {
	  const int J = node_cells(r,j);
	  const int R = node_cell_local_node(r,j);
	  br += Ajr(J,R)*uj(J) + pj(J)*Cjr(J,R);
	}
      });

    return m_br;
  }

  Kokkos::View<Rd*>
  computeUr(const Kokkos::View<const Rdd*>& Ar,
	    const Kokkos::View<const Rd*>& br) {
    inverse(Ar, m_inv_Ar);
    const Kokkos::View<const Rdd*> invAr = m_inv_Ar;
    Kokkos::parallel_for(m_mesh.numberOfNodes(), KOKKOS_LAMBDA(const int& r) {
	m_ur[r]=invAr(r)*br(r);
      });
    m_ur[0]=zero;
    m_ur[m_mesh.numberOfNodes()-1]=zero;

    return m_ur;
  }

  Kokkos::View<Rd**>
  computeFjr(const Kokkos::View<const Rdd**>& Ajr,
	     const Kokkos::View<const Rd*>& ur,
	     const Kokkos::View<const Rd**>& Cjr,
	     const Kokkos::View<const Rd*>& uj,
	     const Kokkos::View<const double*>& pj) {
    const Kokkos::View<const unsigned int**>& cell_nodes = m_connectivity.cellNodes();
    const Kokkos::View<const unsigned short*> cell_nb_nodes
      = m_connectivity.cellNbNodes();

    Kokkos::parallel_for(m_mesh.numberOfCells(), KOKKOS_LAMBDA(const int& j) {
	for (int r=0; r<cell_nb_nodes[j]; ++r) {
	  m_Fjr(j,r) = Ajr(j,r)*(uj(j)-ur(cell_nodes(j,r)))+pj(j)*Cjr(j,r);
	}
      });

    return m_Fjr;
  }

  void inverse(const Kokkos::View<const Rdd*>& A,
	       Kokkos::View<Rdd*>& inv_A) const {
    Kokkos::parallel_for(A.size(), KOKKOS_LAMBDA(const int& r) {
	inv_A(r) = Rdd{1./(A(r)(0,0))};
      });
  }

  void inverse(const Kokkos::View<const double*>& x,
	       Kokkos::View<double*>& inv_x) const {
    Kokkos::parallel_for(x.size(), KOKKOS_LAMBDA(const int& r) {
	inv_x(r) = 1./x(r);
      });
  }

  KOKKOS_INLINE_FUNCTION
  double acoustic_dt(const Kokkos::View<const double*>& Vj,
		     const Kokkos::View<const double*>& cj) const {
    Kokkos::parallel_for(m_mesh.numberOfCells(), KOKKOS_LAMBDA(const int& j){
	m_Vj_over_cj[j] = Vj[j]/cj[j];
      });

    double dt = std::numeric_limits<double>::max();
    Kokkos::parallel_reduce(m_mesh.numberOfCells(), ReduceMin(m_Vj_over_cj), dt);

    return dt;
  }

  KOKKOS_INLINE_FUNCTION
  void computeExplicitFluxes(const Kokkos::View<const Rd*>& xr,
			     const Kokkos::View<const Rd*>& xj,
			     const Kokkos::View<const double*>& rhoj,
			     const Kokkos::View<const Rd*>& uj,
			     const Kokkos::View<const double*>& pj,
			     const Kokkos::View<const double*>& cj,
			     const Kokkos::View<const double*>& Vj,
			     const Kokkos::View<const Rd**>& Cjr) {
    const Kokkos::View<const double*> rhocj  = computeRhoCj(rhoj, cj);
    const Kokkos::View<const Rdd**> Ajr = computeAjr(rhocj, Cjr);

    const Kokkos::View<const Rdd*> Ar = computeAr(Ajr);
    const Kokkos::View<const Rd*> br = computeBr(Ajr, Cjr, uj, pj);

    Kokkos::View<Rd*> ur = m_ur;
    Kokkos::View<Rd**> Fjr = m_Fjr;
    ur  = computeUr(Ar, br);
    Fjr = computeFjr(Ajr, ur, Cjr, uj, pj);
  }

  Kokkos::View<Rd*> m_br;
  Kokkos::View<Rdd**> m_Ajr;
  Kokkos::View<Rdd*> m_Ar;
  Kokkos::View<Rdd*> m_inv_Ar;
  Kokkos::View<Rd**> m_Fjr;
  Kokkos::View<Rd*> m_ur;
  Kokkos::View<double*> m_rhocj;
  Kokkos::View<double*> m_Vj_over_cj;


public:
  AcousticSolverWithMesh(MeshData& mesh_data,
			 UnknownsType& unknowns)
    : m_mesh_data(mesh_data),
      m_mesh(mesh_data.mesh()),
      m_connectivity(m_mesh.connectivity()),
      m_br("br", m_mesh.numberOfNodes()),
      m_Ajr("Ajr", m_mesh.numberOfCells(), m_connectivity.maxNbNodePerCell()),
      m_Ar("Ar", m_mesh.numberOfNodes()),
      m_inv_Ar("inv_Ar", m_mesh.numberOfNodes()),
      m_Fjr("Fjr", m_mesh.numberOfCells(), m_connectivity.maxNbNodePerCell()),
      m_ur("ur", m_mesh.numberOfNodes()),
      m_rhocj("rho_c", m_mesh.numberOfCells()),
      m_Vj_over_cj("Vj_over_cj", m_mesh.numberOfCells())
  {
    Kokkos::View<double*> rhoj = unknowns.rhoj();
    Kokkos::View<Rd*> uj = unknowns.uj();
    Kokkos::View<double*> Ej = unknowns.Ej();

    Kokkos::View<double*> ej = unknowns.ej();
    Kokkos::View<double*> pj = unknowns.pj();
    Kokkos::View<double*> gammaj = unknowns.gammaj();
    Kokkos::View<double*> cj = unknowns.cj();
    const Kokkos::View<const double*> mj = unknowns.mj();

    Kokkos::View<Rd*> xr = m_mesh.xr();

    const Kokkos::View<const Rd*> xj = m_mesh_data.xj();
    const Kokkos::View<const double*> Vj = m_mesh_data.Vj();

    const Kokkos::View<const unsigned int**>& cell_nodes = m_connectivity.cellNodes();
    const Kokkos::View<const Rd**> Cjr = m_mesh_data.Cjr();

    Kokkos::View<double*> inv_mj("inv_mj",m_mesh.numberOfCells());
    inverse(mj, inv_mj);

    const double tmax=0.2;
    double t=0;

    int itermax=std::numeric_limits<int>::max();
    int iteration=0;

    BlockPerfectGas block_eos(rhoj, ej, pj, gammaj, cj);

    while((t<tmax) and (iteration<itermax)) {
      double dt = 0.4*acoustic_dt(Vj, cj);
      if (t+dt<tmax) {
	t+=dt;
      } else {
	dt=tmax-t;
	t=tmax;
      }

      computeExplicitFluxes(xr, xj, rhoj, uj, pj, cj, Vj, Cjr);

      const Kokkos::View<const Rd**> Fjr = m_Fjr;
      const Kokkos::View<const Rd*> ur = m_ur;
      const Kokkos::View<const unsigned short*> cell_nb_nodes
	= m_connectivity.cellNbNodes();

      Kokkos::parallel_for(m_mesh.numberOfCells(), KOKKOS_LAMBDA(const int& j) {
	  Rd momentum_fluxes = zero;
	  double energy_fluxes = 0;
	  for (int R=0; R<cell_nb_nodes[j]; ++R) {
	    const int r=cell_nodes(j,R);
	    momentum_fluxes +=  Fjr(j,R);
	    energy_fluxes   += (Fjr(j,R), ur[r]);
	  }
	  uj[j] -= (dt*inv_mj[j]) * momentum_fluxes;
	  Ej[j] -= (dt*inv_mj[j]) * energy_fluxes;
	});

      Kokkos::parallel_for(m_mesh.numberOfCells(), KOKKOS_LAMBDA(const int& j) {
	  ej[j] = Ej[j] - 0.5 * (uj[j],uj[j]);
	});

      Kokkos::parallel_for(m_mesh.numberOfNodes(), KOKKOS_LAMBDA(const int& r){
	  xr[r] += dt*ur[r];
	});

      m_mesh_data.updateAllData();

      Kokkos::parallel_for(m_mesh.numberOfCells(), KOKKOS_LAMBDA(const int& j){
	  rhoj[j] = mj[j]/Vj[j];
	});

      block_eos.updatePandCFromRhoE();    
    
      ++iteration;
    }

    std::cout << "* " << rang::style::underline << "Final time" << rang::style::reset
	      << ":  " << rang::fgB::green << t << rang::fg::reset << " (" << iteration << " iterations)\n";

  }
};

#endif // ACOUSTIC_SOLVER_WITH_MESH_HPP