/*
 * crs_matrix.hh
 *
 *  Created on: 2013. 8. 5.
 *      Author: parkmh
 */

/*
 * Changelog
 * v.0.1 Added crs_matrix.hh
 *
 */
#ifndef CRS_MATRIX_HH_
#define CRS_MATRIX_HH_

/* Option string for CRSMatrix(const CRSMatrix&, const char*): plain copy. */
#define DEFAULT 		"copy"
/* Option string for the same constructor: build the transpose of the source. */
#define TRANSPOSE 	"transpose"
/* Sentinel value (-1) marking an unused slot in integer work arrays. */
#define EMPTY			-1

#ifdef USE_OPENMP
#include <omp.h>
#endif

#ifndef NUMVEC_HH_
#include "numvec.hh"
#endif

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <vector>

/* Forward declarations of the class templates referenced in this header. */
template <class T> class CRSMatrix;
template <class T> class NumVec;
template <class T> class Cholesky;


/*
 * Forward declarations of the free function templates that CRSMatrix
 * befriends further down (they must be visible before the friend lines).
 */
/* Returns A * x as a new vector. */
template <class T> NumVec<T> operator*(const CRSMatrix<T>&, const NumVec<T>&);
/* y = A * x (y is resized to the row count of A). */
template <class T> void axey(const CRSMatrix<T>&, const NumVec<T>&, NumVec<T>&);
/* Transposed product; judging by its name and size checks it is meant to give y = A^T * x. */
template <class T> void atxey(const CRSMatrix<T>&, const NumVec<T>&, NumVec<T>&);
/* y = y + A * x. */
template <class T> void axpyey(const CRSMatrix<T>&, const NumVec<T>&, NumVec<T>&);
/* y = b - A * x (argument order: b, A, x, y). */
template <class T> void bmaxey(const NumVec<T>&, const CRSMatrix<T>&, const NumVec<T>&, NumVec<T>&);

/*
 * CRSMatrix<T>
 *
 * Sparse matrix kept in compressed row storage:
 *   ia_ : row pointers; the entries of row i sit at positions
 *         ia_[i] .. ia_[i+1]-1 of ja_ and aa_
 *   ja_ : column index of each stored entry
 *   aa_ : value of each stored entry
 * nrow_, ncol_ and nnz_ hold the dimensions and the stored-entry count.
 */
template <class T>
class CRSMatrix{
	friend class Cholesky<T>;
public:
	typedef T data_type;
	typedef NumVec<T> vector_type;

	/* ---- construction ---- */

	/* Empty 0-by-0 matrix. */
	CRSMatrix();
	/* nrow-by-ncol matrix with room for nnz entries (arrays are only sized). */
	CRSMatrix(const size_t& nrow, const size_t& ncol, const size_t& nnz);
	/* Builds the matrix from ready-made ia / ja / aa arrays. */
	CRSMatrix(const NumVec<size_t>& ia, const NumVec<size_t>& ja, const NumVec<T>& aa,
			const size_t nrow, const size_t ncol);
	/* Copies A ("copy") or builds its transpose ("transpose"), selected by opt. */
	CRSMatrix(const CRSMatrix& A, const char* opt);

	/* ---- sizes ---- */
	size_t row() const { return nrow_; }
	size_t col() const { return ncol_; }
	size_t nnz() const { return nnz_; }

	/* ---- element access into the three storage arrays (no bounds check) ---- */
	size_t& ia(size_t i) { return ia_.value_[i]; }
	size_t& ja(size_t i) { return ja_.value_[i]; }
	T&      aa(size_t i) { return aa_.value_[i]; }
	const size_t& ia(size_t i) const { return ia_.value_[i]; }
	const size_t& ja(size_t i) const { return ja_.value_[i]; }
	const T&      aa(size_t i) const { return aa_.value_[i]; }

	/* ---- whole-array setters: forward the buffer and its length to NumVec::reset ---- */
	void ia(const size_t* val, size_t size) { ia_.reset(val, size); }
	void ja(const size_t* val, size_t size) { ja_.reset(val, size); }
	void aa(const T* val, size_t size) { aa_.reset(val, size); }

	/* ---- raw pointers to the first element of each storage array ---- */
	size_t* iabegin() { return ia_.begin(); }
	const size_t* iabegin() const { return ia_.begin(); }
	size_t* jabegin() { return ja_.begin(); }
	const size_t* jabegin() const { return ja_.begin(); }
	T* aabegin() { return aa_.begin(); }
	const T* aabegin() const { return aa_.begin(); }

	/* ---- resizing of the individual storage arrays ---- */
	void resizeia(size_t n) { ia_.resize(n); }
	void resizeja(size_t n) { ja_.resize(n); }
	void resizeaa(size_t n) { aa_.resize(n); }

	/* Value stored at (row, col), or T() when no entry is stored there. */
	T get(const size_t& row, const size_t& col) const;

	/* Overwrites the cached dimensions / entry count; the arrays are not touched. */
	void setSize(const size_t& nrow, const size_t&ncol, const size_t& nnz)
	{
		nrow_ = nrow;
		ncol_ = ncol;
		nnz_ = nnz;
	}

	/* Returns the transpose as a new matrix. */
	CRSMatrix transpose() const { return CRSMatrix<T>(*this, "transpose"); }
	// TODO copy to transpose

	/* Text-file input / output of the whole matrix. */
	void load(const char* filename);
	void save(const char* filename);

	CRSMatrix& operator=(const CRSMatrix& crsmat);
	/* In-place product: *this becomes (*this) * B. */
	const CRSMatrix& operator*=(const CRSMatrix& B);

	/* For each i below min(nrow, ncol): position of entry (i,i) in ja_/aa_, or -1. */
	NumVec<int> diag() const;
	/* Sorts the entries of every row by ascending column index. */
	void rearrange();

	/*
	 * friend operators and functions
	 */

	/* Prints a size header followed by one "[row][col] : value" line per stored entry. */
	friend std::ostream&
	operator<<(std::ostream& os, const CRSMatrix &crsmat){
		os << "[ " << crsmat.nrow_ << "-by-" << crsmat.ncol_
		   << " ] MATRIX (nnz=" << crsmat.ja_.size() << ")" << std::endl;
		for (size_t r = 0; r < crsmat.nrow_; ++r){
			for (size_t p = crsmat.ia_[ r ]; p < crsmat.ia_[ r + 1 ]; ++p){
				os << "[" << r << "][" << crsmat.ja_[ p ] << "] : "
				   << crsmat.aa_[ p ] << std::endl;
			}
		}
		return os;
	}
	friend NumVec<T> operator*<>(const CRSMatrix<T>& , const NumVec<T> & );
	friend void axey<>(const CRSMatrix<T>& A, const NumVec<T>& x, NumVec<T>& y);
	friend void atxey<>(const CRSMatrix<T>& A, const NumVec<T>& x, NumVec<T>& y);
	friend void axpyey<>(const CRSMatrix<T>& A, const NumVec<T>& x, NumVec<T>& y);
	friend void bmaxey<>(const NumVec<T>& b, const CRSMatrix<T>& A, const NumVec<T>& x, NumVec<T>& y);
private:
	NumVec< T > 	   aa_;		/* stored values */
	NumVec< size_t > ia_;		/* row pointers */
	NumVec< size_t > ja_;		/* column indices */

	size_t nrow_;
	size_t ncol_;
	size_t nnz_;
};


/* Default constructor: a 0-by-0 matrix with no stored entries. */
template <class T>
CRSMatrix<T>::CRSMatrix()
	: nrow_(0), ncol_(0), nnz_(0){
}

/*
 * Sizing constructor: records the dimensions and entry count, then sizes
 * the row-pointer array to nrow+1 and the column/value arrays to nnz.
 * The array contents are whatever NumVec::resize leaves in them.
 */
template <class T>
CRSMatrix<T>::CRSMatrix(const size_t &nrow, const size_t &ncol, const size_t &nnz){
	nrow_ = nrow;
	ncol_ = ncol;
	nnz_ = nnz;

	ia_.resize(nrow_ + 1);
	ja_.resize(nnz_);
	aa_.resize(nnz_);
}
/*
 * Array constructor: copies the given row-pointer, column-index and value
 * arrays and records the dimensions. The entry count is taken from the
 * length of the value array.
 */
template <class T>
CRSMatrix<T>::CRSMatrix(const NumVec<size_t> &ia,
		const NumVec<size_t> &ja, const NumVec<T> &aa,
		const size_t nrow, const size_t ncol)
		: aa_(aa), ia_(ia), ja_(ja),		/* listed in member declaration order */
		  nrow_(nrow), ncol_(ncol),
		  nnz_(aa_.size_){
}



/*
 * Copy / transpose constructor.
 *
 *   A   : source matrix
 *   opt : DEFAULT ("copy") duplicates A, TRANSPOSE ("transpose") builds A^T
 *
 * Throws std::invalid_argument when opt is null or any other string.
 *
 * No default argument is given here: a member of a class template may only
 * receive default arguments on its declaration inside the class. A plain
 * one-argument copy uses the compiler-generated memberwise copy constructor,
 * which yields the same result as the "copy" branch below.
 *
 * The transpose is a counting sort on column indices: count the entries per
 * column, turn the counts into row pointers with a prefix sum, then scatter
 * every entry of A to its slot.
 */
template <class T>
CRSMatrix<T>::CRSMatrix(const CRSMatrix<T>& A, const char* opt){
	if (opt == 0){
		throw std::invalid_argument("Invalid argument is used in a CRSMatrix copy constructor.");
	}

	if (!std::strcmp(opt,DEFAULT)){
		ia_ = A.ia_;
		ja_ = A.ja_;
		aa_ = A.aa_;
		nrow_ 	= A.nrow_;
		ncol_ 	= A.ncol_;
		nnz_ 	= A.nnz_;
	} else if (!std::strcmp(opt,TRANSPOSE)){
		nrow_ 	= A.ncol_;
		ncol_ 	= A.nrow_;
		nnz_ 	= A.nnz_;

		const size_t srcRows = A.nrow_;	// rows of A      = columns of the result
		const size_t dstRows = nrow_;	// columns of A   = rows of the result

		ia_.resize(dstRows + 1);
		ia_ = 0;
		ja_.resize(nnz_);
		aa_.resize(nnz_);

		// Pass 1: ia_[c + 1] <- number of entries of A in column c.
		// Row bounds are read inside the loop so that a matrix without rows
		// never touches A.ia_.
		for (size_t r = 0; r < srcRows; ++r){
			const size_t rowEnd = A.ia_.value_[ r + 1 ];
			for (size_t p = A.ia_.value_[ r ]; p < rowEnd; ++p){
				ia_.value_[ A.ja_.value_[ p ] + 1 ]++;
			}
		}

		// Prefix sum: ia_[c] becomes the first slot of result row c.
		for (size_t c = 0; c < dstRows; ++c){
			ia_.value_[ c + 1 ] += ia_.value_[ c ];
		}

		// Pass 2: scatter. ia_[c] doubles as the insertion cursor of row c,
		// so after this loop ia_[c] holds the START of row c + 1.
		for (size_t r = 0; r < srcRows; ++r){
			const size_t rowEnd = A.ia_.value_[ r + 1 ];
			for (size_t p = A.ia_.value_[ r ]; p < rowEnd; ++p){
				const size_t c = A.ja_.value_[ p ];
				const size_t slot = ia_.value_[ c ];
				ja_.value_[ slot ] = r;
				aa_.value_[ slot ] = A.aa_.value_[ p ];
				ia_.value_[ c ]++;
			}
		}

		// Undo the cursor advance by shifting the pointers one slot up.
		// Counting down from dstRows (instead of dstRows - 1) cannot wrap
		// around when the result has zero rows.
		for (size_t c = dstRows; c > 0; --c){
			ia_.value_[ c ] = ia_.value_[ c - 1 ];
		}
		ia_.value_[ 0 ] = 0;
	} else {
		throw std::invalid_argument("Invalid argument is used in a CRSMatrix copy constructor.");
	}
}

/*
 * Copy assignment: duplicates the three storage arrays and the cached
 * sizes of crsmat. Assigning a matrix to itself is a no-op.
 */
template <class T>
CRSMatrix<T> & CRSMatrix<T>::operator=(const CRSMatrix<T> &crsmat){
	if (this == &crsmat){
		return *this;
	}

	ia_ = crsmat.ia_;
	ja_ = crsmat.ja_;
	aa_ = crsmat.aa_;

	nrow_ = crsmat.nrow_;
	ncol_ = crsmat.ncol_;
	nnz_ = crsmat.nnz_;
	return *this;
}

/*
 * In-place sparse product: *this (call it A) becomes A * B.
 *
 *   B : right factor; B.row() must equal this->col()
 *
 * Returns *this. On a size mismatch the diagnostic is written to std::cerr
 * and std::runtime_error is thrown (the same exception the matrix-vector
 * kernels in this file use).
 *
 * The product is built row by row (Gustavson's scheme) with a dense
 * accumulator and a "seen" flag per result column:
 *   - every entry A(i,j) adds A(i,j) * B(j,:) into the accumulator,
 *   - a column is recorded the first time it is touched in the row,
 *   - the recorded columns are then flushed to the result and the
 *     accumulator / flags are reset for the next row.
 * A column is stored once it has been touched, even if its sum is zero.
 * Inside a row the columns appear in reverse order of first touch, so they
 * are generally NOT sorted; call rearrange() if sorted rows are needed.
 *
 * All work buffers are std::vector, so nothing leaks and *this is left
 * untouched if an allocation throws. B may be the same object as *this:
 * the storage of *this is replaced only after the product is complete.
 */
template <class T>
const CRSMatrix<T>& CRSMatrix<T>::operator *=(const CRSMatrix<T> &B){
	if (ncol_ != B.nrow_){
		std::cerr << "???Error using ==> times\n";
		std::cerr << " Matrix sizes must agree( nCol(1) : " << 	ncol_ ;
		std::cerr << " , nRow(2) : "  << B.nrow_ <<"\n";
		throw std::runtime_error("Matrix dimensions must agree.");
	}

	const size_t *biap = B.iabegin();
	const size_t *bjap = B.jabegin();
	const T *baap = B.aabegin();

	const size_t *iap = iabegin();
	const size_t *jap = jabegin();
	const T *aap = aabegin();

	const size_t resultCols = B.ncol_;

	std::vector<size_t> ic(nrow_ + 1, 0);		// row pointers of the result
	std::vector<size_t> jc;						// column indices of the result
	std::vector<T> cc;							// values of the result
	std::vector<T> acc(resultCols, T());		// dense accumulator, explicitly zeroed
	std::vector<char> seen(resultCols, 0);		// 1 while a column belongs to the current row
	std::vector<size_t> touched;				// columns of the current row, in order of first touch

	for (size_t i = 0; i < nrow_; ++i){
		touched.clear();

		const size_t aRowEnd = iap[ i + 1 ];
		for (size_t jj = iap[ i ]; jj < aRowEnd; ++jj){
			const size_t j = jap[ jj ];
			const T ajj = aap[ jj ];

			const size_t bRowEnd = biap[ j + 1 ];
			for (size_t k = biap[ j ]; k < bRowEnd; ++k){
				const size_t col = bjap[ k ];
				if (!seen[ col ]){
					seen[ col ] = 1;
					touched.push_back(col);
				}
				acc[ col ] += ajj * baap[ k ];
			}
		}

		// Flush the row, newest column first, and reset the work arrays.
		for (size_t p = touched.size(); p > 0; --p){
			const size_t col = touched[ p - 1 ];
			jc.push_back(col);
			cc.push_back(acc[ col ]);
			acc[ col ] = T();
			seen[ col ] = 0;
		}
		ic[ i + 1 ] = jc.size();
	}

	const size_t csize = jc.size();
	if (csize == 0){
		// Pad so that &jc[0] / &cc[0] are valid pointers; zero elements are copied.
		jc.push_back(0);
		cc.push_back(T());
	}

	ia_.assign(&ic[0], nrow_ + 1);
	ja_.assign(&jc[0], csize);
	aa_.assign(&cc[0], csize);
	setSize(nrow_, B.ncol_, csize);
	return *this;
}

/*
 * Reads the matrix from a whitespace-separated text file laid out as
 *   nrow ncol nnz, then nrow+1 row pointers, nnz column indices, nnz values
 * (the layout written by save()). Prints a message and aborts the program
 * when the file cannot be opened.
 */
template <class T>
void CRSMatrix<T>::load(const char* filename){
	std::ifstream in(filename, std::ios::in);

	if (in.fail()){
		std::cerr << "\n" ;
		std::cerr << "LOAD - Fatal error!\n";
		std::cerr << " Could not open the file: \"" << filename << "\"\n";
		abort();
	}

	// Header first; it decides how large the three arrays must be.
	in >> nrow_ >> ncol_ >> nnz_;
	ia_.resize(nrow_+1);
	ja_.resize(nnz_);
	aa_.resize(nnz_);

	const size_t rowPtrCount = nrow_ + 1;
	for (size_t k = 0; k < rowPtrCount; ++k){
		in >> ia_.value_[k];
	}
	for (size_t k = 0; k < nnz_; ++k){
		in >> ja_.value_[k];
	}
	for (size_t k = 0; k < nnz_; ++k){
		in >> aa_.value_[k];
	}
	in.close();
}

/*
 * Writes the matrix to a text file, one number per line:
 *   nrow, ncol, nnz, then nrow+1 row pointers, nnz column indices and
 *   nnz values (values with 16 significant digits).
 * This is the layout read back by load().
 *
 * A file that cannot be opened, or a write that fails, is reported on
 * std::cerr; the function then returns without throwing.
 *
 * Lines end with '\n' rather than std::endl: the bytes written are the
 * same, but the stream is no longer flushed after every single number.
 */
template <class T>
void CRSMatrix<T>::save(const char* filename){
	std::ofstream out(filename, std::ios::out);
	if (!out){
		std::cerr << "SAVE - Could not open the file: \"" << filename << "\"\n";
		return;
	}

	out << nrow_ << '\n';
	out << ncol_ << '\n';
	out << nnz_  << '\n';
	out.precision(16);

	const size_t rowPtrCount = nrow_ + 1;
	for (size_t k = 0; k < rowPtrCount; ++k){
		out << ia_.value_[k] << '\n';
	}
	for (size_t k = 0; k < nnz_; ++k){
		out << ja_.value_[k] << '\n';
	}
	for (size_t k = 0; k < nnz_; ++k){
		out << aa_.value_[k] << '\n';
	}

	out.close();	// flushes the buffered output
	if (!out){
		std::cerr << "SAVE - Error while writing the file: \"" << filename << "\"\n";
	}
}

/*
 * Matrix-vector product: returns A * x as a new vector with A.row() entries.
 * Throws std::runtime_error when x.size() differs from A.col().
 * With USE_OPENMP defined the rows are distributed over threads; each row
 * is summed by exactly one thread, in storage order.
 */
template <class T>
NumVec<T> operator*(const CRSMatrix<T>& A, const NumVec<T> & x){
	if (A.col() != x.size()){
		throw std::runtime_error("Matrix dimensions must agree.");
	}

	const size_t n = A.row();
	NumVec<T> vec;
	vec.resize(n);

	const size_t * iap = A.iabegin();
	const size_t * jap = A.jabegin();
	const T * xp = x.begin();
	const T * aap = A.aabegin();
	T* vp = vec.begin();

#ifdef USE_OPENMP
#pragma omp parallel for schedule(guided)
#endif
	for (size_t row = 0; row < n; ++row){
		T sum = T();
		const size_t rowEnd = iap[ row + 1 ];
		for (size_t k = iap[ row ]; k < rowEnd; ++k){
			sum += aap[ k ] * xp[ jap[ k ] ];
		}
		vp[ row ] = sum;
	}

	return vec;
}

/*
 * y = A * x. y is resized to A.row() entries and overwritten.
 * Throws std::runtime_error when x.size() differs from A.col().
 * With USE_OPENMP defined the rows are distributed over threads.
 */
template<class T>
void axey(const CRSMatrix<T>& A, const NumVec<T>&x, NumVec<T>& y){
	if (A.col() != x.size()){
		throw std::runtime_error("Matrix dimensions must agree.");
	}

	const size_t n = A.row();
	y.resize(n);

	const size_t * iap = A.iabegin();
	const size_t * jap = A.jabegin();
	const T * xp = x.begin();
	const T * aap = A.aabegin();
	T * yp = y.begin();

#ifdef USE_OPENMP
#pragma omp parallel for schedule(guided)
#endif
	for (size_t row = 0; row < n; ++row){
		T sum = T();
		const size_t rowEnd = iap[ row + 1 ];
		for (size_t k = iap[ row ]; k < rowEnd; ++k){
			sum += aap[ k ] * xp[ jap[ k ] ];
		}
		yp[ row ] = sum;
	}
}

/*
 * y = y + A * x, updating y in place.
 * Throws std::runtime_error unless x.size() == A.col() and
 * y.size() == A.row().
 * With USE_OPENMP defined the rows are distributed over threads.
 */
template<class T>
void axpyey(const CRSMatrix<T>& A, const NumVec<T>&x, NumVec<T>& y){
	const bool xFits = (A.col() == x.size());
	const bool yFits = (A.row() == y.size());
	if (!(xFits && yFits)){
		throw std::runtime_error("Matrix dimensions must agree.");
	}

	const size_t n = A.row();

	const size_t * iap = A.iabegin();
	const size_t * jap = A.jabegin();
	const T * xp = x.begin();
	const T * aap = A.aabegin();
	T * yp = y.begin();

#ifdef USE_OPENMP
#pragma omp parallel for schedule(guided)
#endif
	for (size_t row = 0; row < n; ++row){
		T sum = T();
		const size_t rowEnd = iap[ row + 1 ];
		for (size_t k = iap[ row ]; k < rowEnd; ++k){
			sum += aap[ k ] * xp[ jap[ k ] ];
		}
		yp[ row ] += sum;
	}
}

/*
 * y = b - A * x.
 * y must already have A.row() entries on entry; it is first overwritten
 * with b and the product is then subtracted row by row.
 * Throws std::runtime_error unless x.size() == A.col(),
 * y.size() == A.row() and b.size() == y.size().
 * With USE_OPENMP defined the rows are distributed over threads.
 */
template<class T>
void bmaxey(const NumVec<T> &b, const CRSMatrix<T>& A, const NumVec<T>&x, NumVec<T>& y){
	const bool xFits = (A.col() == x.size());
	const bool yFits = (A.row() == y.size());
	const bool bFits = (b.size() == y.size());
	if (!(xFits && yFits && bFits)){
		throw std::runtime_error("Matrix dimensions must agree.");
	}

	y = b;
	const size_t n = A.row();

	const size_t * iap = A.iabegin();
	const size_t * jap = A.jabegin();
	const T * xp = x.begin();
	const T * aap = A.aabegin();
	T * yp = y.begin();

#ifdef USE_OPENMP
#pragma omp parallel for schedule(guided)
#endif
	for (size_t row = 0; row < n; ++row){
		T sum = T();
		const size_t rowEnd = iap[ row + 1 ];
		for (size_t k = iap[ row ]; k < rowEnd; ++k){
			sum += aap[ k ] * xp[ jap[ k ] ];
		}
		yp[ row ] -= sum;
	}
}
/*
 * y = A^T * x (product with the transpose, without forming A^T).
 *
 *   A : matrix in CRS form
 *   x : vector with A.row() entries
 *   y : result; resized to A.col() entries and overwritten
 *
 * Throws std::runtime_error when x.size() differs from A.row().
 *
 * Because A is stored by rows, the transposed product is a scatter:
 * every stored entry A(r,c) contributes A(r,c) * x[r] to y[c]. y is
 * therefore zeroed first and the loop runs over the ROWS of A.
 */
template<class T>
void atxey(const CRSMatrix<T>& A, const NumVec<T>&x, NumVec<T>& y){
	if (A.row() != x.size()){
		throw std::runtime_error("Matrix dimensions must agree.");
	}

	const size_t nRows = A.row();
	const size_t nCols = A.col();
	y.resize(nCols);

	const size_t * iap = A.iabegin();
	const size_t * jap = A.jabegin();
	const T * xp = x.begin();
	const T * aap = A.aabegin();
	T * yp = y.begin();

	// Start from zero: the contents of y after resize() are not relied upon.
	for (size_t c = 0; c < nCols; ++c){
		yp[ c ] = T();
	}

	for (size_t r = 0; r < nRows; ++r){
		const T xr = xp[ r ];
		const size_t rowEnd = iap[ r + 1 ];
		for (size_t k = iap[ r ]; k < rowEnd; ++k){
			yp[ jap[ k ] ] += aap[ k ] * xr;
		}
	}
}

/*
 * Returns the value stored at (row, col). The entries of the row are
 * scanned linearly; when none has the requested column, T() is returned.
 * row is not range-checked.
 */
template <class T>
T CRSMatrix<T>::get(const size_t& row, const size_t &col) const{
	const size_t rowBegin = ia_.value_[ row ];
	const size_t rowEnd = ia_.value_[ row + 1 ];

	for (size_t k = rowBegin; k < rowEnd; ++k){
		if (ja_.value_[ k ] == col){
			return aa_.value_[ k ];
		}
	}
	return T();
}

/*
 * Locates the diagonal entries.
 * Returns a vector with min(nrow, ncol) entries; entry i is the position
 * inside the column/value arrays of the first stored entry (i,i), or -1
 * when row i has no stored diagonal entry.
 */
template <class T>
NumVec<int> CRSMatrix<T>::diag() const{
	const size_t order = std::min(nrow_,ncol_);
	NumVec<int> D;
	D.resize(order);

	const size_t * iap = iabegin();
	const size_t * jap = jabegin();
	int * dp = D.begin();

	for (size_t r = 0; r < order; ++r){
		int pos = -1;
		const size_t rowEnd = iap[ r + 1 ];
		for (size_t k = iap[ r ]; k < rowEnd; ++k){
			if (jap[ k ] == r){
				pos = k;
				break;
			}
		}
		dp[ r ] = pos;
	}
	return D;
}


/*
 * Sorts the entries of every row by ascending column index, moving the
 * values along with their column indices. Each row is bubble-sorted in
 * place; entries with equal column index keep their relative order.
 */
template <class T>
void CRSMatrix<T>::rearrange() {
	const size_t rowCount = nrow_;
	const size_t * rowPtr = iabegin();
	size_t * cols = jabegin();
	T * vals = aabegin();

	size_t first = *rowPtr;
	for (size_t r = 0; r < rowCount; ++r){
		const size_t last = rowPtr[ r + 1 ];
		const size_t len = last - first;

		// Pass number "pass" bubbles the largest remaining column index
		// to position last - pass.
		for (size_t pass = 1; pass < len; ++pass){
			const size_t stop = last - pass;
			for (size_t p = first; p < stop; ++p){
				if (cols[ p + 1 ] < cols[ p ]){
					std::swap(cols[ p + 1 ], cols[ p ]);
					std::swap(vals[ p + 1 ], vals[ p ]);
				}
			}
		}
		first = last;
	}
}
/*
 * Matrix-matrix product: returns A * B as a new matrix.
 * A is copied and the copy is multiplied in place with operator*=.
 */
template <class T>
const CRSMatrix<T> operator*(const CRSMatrix<T> &A, const CRSMatrix<T> &B){
	CRSMatrix<T> product(A);
	product *= B;
	return product;
}

#endif /* CRS_MATRIX_HH_ */
