Ex3

2019-12-01 22:57:43 +01:00
parent 56a95ec733
commit 2324790537
3 changed files with 160 additions and 0 deletions
--- a/Simulations/pcgsolver.h
+++ b/Simulations/pcgsolver.h
@@ -0,0 +1,750 @@
+//
+//  Preconditioned conjugate gradient solver
+//
+//  Created by Robert Bridson, Ryoichi Ando and Nils Thuerey
+//
+
+#ifndef RCMATRIX3_H
+#define RCMATRIX3_H
+
+#include <iterator>
+#include <cassert>
+#include <vector>
+#include <fstream>
+#include <cmath>
+#include <functional>
+
+// index type 
+#define int_index long long
+
+// parallelization disabled
+
+#define parallel_for(size)  { int thread_number = 0; int_index parallel_index=0; for( int_index parallel_index=0; parallel_index<(int_index)size; parallel_index++ ) {
+#define parallel_end 	} thread_number=parallel_index=0; }
+
+#define parallel_block 
+#define do_parallel 
+#define do_end
+#define block_end
+
+#include "vectorbase.h"
+
+// note - "Int" instead of "N" here, the latter is size!
+template<class Int, class T>
+struct InstantBLAS {
+	static inline Int offset(Int N, Int incX) { return ((incX) > 0 ?  0 : ((N) - 1) * (-(incX))); }
+	static T cblas_ddot( const Int N, const T *X, const Int incX, const T *Y, const Int incY) {
+		double r  = 0.0; // always use double precision internally here...
+		Int    i;
+		Int    ix = offset(N,incX);
+		Int    iy = offset(N,incY);
+		for (i = 0; i < N; i++) {
+			r  += X[ix] * Y[iy];
+			ix += incX;
+			iy += incY;
+		}
+		return (T)r;
+	}
+	static void cblas_daxpy( const Int N, const T alpha, const T *X, const Int incX, T *Y, const Int incY) {
+		Int i;
+		if (N     <= 0  ) return;
+		if (alpha == 0.0) return;
+		if (incX == 1 && incY == 1) {
+			const Int m = N % 4;
+			for (i = 0; i < m; i++)
+				Y[i] += alpha * X[i];
+			for (i = m; i + 3 < N; i += 4) {
+				Y[i    ] += alpha * X[i    ];
+				Y[i + 1] += alpha * X[i + 1];
+				Y[i + 2] += alpha * X[i + 2];
+				Y[i + 3] += alpha * X[i + 3];
+			}
+		} else {
+			Int ix = offset(N, incX);
+			Int iy = offset(N, incY);
+			for (i = 0; i < N; i++) {
+				Y[iy] += alpha * X[ix];
+				ix    += incX;
+				iy    += incY;
+			}
+		}
+	}
+	// dot products ==============================================================
+	static inline T dot(const std::vector<T> &x, const std::vector<T> &y) {
+		return cblas_ddot((int)x.size(), &x[0], 1, &y[0], 1);
+	}
+	
+	// inf-norm (maximum absolute value: index of max returned) ==================
+	static inline Int index_abs_max(const std::vector<T> &x) {
+		int maxind = 0;
+		T maxvalue = 0;
+		for(Int i = 0; i < (Int)x.size(); ++i) {
+			if(std::abs(x[i]) > maxvalue) {
+				maxvalue = fabs(x[i]);
+				maxind = i;
+			}
+		}
+		return maxind;
+	}
+	
+	// inf-norm (maximum absolute value) =========================================
+	// technically not part of BLAS, but useful
+	static inline T abs_max(const std::vector<T> &x)
+	{ return std::abs(x[index_abs_max(x)]); }
+	
+	// saxpy (y=alpha*x+y) =======================================================
+	static inline void add_scaled(T alpha, const std::vector<T> &x, std::vector<T> &y) {
+		cblas_daxpy((Int)x.size(), alpha, &x[0], 1, &y[0], 1);
+	}
+};
+
+
+
+
+template<class T>
+void zero(std::vector<T> &v)
+{ for(int i=(int)v.size()-1; i>=0; --i) v[i]=0; }
+
+template<class T>
+void insert(std::vector<T> &a, unsigned int index, T e)
+{
+   a.push_back(a.back());
+   for(unsigned int i=(unsigned int)a.size()-1; i>index; --i)
+      a[i]=a[i-1];
+   a[index]=e;
+}
+
+template<class T>
+void erase(std::vector<T> &a, unsigned int index)
+{
+   for(unsigned int i=index; i<a.size()-1; ++i)
+      a[i]=a[i+1];
+   a.pop_back();
+}
+
+//============================================================================
+// Dynamic compressed sparse row matrix.
+
+template<class T>
+struct SparseMatrix
+{
+   int n; // dimension
+   std::vector<std::vector<int> > index; // for each row, a list of all column indices (sorted)
+   std::vector<std::vector<T> > value; // values corresponding to index
+
+   explicit SparseMatrix(int n_=0, int expected_nonzeros_per_row=7)
+      : n(n_), index(n_), value(n_)
+   {
+      for(int i=0; i<n; ++i){
+         index[i].reserve(expected_nonzeros_per_row);
+         value[i].reserve(expected_nonzeros_per_row);
+      }
+   }
+
+   void clear(void)
+   {
+      n=0;
+      index.clear();
+      value.clear();
+   }
+
+   void zero(void)
+   {
+      for(int i=0; i<n; ++i){
+         index[i].resize(0);
+         value[i].resize(0);
+      }
+   }
+
+   void resize(int n_)
+   {
+      n=n_;
+      index.resize(n);
+      value.resize(n);
+   }
+
+   T operator()(int i, int j) const
+   {
+      for(int k=0; k<(int)index[i].size(); ++k){
+         if(index[i][k]==j) return value[i][k];
+         else if(index[i][k]>j) return 0;
+      }
+      return 0;
+   }
+
+   void set_element(int i, int j, T new_value)
+   {
+      int k=0;
+      for(; k<(int)index[i].size(); ++k){
+         if(index[i][k]==j){
+            value[i][k]=new_value;
+            return;
+         }else if(index[i][k]>j){
+            insert(index[i], k, j);
+            insert(value[i], k, new_value);
+            return;
+         }
+      }
+      index[i].push_back(j);
+      value[i].push_back(new_value);
+   }
+
+   void add_to_element(int i, int j, T increment_value)
+   {
+      int k=0;
+      for(; k<(int)index[i].size(); ++k){
+         if(index[i][k]==j){
+            value[i][k]+=increment_value;
+            return;
+         }else if(index[i][k]>j){
+            insert(index[i], k, j);
+            insert(value[i], k, increment_value);
+            return;
+         }
+      }
+      index[i].push_back(j);
+      value[i].push_back(increment_value);
+   }
+
+   // assumes indices is already sorted
+   void add_sparse_row(int i, const std::vector<int> &indices, const std::vector<T> &values)
+   {
+      int j=0, k=0;
+      while(j<indices.size() && k<(int)index[i].size()){
+         if(index[i][k]<indices[j]){
+            ++k;
+         }else if(index[i][k]>indices[j]){
+            insert(index[i], k, indices[j]);
+            insert(value[i], k, values[j]);
+            ++j;
+         }else{
+            value[i][k]+=values[j];
+            ++j;
+            ++k;
+         }
+      }
+      for(;j<indices.size(); ++j){
+         index[i].push_back(indices[j]);
+         value[i].push_back(values[j]);
+      }
+   }
+
+   // assumes matrix has symmetric structure - so the indices in row i tell us which columns to delete i from
+   void symmetric_remove_row_and_column(int i)
+   {
+      for(int a=0; a<index[i].size(); ++a){
+         int j=index[i][a]; // 
+         for(int b=0; b<index[j].size(); ++b){
+            if(index[j][b]==i){
+               erase(index[j], b);
+               erase(value[j], b);
+               break;
+            }
+         }
+      }
+      index[i].resize(0);
+      value[i].resize(0);
+   }
+
+   void write_matlab(std::ostream &output, const char *variable_name)
+   {
+      output<<variable_name<<"=sparse([";
+      for(int i=0; i<n; ++i){
+         for(int j=0; j<index[i].size(); ++j){
+            output<<i+1<<" ";
+         }
+      }
+      output<<"],...\n  [";
+      for(int i=0; i<n; ++i){
+         for(int j=0; j<index[i].size(); ++j){
+            output<<index[i][j]+1<<" ";
+         }
+      }
+      output<<"],...\n  [";
+      for(int i=0; i<n; ++i){
+         for(int j=0; j<value[i].size(); ++j){
+            output<<value[i][j]<<" ";
+         }
+      }
+      output<<"], "<<n<<", "<<n<<");"<<std::endl;
+   }
+};
+
+typedef SparseMatrix<float> SparseMatrixf;
+typedef SparseMatrix<double> SparseMatrixd;
+
+// perform result=matrix*x
+template<class T>
+void multiply(const SparseMatrix<T> &matrix, const std::vector<T> &x, std::vector<T> &result)
+{
+   assert(matrix.n==x.size());
+   result.resize(matrix.n);
+   //for(int i=0; i<matrix.n; ++i)
+	parallel_for(matrix.n) {
+		unsigned i (parallel_index);
+		T value=0;
+		for(int j=0; j<(int)matrix.index[i].size(); ++j){
+			value+=matrix.value[i][j]*x[matrix.index[i][j]];
+		}
+		result[i]=value;
+	} parallel_end
+}
+
+// perform result=result-matrix*x
+template<class T>
+void multiply_and_subtract(const SparseMatrix<T> &matrix, const std::vector<T> &x, std::vector<T> &result)
+{
+   assert(matrix.n==x.size());
+   result.resize(matrix.n);
+   for(int i=0; i<(int)matrix.n; ++i){
+      for(int j=0; j<(int)matrix.index[i].size(); ++j){
+         result[i]-=matrix.value[i][j]*x[matrix.index[i][j]];
+      }
+   }
+}
+
+//============================================================================
+// Fixed version of SparseMatrix. This is not a good structure for dynamically
+// modifying the matrix, but can be significantly faster for matrix-vector
+// multiplies due to better data locality.
+
+template<class T>
+struct FixedSparseMatrix
+{
+   int n; // dimension
+   std::vector<T> value; // nonzero values row by row
+   std::vector<int> colindex; // corresponding column indices
+   std::vector<int> rowstart; // where each row starts in value and colindex (and last entry is one past the end, the number of nonzeros)
+
+   explicit FixedSparseMatrix(int n_=0)
+      : n(n_), value(0), colindex(0), rowstart(n_+1)
+   {}
+
+   void clear(void)
+   {
+      n=0;
+      value.clear();
+      colindex.clear();
+      rowstart.clear();
+   }
+
+   void resize(int n_)
+   {
+      n=n_;
+      rowstart.resize(n+1);
+   }
+
+   void construct_from_matrix(const SparseMatrix<T> &matrix)
+   {
+      resize(matrix.n);
+      rowstart[0]=0;
+      for(int i=0; i<n; ++i){
+         rowstart[i+1]=rowstart[i]+matrix.index[i].size();
+      }
+      value.resize(rowstart[n]);
+      colindex.resize(rowstart[n]);
+      int j=0;
+      for(int i=0; i<n; ++i){
+         for(int k=0; k<(int)matrix.index[i].size(); ++k){
+            value[j]=matrix.value[i][k];
+            colindex[j]=matrix.index[i][k];
+            ++j;
+         }
+      }
+   }
+
+   void write_matlab(std::ostream &output, const char *variable_name)
+   {
+      output<<variable_name<<"=sparse([";
+      for(int i=0; i<n; ++i){
+         for(int j=rowstart[i]; j<rowstart[i+1]; ++j){
+            output<<i+1<<" ";
+         }
+      }
+      output<<"],...\n  [";
+      for(int i=0; i<n; ++i){
+         for(int j=rowstart[i]; j<rowstart[i+1]; ++j){
+            output<<colindex[j]+1<<" ";
+         }
+      }
+      output<<"],...\n  [";
+      for(int i=0; i<n; ++i){
+         for(int j=rowstart[i]; j<rowstart[i+1]; ++j){
+            output<<value[j]<<" ";
+         }
+      }
+      output<<"], "<<n<<", "<<n<<");"<<std::endl;
+   }
+};
+
+
+// perform result=matrix*x
+template<class T>
+void multiply(const FixedSparseMatrix<T> &matrix, const std::vector<T> &x, std::vector<T> &result)
+{
+   assert(matrix.n==x.size());
+   result.resize(matrix.n);
+   //for(int i=0; i<matrix.n; ++i)
+	parallel_for(matrix.n) {
+		unsigned i (parallel_index);
+		T value=0;
+		for(int j=matrix.rowstart[i]; j<matrix.rowstart[i+1]; ++j){
+			value+=matrix.value[j]*x[matrix.colindex[j]];
+		}
+		result[i]=value;
+	} parallel_end
+}
+
+// perform result=result-matrix*x
+template<class T>
+void multiply_and_subtract(const FixedSparseMatrix<T> &matrix, const std::vector<T> &x, std::vector<T> &result)
+{
+   assert(matrix.n==x.size());
+   result.resize(matrix.n);
+   for(int i=0; i<matrix.n; ++i){
+      for(int j=matrix.rowstart[i]; j<matrix.rowstart[i+1]; ++j){
+         result[i]-=matrix.value[j]*x[matrix.colindex[j]];
+      }
+   }
+}
+
+//============================================================================
+// A simple compressed sparse column data structure (with separate diagonal)
+// for lower triangular matrices
+
+template<class T>
+struct SparseColumnLowerFactor
+{
+   int n;
+   std::vector<T> invdiag; // reciprocals of diagonal elements
+   std::vector<T> value; // values below the diagonal, listed column by column
+   std::vector<int> rowindex; // a list of all row indices, for each column in turn
+   std::vector<int> colstart; // where each column begins in rowindex (plus an extra entry at the end, of #nonzeros)
+   std::vector<T> adiag; // just used in factorization: minimum "safe" diagonal entry allowed
+
+   explicit SparseColumnLowerFactor(int n_=0)
+      : n(n_), invdiag(n_), colstart(n_+1), adiag(n_)
+   {}
+
+   void clear(void)
+   {
+      n=0;
+      invdiag.clear();
+      value.clear();
+      rowindex.clear();
+      colstart.clear();
+      adiag.clear();
+   }
+
+   void resize(int n_)
+   {
+      n=n_;
+      invdiag.resize(n);
+      colstart.resize(n+1);
+      adiag.resize(n);
+   }
+
+   void write_matlab(std::ostream &output, const char *variable_name)
+   {
+      output<<variable_name<<"=sparse([";
+      for(int i=0; i<n; ++i){
+         output<<" "<<i+1;
+         for(int j=colstart[i]; j<colstart[i+1]; ++j){
+            output<<" "<<rowindex[j]+1;
+         }
+      }
+      output<<"],...\n  [";
+      for(int i=0; i<n; ++i){
+         output<<" "<<i+1;
+         for(int j=colstart[i]; j<colstart[i+1]; ++j){
+            output<<" "<<i+1;
+         }
+      }
+      output<<"],...\n  [";
+      for(int i=0; i<n; ++i){
+         output<<" "<<(invdiag[i]!=0 ? 1/invdiag[i] : 0);
+         for(int j=colstart[i]; j<colstart[i+1]; ++j){
+            output<<" "<<value[j];
+         }
+      }
+      output<<"], "<<n<<", "<<n<<");"<<std::endl;
+   }
+};
+
+//============================================================================
+// Incomplete Cholesky factorization, level zero, with option for modified version.
+// Set modification_parameter between zero (regular incomplete Cholesky) and
+// one (fully modified version), with values close to one usually giving the best
+// results. The min_diagonal_ratio parameter is used to detect and correct
+// problems in factorization: if a pivot is this much less than the diagonal
+// entry from the original matrix, the original matrix entry is used instead.
+
+template<class T>
+void factor_modified_incomplete_cholesky0(const SparseMatrix<T> &matrix, SparseColumnLowerFactor<T> &factor,
+                                          T modification_parameter=0.97, T min_diagonal_ratio=0.25)
+{
+   // first copy lower triangle of matrix into factor (Note: assuming A is symmetric of course!)
+   factor.resize(matrix.n);
+   zero(factor.invdiag); // important: eliminate old values from previous solves!
+   factor.value.resize(0);
+   factor.rowindex.resize(0);
+   zero(factor.adiag);
+   for(int i=0; i<matrix.n; ++i){
+      factor.colstart[i]=(int)factor.rowindex.size();
+      for(int j=0; j<(int)matrix.index[i].size(); ++j){
+         if(matrix.index[i][j]>i){
+            factor.rowindex.push_back(matrix.index[i][j]);
+            factor.value.push_back(matrix.value[i][j]);
+         }else if(matrix.index[i][j]==i){
+            factor.invdiag[i]=factor.adiag[i]=matrix.value[i][j];
+         }
+      }
+   }
+   factor.colstart[matrix.n]=(int)factor.rowindex.size();
+   // now do the incomplete factorization (figure out numerical values)
+
+   // MATLAB code:
+   // L=tril(A);
+   // for k=1:size(L,2)
+   //   L(k,k)=sqrt(L(k,k));
+   //   L(k+1:end,k)=L(k+1:end,k)/L(k,k);
+   //   for j=find(L(:,k))'
+   //     if j>k
+   //       fullupdate=L(:,k)*L(j,k);
+   //       incompleteupdate=fullupdate.*(A(:,j)~=0);
+   //       missing=sum(fullupdate-incompleteupdate);
+   //       L(j:end,j)=L(j:end,j)-incompleteupdate(j:end);
+   //       L(j,j)=L(j,j)-omega*missing;
+   //     end
+   //   end
+   // end
+
+   for(int k=0; k<matrix.n; ++k){
+      if(factor.adiag[k]==0) continue; // null row/column
+      // figure out the final L(k,k) entry
+      if(factor.invdiag[k]<min_diagonal_ratio*factor.adiag[k])
+         factor.invdiag[k]=1/sqrt(factor.adiag[k]); // drop to Gauss-Seidel here if the pivot looks dangerously small
+      else
+         factor.invdiag[k]=1/sqrt(factor.invdiag[k]);
+      // finalize the k'th column L(:,k)
+      for(int p=factor.colstart[k]; p<factor.colstart[k+1]; ++p){
+         factor.value[p]*=factor.invdiag[k];
+      }
+      // incompletely eliminate L(:,k) from future columns, modifying diagonals
+      for(int p=factor.colstart[k]; p<factor.colstart[k+1]; ++p){
+         int j=factor.rowindex[p]; // work on column j
+         T multiplier=factor.value[p];
+         T missing=0;
+         int a=factor.colstart[k];
+         // first look for contributions to missing from dropped entries above the diagonal in column j
+         int b=0;
+         while(a<factor.colstart[k+1] && factor.rowindex[a]<j){
+            // look for factor.rowindex[a] in matrix.index[j] starting at b
+            while(b<(int)matrix.index[j].size()){
+               if(matrix.index[j][b]<factor.rowindex[a])
+                  ++b;
+               else if(matrix.index[j][b]==factor.rowindex[a])
+                  break;
+               else{
+                  missing+=factor.value[a];
+                  break;
+               }
+            }
+            ++a;
+         }
+         // adjust the diagonal j,j entry
+         if(a<factor.colstart[k+1] && factor.rowindex[a]==j){
+            factor.invdiag[j]-=multiplier*factor.value[a];
+         }
+         ++a;
+         // and now eliminate from the nonzero entries below the diagonal in column j (or add to missing if we can't)
+         b=factor.colstart[j];
+         while(a<factor.colstart[k+1] && b<factor.colstart[j+1]){
+            if(factor.rowindex[b]<factor.rowindex[a])
+               ++b;
+            else if(factor.rowindex[b]==factor.rowindex[a]){
+               factor.value[b]-=multiplier*factor.value[a];
+               ++a;
+               ++b;
+            }else{
+               missing+=factor.value[a];
+               ++a;
+            }
+         }
+         // and if there's anything left to do, add it to missing
+         while(a<factor.colstart[k+1]){
+            missing+=factor.value[a];
+            ++a;
+         }
+         // and do the final diagonal adjustment from the missing entries
+         factor.invdiag[j]-=modification_parameter*multiplier*missing;
+      }
+   }
+}
+
+//============================================================================
+// Solution routines with lower triangular matrix.
+
+// solve L*result=rhs
+template<class T>
+void solve_lower(const SparseColumnLowerFactor<T> &factor, const std::vector<T> &rhs, std::vector<T> &result)
+{
+   assert(factor.n==rhs.size());
+   assert(factor.n==result.size());
+   result=rhs;
+   for(int i=0; i<factor.n; ++i){
+      result[i]*=factor.invdiag[i];
+      for(int j=factor.colstart[i]; j<factor.colstart[i+1]; ++j){
+         result[factor.rowindex[j]]-=factor.value[j]*result[i];
+      }
+   }
+}
+
+// solve L^T*result=rhs
+template<class T>
+void solve_lower_transpose_in_place(const SparseColumnLowerFactor<T> &factor, std::vector<T> &x)
+{
+   assert(factor.n==(int)x.size());
+   assert(factor.n>0);
+   int i=factor.n;
+   do{
+      --i;
+      for(int j=factor.colstart[i]; j<factor.colstart[i+1]; ++j){
+         x[i]-=factor.value[j]*x[factor.rowindex[j]];
+      }
+      x[i]*=factor.invdiag[i];
+   }while(i!=0);
+}
+
+//============================================================================
+// Encapsulates the Conjugate Gradient algorithm with incomplete Cholesky
+// factorization preconditioner.
+
+template <class T>
+struct SparsePCGSolver
+{
+   SparsePCGSolver(void)
+   {
+      set_solver_parameters(1e-5, 100, 0.97, 0.25);
+   }
+
+   void set_solver_parameters(T tolerance_factor_, int max_iterations_, T modified_incomplete_cholesky_parameter_=0.97, T min_diagonal_ratio_=0.25)
+   {
+      tolerance_factor=tolerance_factor_;
+      if(tolerance_factor<1e-30) tolerance_factor=1e-30;
+      max_iterations=max_iterations_;
+      modified_incomplete_cholesky_parameter=modified_incomplete_cholesky_parameter_;
+      min_diagonal_ratio=min_diagonal_ratio_;
+   }
+
+   bool solve(const SparseMatrix<T> &matrix, const std::vector<T> &rhs, std::vector<T> &result, T &relative_residual_out, int &iterations_out, int precondition=2)
+   {
+      int n=matrix.n;
+      if((int)m.size()!=n){ m.resize(n); s.resize(n); z.resize(n); r.resize(n); }
+      zero(result);
+      r=rhs;
+      double residual_out=InstantBLAS<int,T>::abs_max(r);
+      if(residual_out==0) {
+         iterations_out=0;
+         return true;
+      }
+      //double tol=tolerance_factor*residual_out; // relative residual
+      double tol=tolerance_factor;
+	  double residual_0 = residual_out;
+
+      form_preconditioner(matrix, precondition);
+      apply_preconditioner( r, z, precondition);
+      double rho=InstantBLAS<int,T>::dot(z, r);
+      if(rho==0 || rho!=rho) {
+         iterations_out=0;
+         return false;
+      }
+
+      s=z;
+      fixed_matrix.construct_from_matrix(matrix);
+      int iteration;
+      for(iteration=0; iteration<max_iterations; ++iteration){
+         multiply(fixed_matrix, s, z);
+         double alpha=rho/InstantBLAS<int,T>::dot(s, z);
+         InstantBLAS<int,T>::add_scaled(alpha, s, result);
+         InstantBLAS<int,T>::add_scaled(-alpha, z, r);
+         residual_out=InstantBLAS<int,T>::abs_max(r);
+		 relative_residual_out = residual_out / residual_0;
+         if(residual_out<=tol) {
+            iterations_out=iteration+1;
+            return true; 
+         }
+         apply_preconditioner(r, z, precondition);
+         double rho_new=InstantBLAS<int,T>::dot(z, r);
+         double beta=rho_new/rho;
+         InstantBLAS<int,T>::add_scaled(beta, s, z); s.swap(z); // s=beta*s+z
+         rho=rho_new;
+      }
+      iterations_out=iteration;
+	  relative_residual_out = residual_out / residual_0;
+      return false;
+   }
+
+   protected:
+
+   // internal structures
+   SparseColumnLowerFactor<T> ic_factor; // modified incomplete cholesky factor
+   std::vector<T> m, z, s, r; // temporary vectors for PCG
+   FixedSparseMatrix<T> fixed_matrix; // used within loop
+
+   // parameters
+   T tolerance_factor;
+   int max_iterations;
+   T modified_incomplete_cholesky_parameter;
+   T min_diagonal_ratio;
+
+   void form_preconditioner(const SparseMatrix<T>& matrix, int precondition=2)
+   {
+		if(precondition==2) {
+			// incomplete cholesky
+			factor_modified_incomplete_cholesky0(matrix, ic_factor, modified_incomplete_cholesky_parameter, min_diagonal_ratio);
+		
+		} else if(precondition==1) {
+			// diagonal
+		    ic_factor.resize(matrix.n);
+		    zero(ic_factor.invdiag);
+			for(int i=0; i<matrix.n; ++i) {
+				  for(int j=0; j<(int)matrix.index[i].size(); ++j){
+					 if(matrix.index[i][j]==i){
+						ic_factor.invdiag[i] = 1./matrix.value[i][j];
+					 }
+				  }
+			}
+		}
+   }
+
+   void apply_preconditioner(const std::vector<T> &x, std::vector<T> &result, int precondition=2)
+   {
+		if (precondition==2) {
+			// incomplete cholesky
+			solve_lower(ic_factor, x, result);
+			solve_lower_transpose_in_place(ic_factor,result);
+		} else if(precondition==1) {
+			// diagonal
+			for(int_index i=0; i<(int_index)result.size(); ++i) {
+				result[i] = x[i] * ic_factor.invdiag[i];
+		    }
+		} else {
+			// off
+			result = x;
+		}
+   }
+};
+
+
+
+#undef parallel_for
+#undef parallel_end             
+#undef int_index
+
+#undef parallel_block 
+#undef do_parallel 
+#undef do_end
+#undef block_end
+
+#endif