Ex3

2019-11-29 10:58:50 +01:00
parent 996e84b414
commit 8c73ee7c4e
4 changed files with 1571 additions and 1260 deletions
--- a/Simulations/DiffusionSimulator.cpp
+++ b/Simulations/DiffusionSimulator.cpp
@@ -0,0 +1,159 @@
 #include "DiffusionSimulator.h"
 #include "pcgsolver.h"
 using namespace std;
 Grid::Grid() {
 }
 DiffusionSimulator::DiffusionSimulator()
 {
 	m_iTestCase = 0;
 	m_vfMovableObjectPos = Vec3();
 	m_vfMovableObjectFinalPos = Vec3();
 	m_vfRotate = Vec3();
 	// to be implemented
 }
 const char * DiffusionSimulator::getTestCasesStr(){
 	return "Explicit_solver, Implicit_solver";
 }
 void DiffusionSimulator::reset(){
 		m_mouse.x = m_mouse.y = 0;
 		m_trackmouse.x = m_trackmouse.y = 0;
 		m_oldtrackmouse.x = m_oldtrackmouse.y = 0;
 }
 void DiffusionSimulator::initUI(DrawingUtilitiesClass * DUC)
 {
 	this->DUC = DUC;
 	// to be implemented
 }
 void DiffusionSimulator::notifyCaseChanged(int testCase)
 {
 	m_iTestCase = testCase;
 	m_vfMovableObjectPos = Vec3(0, 0, 0);
 	m_vfRotate = Vec3(0, 0, 0);
 	//
 	//to be implemented
 	//
 	switch (m_iTestCase)
 	{
 	case 0:
 		cout << "Explicit solver!\n";
 		break;
 	case 1:
 		cout << "Implicit solver!\n";
 		break;
 	default:
 		cout << "Empty Test!\n";
 		break;
 	}
 }
 Grid* DiffusionSimulator::diffuseTemperatureExplicit() {//add your own parameters
 	Grid* newT = new Grid();
 	// to be implemented
 	//make sure that the temperature in boundary cells stays zero
 	return newT;
 }
 void setupB(std::vector<Real>& b) {//add your own parameters
 	// to be implemented
 	//set vector B[sizeX*sizeY]
 	for (int i = 0; i < 25; i++) {
 		b.at(i) = 0;
 	}
 }
 void fillT() {//add your own parameters
 	// to be implemented
 	//fill T with solved vector x
 	//make sure that the temperature in boundary cells stays zero
 }
 void setupA(SparseMatrix<Real>& A, double factor) {//add your own parameters
 	// to be implemented
 	//setup Matrix A[sizeX*sizeY*sizeZ, sizeX*sizeY*sizeZ]
 	// set with:  A.set_element( index1, index2 , value );
 	// if needed, read with: A(index1, index2);
 	// avoid zero rows in A -> set the diagonal value for boundary cells to 1.0
 	for (int i = 0; i < 25; i++) {
 			A.set_element(i, i, 1); // set diagonal
 	}
 }
 void DiffusionSimulator::diffuseTemperatureImplicit() {//add your own parameters
 	// solve A T = b
 	// to be implemented
 	const int N = 25;//N = sizeX*sizeY*sizeZ
 	SparseMatrix<Real> *A = new SparseMatrix<Real> (N);
 	std::vector<Real> *b = new std::vector<Real>(N);
 	setupA(*A, 0.1);
 	setupB(*b);
 	// perform solve
 	Real pcg_target_residual = 1e-05;
 	Real pcg_max_iterations = 1000;
 	Real ret_pcg_residual = 1e10;
 	int  ret_pcg_iterations = -1;
 	SparsePCGSolver<Real> solver;
 	solver.set_solver_parameters(pcg_target_residual, pcg_max_iterations, 0.97, 0.25);
 	std::vector<Real> x(N);
 	for (int j = 0; j < N; ++j) { x[j] = 0.; }
 	// preconditioners: 0 off, 1 diagonal, 2 incomplete cholesky
 	solver.solve(*A, *b, x, ret_pcg_residual, ret_pcg_iterations, 0);
 	// x contains the new temperature values
 	fillT();//copy x to T
 }
 void DiffusionSimulator::simulateTimestep(float timeStep)
 {
 	// to be implemented
 	// update current setup for each frame
 	switch (m_iTestCase)
 	{
 	case 0:
 		T = diffuseTemperatureExplicit();
 		break;
 	case 1:
 		diffuseTemperatureImplicit();
 		break;
 	}
 }
 void DiffusionSimulator::drawObjects()
 {
 	// to be implemented
 	//visualization
 }
 void DiffusionSimulator::drawFrame(ID3D11DeviceContext* pd3dImmediateContext)
 {
 	drawObjects();
 }
 void DiffusionSimulator::onClick(int x, int y)
 {
 	m_trackmouse.x = x;
 	m_trackmouse.y = y;
 }
 void DiffusionSimulator::onMouse(int x, int y)
 {
 	m_oldtrackmouse.x = x;
 	m_oldtrackmouse.y = y;
 	m_trackmouse.x = x;
 	m_trackmouse.y = y;
 }
--- a/Simulations/DiffusionSimulator.h
+++ b/Simulations/DiffusionSimulator.h
@@ -0,0 +1,51 @@
 #ifndef DIFFUSIONSIMULATOR_h
 #define DIFFUSIONSIMULATOR_h
 #include "Simulator.h"
 #include "vectorbase.h"
 //impement your own grid class for saving grid data
 class Grid {
 public:
 	// Construtors
 	Grid();
 private:
 	// Attributes
 };
 class DiffusionSimulator:public Simulator{
 public:
 	// Construtors
 	DiffusionSimulator();
 	// Functions
 	const char * getTestCasesStr();
 	void initUI(DrawingUtilitiesClass * DUC);
 	void reset();
 	void drawFrame(ID3D11DeviceContext* pd3dImmediateContext);
 	void notifyCaseChanged(int testCase);
 	void simulateTimestep(float timeStep);
 	void externalForcesCalculations(float timeElapsed) {};
 	void onClick(int x, int y);
 	void onMouse(int x, int y);
 	// Specific Functions
 	void drawObjects();
 	Grid* diffuseTemperatureExplicit();
 	void diffuseTemperatureImplicit();
 private:
 	// Attributes
 	Vec3  m_vfMovableObjectPos;
 	Vec3  m_vfMovableObjectFinalPos;
 	Vec3  m_vfRotate;
 	Point2D m_mouse;
 	Point2D m_trackmouse;
 	Point2D m_oldtrackmouse;
 	Grid *T; //save results of every time step
 };
 #endif
--- a/Simulations/util/pcgsolver.h
+++ b/Simulations/util/pcgsolver.h
@@ -0,0 +1,750 @@
 //
 //  Preconditioned conjugate gradient solver
 //
 //  Created by Robert Bridson, Ryoichi Ando and Nils Thuerey
 //
 #ifndef RCMATRIX3_H
 #define RCMATRIX3_H
 #include <iterator>
 #include <cassert>
 #include <vector>
 #include <fstream>
 #include <cmath>
 #include <functional>
 // index type 
 #define int_index long long
 // parallelization disabled
 #define parallel_for(size)  { int thread_number = 0; int_index parallel_index=0; for( int_index parallel_index=0; parallel_index<(int_index)size; parallel_index++ ) {
 #define parallel_end 	} thread_number=parallel_index=0; }
 #define parallel_block 
 #define do_parallel 
 #define do_end
 #define block_end
 #include "vectorbase.h"
 // note - "Int" instead of "N" here, the latter is size!
 template<class Int, class T>
 struct InstantBLAS {
 	static inline Int offset(Int N, Int incX) { return ((incX) > 0 ?  0 : ((N) - 1) * (-(incX))); }
 	static T cblas_ddot( const Int N, const T *X, const Int incX, const T *Y, const Int incY) {
 		double r  = 0.0; // always use double precision internally here...
 		Int    i;
 		Int    ix = offset(N,incX);
 		Int    iy = offset(N,incY);
 		for (i = 0; i < N; i++) {
 			r  += X[ix] * Y[iy];
 			ix += incX;
 			iy += incY;
 		}
 		return (T)r;
 	}
 	static void cblas_daxpy( const Int N, const T alpha, const T *X, const Int incX, T *Y, const Int incY) {
 		Int i;
 		if (N     <= 0  ) return;
 		if (alpha == 0.0) return;
 		if (incX == 1 && incY == 1) {
 			const Int m = N % 4;
 			for (i = 0; i < m; i++)
 				Y[i] += alpha * X[i];
 			for (i = m; i + 3 < N; i += 4) {
 				Y[i    ] += alpha * X[i    ];
 				Y[i + 1] += alpha * X[i + 1];
 				Y[i + 2] += alpha * X[i + 2];
 				Y[i + 3] += alpha * X[i + 3];
 			}
 		} else {
 			Int ix = offset(N, incX);
 			Int iy = offset(N, incY);
 			for (i = 0; i < N; i++) {
 				Y[iy] += alpha * X[ix];
 				ix    += incX;
 				iy    += incY;
 			}
 		}
 	}
 	// dot products ==============================================================
 	static inline T dot(const std::vector<T> &x, const std::vector<T> &y) {
 		return cblas_ddot((int)x.size(), &x[0], 1, &y[0], 1);
 	}
 	// inf-norm (maximum absolute value: index of max returned) ==================
 	static inline Int index_abs_max(const std::vector<T> &x) {
 		int maxind = 0;
 		T maxvalue = 0;
 		for(Int i = 0; i < (Int)x.size(); ++i) {
 			if(std::abs(x[i]) > maxvalue) {
 				maxvalue = fabs(x[i]);
 				maxind = i;
 			}
 		}
 		return maxind;
 	}
 	// inf-norm (maximum absolute value) =========================================
 	// technically not part of BLAS, but useful
 	static inline T abs_max(const std::vector<T> &x)
 	{ return std::abs(x[index_abs_max(x)]); }
 	// saxpy (y=alpha*x+y) =======================================================
 	static inline void add_scaled(T alpha, const std::vector<T> &x, std::vector<T> &y) {
 		cblas_daxpy((Int)x.size(), alpha, &x[0], 1, &y[0], 1);
 	}
 };
 template<class T>
 void zero(std::vector<T> &v)
 { for(int i=(int)v.size()-1; i>=0; --i) v[i]=0; }
 template<class T>
 void insert(std::vector<T> &a, unsigned int index, T e)
 {
   a.push_back(a.back());
   for(unsigned int i=(unsigned int)a.size()-1; i>index; --i)
      a[i]=a[i-1];
   a[index]=e;
 }
 template<class T>
 void erase(std::vector<T> &a, unsigned int index)
 {
   for(unsigned int i=index; i<a.size()-1; ++i)
      a[i]=a[i+1];
   a.pop_back();
 }
 //============================================================================
 // Dynamic compressed sparse row matrix.
 template<class T>
 struct SparseMatrix
 {
   int n; // dimension
   std::vector<std::vector<int> > index; // for each row, a list of all column indices (sorted)
   std::vector<std::vector<T> > value; // values corresponding to index
   explicit SparseMatrix(int n_=0, int expected_nonzeros_per_row=7)
      : n(n_), index(n_), value(n_)
   {
      for(int i=0; i<n; ++i){
         index[i].reserve(expected_nonzeros_per_row);
         value[i].reserve(expected_nonzeros_per_row);
      }
   }
   void clear(void)
   {
      n=0;
      index.clear();
      value.clear();
   }
   void zero(void)
   {
      for(int i=0; i<n; ++i){
         index[i].resize(0);
         value[i].resize(0);
      }
   }
   void resize(int n_)
   {
      n=n_;
      index.resize(n);
      value.resize(n);
   }
   T operator()(int i, int j) const
   {
      for(int k=0; k<(int)index[i].size(); ++k){
         if(index[i][k]==j) return value[i][k];
         else if(index[i][k]>j) return 0;
      }
      return 0;
   }
   void set_element(int i, int j, T new_value)
   {
      int k=0;
      for(; k<(int)index[i].size(); ++k){
         if(index[i][k]==j){
            value[i][k]=new_value;
            return;
         }else if(index[i][k]>j){
            insert(index[i], k, j);
            insert(value[i], k, new_value);
            return;
         }
      }
      index[i].push_back(j);
      value[i].push_back(new_value);
   }
   void add_to_element(int i, int j, T increment_value)
   {
      int k=0;
      for(; k<(int)index[i].size(); ++k){
         if(index[i][k]==j){
            value[i][k]+=increment_value;
            return;
         }else if(index[i][k]>j){
            insert(index[i], k, j);
            insert(value[i], k, increment_value);
            return;
         }
      }
      index[i].push_back(j);
      value[i].push_back(increment_value);
   }
   // assumes indices is already sorted
   void add_sparse_row(int i, const std::vector<int> &indices, const std::vector<T> &values)
   {
      int j=0, k=0;
      while(j<indices.size() && k<(int)index[i].size()){
         if(index[i][k]<indices[j]){
            ++k;
         }else if(index[i][k]>indices[j]){
            insert(index[i], k, indices[j]);
            insert(value[i], k, values[j]);
            ++j;
         }else{
            value[i][k]+=values[j];
            ++j;
            ++k;
         }
      }
      for(;j<indices.size(); ++j){
         index[i].push_back(indices[j]);
         value[i].push_back(values[j]);
      }
   }
   // assumes matrix has symmetric structure - so the indices in row i tell us which columns to delete i from
   void symmetric_remove_row_and_column(int i)
   {
      for(int a=0; a<index[i].size(); ++a){
         int j=index[i][a]; // 
         for(int b=0; b<index[j].size(); ++b){
            if(index[j][b]==i){
               erase(index[j], b);
               erase(value[j], b);
               break;
            }
         }
      }
      index[i].resize(0);
      value[i].resize(0);
   }
   void write_matlab(std::ostream &output, const char *variable_name)
   {
      output<<variable_name<<"=sparse([";
      for(int i=0; i<n; ++i){
         for(int j=0; j<index[i].size(); ++j){
            output<<i+1<<" ";
         }
      }
      output<<"],...\n  [";
      for(int i=0; i<n; ++i){
         for(int j=0; j<index[i].size(); ++j){
            output<<index[i][j]+1<<" ";
         }
      }
      output<<"],...\n  [";
      for(int i=0; i<n; ++i){
         for(int j=0; j<value[i].size(); ++j){
            output<<value[i][j]<<" ";
         }
      }
      output<<"], "<<n<<", "<<n<<");"<<std::endl;
   }
 };
 typedef SparseMatrix<float> SparseMatrixf;
 typedef SparseMatrix<double> SparseMatrixd;
 // perform result=matrix*x
 template<class T>
 void multiply(const SparseMatrix<T> &matrix, const std::vector<T> &x, std::vector<T> &result)
 {
   assert(matrix.n==x.size());
   result.resize(matrix.n);
   //for(int i=0; i<matrix.n; ++i)
 	parallel_for(matrix.n) {
 		unsigned i (parallel_index);
 		T value=0;
 		for(int j=0; j<(int)matrix.index[i].size(); ++j){
 			value+=matrix.value[i][j]*x[matrix.index[i][j]];
 		}
 		result[i]=value;
 	} parallel_end
 }
 // perform result=result-matrix*x
 template<class T>
 void multiply_and_subtract(const SparseMatrix<T> &matrix, const std::vector<T> &x, std::vector<T> &result)
 {
   assert(matrix.n==x.size());
   result.resize(matrix.n);
   for(int i=0; i<(int)matrix.n; ++i){
      for(int j=0; j<(int)matrix.index[i].size(); ++j){
         result[i]-=matrix.value[i][j]*x[matrix.index[i][j]];
      }
   }
 }
 //============================================================================
 // Fixed version of SparseMatrix. This is not a good structure for dynamically
 // modifying the matrix, but can be significantly faster for matrix-vector
 // multiplies due to better data locality.
 template<class T>
 struct FixedSparseMatrix
 {
   int n; // dimension
   std::vector<T> value; // nonzero values row by row
   std::vector<int> colindex; // corresponding column indices
   std::vector<int> rowstart; // where each row starts in value and colindex (and last entry is one past the end, the number of nonzeros)
   explicit FixedSparseMatrix(int n_=0)
      : n(n_), value(0), colindex(0), rowstart(n_+1)
   {}
   void clear(void)
   {
      n=0;
      value.clear();
      colindex.clear();
      rowstart.clear();
   }
   void resize(int n_)
   {
      n=n_;
      rowstart.resize(n+1);
   }
   void construct_from_matrix(const SparseMatrix<T> &matrix)
   {
      resize(matrix.n);
      rowstart[0]=0;
      for(int i=0; i<n; ++i){
         rowstart[i+1]=rowstart[i]+matrix.index[i].size();
      }
      value.resize(rowstart[n]);
      colindex.resize(rowstart[n]);
      int j=0;
      for(int i=0; i<n; ++i){
         for(int k=0; k<(int)matrix.index[i].size(); ++k){
            value[j]=matrix.value[i][k];
            colindex[j]=matrix.index[i][k];
            ++j;
         }
      }
   }
   void write_matlab(std::ostream &output, const char *variable_name)
   {
      output<<variable_name<<"=sparse([";
      for(int i=0; i<n; ++i){
         for(int j=rowstart[i]; j<rowstart[i+1]; ++j){
            output<<i+1<<" ";
         }
      }
      output<<"],...\n  [";
      for(int i=0; i<n; ++i){
         for(int j=rowstart[i]; j<rowstart[i+1]; ++j){
            output<<colindex[j]+1<<" ";
         }
      }
      output<<"],...\n  [";
      for(int i=0; i<n; ++i){
         for(int j=rowstart[i]; j<rowstart[i+1]; ++j){
            output<<value[j]<<" ";
         }
      }
      output<<"], "<<n<<", "<<n<<");"<<std::endl;
   }
 };
 // perform result=matrix*x
 template<class T>
 void multiply(const FixedSparseMatrix<T> &matrix, const std::vector<T> &x, std::vector<T> &result)
 {
   assert(matrix.n==x.size());
   result.resize(matrix.n);
   //for(int i=0; i<matrix.n; ++i)
 	parallel_for(matrix.n) {
 		unsigned i (parallel_index);
 		T value=0;
 		for(int j=matrix.rowstart[i]; j<matrix.rowstart[i+1]; ++j){
 			value+=matrix.value[j]*x[matrix.colindex[j]];
 		}
 		result[i]=value;
 	} parallel_end
 }
 // perform result=result-matrix*x
 template<class T>
 void multiply_and_subtract(const FixedSparseMatrix<T> &matrix, const std::vector<T> &x, std::vector<T> &result)
 {
   assert(matrix.n==x.size());
   result.resize(matrix.n);
   for(int i=0; i<matrix.n; ++i){
      for(int j=matrix.rowstart[i]; j<matrix.rowstart[i+1]; ++j){
         result[i]-=matrix.value[j]*x[matrix.colindex[j]];
      }
   }
 }
 //============================================================================
 // A simple compressed sparse column data structure (with separate diagonal)
 // for lower triangular matrices
 template<class T>
 struct SparseColumnLowerFactor
 {
   int n;
   std::vector<T> invdiag; // reciprocals of diagonal elements
   std::vector<T> value; // values below the diagonal, listed column by column
   std::vector<int> rowindex; // a list of all row indices, for each column in turn
   std::vector<int> colstart; // where each column begins in rowindex (plus an extra entry at the end, of #nonzeros)
   std::vector<T> adiag; // just used in factorization: minimum "safe" diagonal entry allowed
   explicit SparseColumnLowerFactor(int n_=0)
      : n(n_), invdiag(n_), colstart(n_+1), adiag(n_)
   {}
   void clear(void)
   {
      n=0;
      invdiag.clear();
      value.clear();
      rowindex.clear();
      colstart.clear();
      adiag.clear();
   }
   void resize(int n_)
   {
      n=n_;
      invdiag.resize(n);
      colstart.resize(n+1);
      adiag.resize(n);
   }
   void write_matlab(std::ostream &output, const char *variable_name)
   {
      output<<variable_name<<"=sparse([";
      for(int i=0; i<n; ++i){
         output<<" "<<i+1;
         for(int j=colstart[i]; j<colstart[i+1]; ++j){
            output<<" "<<rowindex[j]+1;
         }
      }
      output<<"],...\n  [";
      for(int i=0; i<n; ++i){
         output<<" "<<i+1;
         for(int j=colstart[i]; j<colstart[i+1]; ++j){
            output<<" "<<i+1;
         }
      }
      output<<"],...\n  [";
      for(int i=0; i<n; ++i){
         output<<" "<<(invdiag[i]!=0 ? 1/invdiag[i] : 0);
         for(int j=colstart[i]; j<colstart[i+1]; ++j){
            output<<" "<<value[j];
         }
      }
      output<<"], "<<n<<", "<<n<<");"<<std::endl;
   }
 };
 //============================================================================
 // Incomplete Cholesky factorization, level zero, with option for modified version.
 // Set modification_parameter between zero (regular incomplete Cholesky) and
 // one (fully modified version), with values close to one usually giving the best
 // results. The min_diagonal_ratio parameter is used to detect and correct
 // problems in factorization: if a pivot is this much less than the diagonal
 // entry from the original matrix, the original matrix entry is used instead.
 template<class T>
 void factor_modified_incomplete_cholesky0(const SparseMatrix<T> &matrix, SparseColumnLowerFactor<T> &factor,
                                          T modification_parameter=0.97, T min_diagonal_ratio=0.25)
 {
   // first copy lower triangle of matrix into factor (Note: assuming A is symmetric of course!)
   factor.resize(matrix.n);
   zero(factor.invdiag); // important: eliminate old values from previous solves!
   factor.value.resize(0);
   factor.rowindex.resize(0);
   zero(factor.adiag);
   for(int i=0; i<matrix.n; ++i){
      factor.colstart[i]=(int)factor.rowindex.size();
      for(int j=0; j<(int)matrix.index[i].size(); ++j){
         if(matrix.index[i][j]>i){
            factor.rowindex.push_back(matrix.index[i][j]);
            factor.value.push_back(matrix.value[i][j]);
         }else if(matrix.index[i][j]==i){
            factor.invdiag[i]=factor.adiag[i]=matrix.value[i][j];
         }
      }
   }
   factor.colstart[matrix.n]=(int)factor.rowindex.size();
   // now do the incomplete factorization (figure out numerical values)
   // MATLAB code:
   // L=tril(A);
   // for k=1:size(L,2)
   //   L(k,k)=sqrt(L(k,k));
   //   L(k+1:end,k)=L(k+1:end,k)/L(k,k);
   //   for j=find(L(:,k))'
   //     if j>k
   //       fullupdate=L(:,k)*L(j,k);
   //       incompleteupdate=fullupdate.*(A(:,j)~=0);
   //       missing=sum(fullupdate-incompleteupdate);
   //       L(j:end,j)=L(j:end,j)-incompleteupdate(j:end);
   //       L(j,j)=L(j,j)-omega*missing;
   //     end
   //   end
   // end
   for(int k=0; k<matrix.n; ++k){
      if(factor.adiag[k]==0) continue; // null row/column
      // figure out the final L(k,k) entry
      if(factor.invdiag[k]<min_diagonal_ratio*factor.adiag[k])
         factor.invdiag[k]=1/sqrt(factor.adiag[k]); // drop to Gauss-Seidel here if the pivot looks dangerously small
      else
         factor.invdiag[k]=1/sqrt(factor.invdiag[k]);
      // finalize the k'th column L(:,k)
      for(int p=factor.colstart[k]; p<factor.colstart[k+1]; ++p){
         factor.value[p]*=factor.invdiag[k];
      }
      // incompletely eliminate L(:,k) from future columns, modifying diagonals
      for(int p=factor.colstart[k]; p<factor.colstart[k+1]; ++p){
         int j=factor.rowindex[p]; // work on column j
         T multiplier=factor.value[p];
         T missing=0;
         int a=factor.colstart[k];
         // first look for contributions to missing from dropped entries above the diagonal in column j
         int b=0;
         while(a<factor.colstart[k+1] && factor.rowindex[a]<j){
            // look for factor.rowindex[a] in matrix.index[j] starting at b
            while(b<(int)matrix.index[j].size()){
               if(matrix.index[j][b]<factor.rowindex[a])
                  ++b;
               else if(matrix.index[j][b]==factor.rowindex[a])
                  break;
               else{
                  missing+=factor.value[a];
                  break;
               }
            }
            ++a;
         }
         // adjust the diagonal j,j entry
         if(a<factor.colstart[k+1] && factor.rowindex[a]==j){
            factor.invdiag[j]-=multiplier*factor.value[a];
         }
         ++a;
         // and now eliminate from the nonzero entries below the diagonal in column j (or add to missing if we can't)
         b=factor.colstart[j];
         while(a<factor.colstart[k+1] && b<factor.colstart[j+1]){
            if(factor.rowindex[b]<factor.rowindex[a])
               ++b;
            else if(factor.rowindex[b]==factor.rowindex[a]){
               factor.value[b]-=multiplier*factor.value[a];
               ++a;
               ++b;
            }else{
               missing+=factor.value[a];
               ++a;
            }
         }
         // and if there's anything left to do, add it to missing
         while(a<factor.colstart[k+1]){
            missing+=factor.value[a];
            ++a;
         }
         // and do the final diagonal adjustment from the missing entries
         factor.invdiag[j]-=modification_parameter*multiplier*missing;
      }
   }
 }
 //============================================================================
 // Solution routines with lower triangular matrix.
 // solve L*result=rhs
 template<class T>
 void solve_lower(const SparseColumnLowerFactor<T> &factor, const std::vector<T> &rhs, std::vector<T> &result)
 {
   assert(factor.n==rhs.size());
   assert(factor.n==result.size());
   result=rhs;
   for(int i=0; i<factor.n; ++i){
      result[i]*=factor.invdiag[i];
      for(int j=factor.colstart[i]; j<factor.colstart[i+1]; ++j){
         result[factor.rowindex[j]]-=factor.value[j]*result[i];
      }
   }
 }
 // solve L^T*result=rhs
 template<class T>
 void solve_lower_transpose_in_place(const SparseColumnLowerFactor<T> &factor, std::vector<T> &x)
 {
   assert(factor.n==(int)x.size());
   assert(factor.n>0);
   int i=factor.n;
   do{
      --i;
      for(int j=factor.colstart[i]; j<factor.colstart[i+1]; ++j){
         x[i]-=factor.value[j]*x[factor.rowindex[j]];
      }
      x[i]*=factor.invdiag[i];
   }while(i!=0);
 }
 //============================================================================
 // Encapsulates the Conjugate Gradient algorithm with incomplete Cholesky
 // factorization preconditioner.
 template <class T>
 struct SparsePCGSolver
 {
   SparsePCGSolver(void)
   {
      set_solver_parameters(1e-5, 100, 0.97, 0.25);
   }
   void set_solver_parameters(T tolerance_factor_, int max_iterations_, T modified_incomplete_cholesky_parameter_=0.97, T min_diagonal_ratio_=0.25)
   {
      tolerance_factor=tolerance_factor_;
      if(tolerance_factor<1e-30) tolerance_factor=1e-30;
      max_iterations=max_iterations_;
      modified_incomplete_cholesky_parameter=modified_incomplete_cholesky_parameter_;
      min_diagonal_ratio=min_diagonal_ratio_;
   }
   bool solve(const SparseMatrix<T> &matrix, const std::vector<T> &rhs, std::vector<T> &result, T &relative_residual_out, int &iterations_out, int precondition=2)
   {
      int n=matrix.n;
      if((int)m.size()!=n){ m.resize(n); s.resize(n); z.resize(n); r.resize(n); }
      zero(result);
      r=rhs;
      double residual_out=InstantBLAS<int,T>::abs_max(r);
      if(residual_out==0) {
         iterations_out=0;
         return true;
      }
      //double tol=tolerance_factor*residual_out; // relative residual
      double tol=tolerance_factor;
 	  double residual_0 = residual_out;
      form_preconditioner(matrix, precondition);
      apply_preconditioner( r, z, precondition);
      double rho=InstantBLAS<int,T>::dot(z, r);
      if(rho==0 || rho!=rho) {
         iterations_out=0;
         return false;
      }
      s=z;
      fixed_matrix.construct_from_matrix(matrix);
      int iteration;
      for(iteration=0; iteration<max_iterations; ++iteration){
         multiply(fixed_matrix, s, z);
         double alpha=rho/InstantBLAS<int,T>::dot(s, z);
         InstantBLAS<int,T>::add_scaled(alpha, s, result);
         InstantBLAS<int,T>::add_scaled(-alpha, z, r);
         residual_out=InstantBLAS<int,T>::abs_max(r);
 		 relative_residual_out = residual_out / residual_0;
         if(residual_out<=tol) {
            iterations_out=iteration+1;
            return true; 
         }
         apply_preconditioner(r, z, precondition);
         double rho_new=InstantBLAS<int,T>::dot(z, r);
         double beta=rho_new/rho;
         InstantBLAS<int,T>::add_scaled(beta, s, z); s.swap(z); // s=beta*s+z
         rho=rho_new;
      }
      iterations_out=iteration;
 	  relative_residual_out = residual_out / residual_0;
      return false;
   }
   protected:
   // internal structures
   SparseColumnLowerFactor<T> ic_factor; // modified incomplete cholesky factor
   std::vector<T> m, z, s, r; // temporary vectors for PCG
   FixedSparseMatrix<T> fixed_matrix; // used within loop
   // parameters
   T tolerance_factor;
   int max_iterations;
   T modified_incomplete_cholesky_parameter;
   T min_diagonal_ratio;
   void form_preconditioner(const SparseMatrix<T>& matrix, int precondition=2)
   {
 		if(precondition==2) {
 			// incomplete cholesky
 			factor_modified_incomplete_cholesky0(matrix, ic_factor, modified_incomplete_cholesky_parameter, min_diagonal_ratio);
 		} else if(precondition==1) {
 			// diagonal
 		    ic_factor.resize(matrix.n);
 		    zero(ic_factor.invdiag);
 			for(int i=0; i<matrix.n; ++i) {
 				  for(int j=0; j<(int)matrix.index[i].size(); ++j){
 					 if(matrix.index[i][j]==i){
 						ic_factor.invdiag[i] = 1./matrix.value[i][j];
 					 }
 				  }
 			}
 		}
   }
   void apply_preconditioner(const std::vector<T> &x, std::vector<T> &result, int precondition=2)
   {
 		if (precondition==2) {
 			// incomplete cholesky
 			solve_lower(ic_factor, x, result);
 			solve_lower_transpose_in_place(ic_factor,result);
 		} else if(precondition==1) {
 			// diagonal
 			for(int_index i=0; i<(int_index)result.size(); ++i) {
 				result[i] = x[i] * ic_factor.invdiag[i];
 		    }
 		} else {
 			// off
 			result = x;
 		}
   }
 };
 #undef parallel_for
 #undef parallel_end             
 #undef int_index
 #undef parallel_block 
 #undef do_parallel 
 #undef do_end
 #undef block_end
 #endif
--- a/Simulations/util/vectorbase.h
+++ b/Simulations/util/vectorbase.h