System

Test 2: Random Order Initialization

The following program create_sparse_pattern.cc (see the FLENS examples directory) first generates a vector of length n with random values. Next, it generates random row and column indices and increments . These triples are stored in a file and can be used for initializing a sparse matrix according to

Further stored in the file are vectors and .

    #include <flens/flens.h>
    #include <fstream>
    #include <sstream>
  
    using namespace flens;
    using namespace std;
  
    typedef DenseVector<Array<double> > Vec;
  
    int
    main(int argc, char **argv)
    {
        ofstream out("sparse_pattern.dat");
        int n = 10000000;
        int k = 3;
        if (argc>1) {
            n = atoi(argv[1]);
        }
        if (argc>2) {
            k = atoi(argv[2]);
        }
        out << n << endl;
        out << k << endl;
  
        Vec x(n);
        for (int i=1; i<=n; ++i) {
            int pos = rand() % n +1;
            int incr = rand() % 10 +1;
            x(pos) += incr;
        }
  
        // b1 =  A*x
        // b2 = x'*A
        Vec b1(n), b2(n);
        for (int i=1; i<=k*n; ++i) {
            int row = rand() % n +1;
            int col = rand() % n +1;
            int incr = rand() % 10 +1;
            out << row << " " << col << " " << incr << endl;
            b1(row) += incr*x(col);
            b2(col) += incr*x(row);
        }
  
        for (int i=1; i<=n; ++i) {
            out << x(i) << " " << b1(i) << " " << b2(i) << endl;
        }
    }

FLENS example

residual_sparse2.cc reads in the generated test-file and computes and :

  #include <flens/flens.h>
  #include <timer.h>
  #include <fstream>
  
  using namespace flens;
  using namespace std;
  
  typedef SparseGeMatrix<CRS<double> > Mat;
  typedef DenseVector<Array<double> >  Vec;
  
  int
  main()
  {
      ifstream in("sparse_pattern.dat");
      int n, k;
      in >> n >> k;
  
      Mat A(n, n, k);
      Vec x(n), b1(n), b2(n), r(n);
  
      {
          timer t;
  
          t.tic();
          for (int i=1; i<=k*n; ++i) {
              int row, col, incr;
  
              in >> row >> col >> incr;
              A(row, col) += incr;
          }
          A.finalize();
          cout << "fill: " << t.toc() << endl;
  
          for (int i=1; i<=n; ++i) {
              int x_, b1_, b2_;
  
              in >> x_ >> b1_ >> b2_;
              x(i) = x_;
              b1(i) = b1_;
              b2(i) = b2_;
          }
      }
  
      {
          timer t;
  
          t.tic();
          r = b1 - A*x;
          cout << "r = b - A*x: " << t.toc()
               << " (" << nrm2(r) << ")" << endl;
      }
  
      {
          timer t;
  
          t.tic();
          r = b2 - x*A;
          cout << "r = b - x*A: " << t.toc()
               << " (" << nrm2(r) << ")" << endl;
      }
  }

Now, the initialization also requires sorting. Internally A.finalize() triggers the conversion from coordinate storage to compressed row storage (so in the following output fill refers to initialize coordinate storage and convert to compressed row storage). Here some test runs showing that FLENS is scaling fine for this job:

  lehn@node31:~/flens_new/FLENS-lite/examples> ./residual_sparse2
  fill: 0.32
  r = b - A*x: 0 (0)
  r = b - x*A: 0.01 (0)
  lehn@node31:~/flens_new/FLENS-lite/examples> ./create_sparse_pattern 100000 5
  lehn@node31:~/flens_new/FLENS-lite/examples> ./residual_sparse2
  fill: 0.53
  r = b - A*x: 0.01 (0)
  r = b - x*A: 0.01 (0)
  lehn@node31:~/flens_new/FLENS-lite/examples> ./create_sparse_pattern 1000000 5
  lehn@node31:~/flens_new/FLENS-lite/examples> ./residual_sparse2
  fill: 4.6
  r = b - A*x: 0.22 (0)
  r = b - x*A: 0.23 (0)
  lehn@node31:~/flens_new/FLENS-lite/examples> ./create_sparse_pattern 10000000 5
  lehn@node31:~/flens_new/FLENS-lite/examples> ./residual_sparse2
  fill: 47.22
  r = b - A*x: 2.54 (0)
  r = b - x*A: 2.73 (0)

uBLAS Example

The following uBLAS example is implemented analogously to the above FLENS example:

    // -*- c++ -*- //
    /** \file main.cpp  \brief main routines */
    /***************************************************************************
     -   begin                : 2003-09-05
     -   copyright            : (C) 2003 by Gunter Winkler
     -   email                : guwi17@gmx.de
     ***************************************************************************/
  
    /***************************************************************************
     *                                                                         *
     *   This program is free software; you can redistribute it and/or modify  *
     *   it under the terms of the GNU General Public License as published by  *
     *   the Free Software Foundation; either version 2 of the License, or     *
     *   (at your option) any later version.                                   *
     *                                                                         *
     ***************************************************************************/
  
    #include <iostream>
    #include <fstream>
  
    #include <boost/timer.hpp>
  
    #include <boost/numeric/ublas/matrix.hpp>
    #include <boost/numeric/ublas/matrix_sparse.hpp>
    #include <boost/numeric/ublas/vector.hpp>
    #include <boost/numeric/ublas/io.hpp>
    #include <boost/numeric/ublas/operation.hpp>
  
  
    #ifndef NDEBUG
    const size_t default_size = 100;
    #else
    const size_t default_size = 10000000;
    #endif
  
    const size_t subsize = 3;
  
    using namespace boost::numeric::ublas ;
  
    using std::cout;
    using std::endl;
    using std::ifstream;
  
    namespace boost { namespace numeric { namespace ublas {
  
        /** \brief compute residual r = b - Ax
         *
         */
        template<class V, class T1, class IA1, class TA1, class E2>
        void.
        residual ( const compressed_matrix<T1, row_major, 0, IA1, TA1> & A,
                   const vector_expression<E2> &x,
                   const vector_expression<E2> &b,
                   V &r, row_major_tag) {
  
            BOOST_UBLAS_CHECK( x().size() >= A.size2(), external_logic() );
            BOOST_UBLAS_CHECK( b().size() >= A.size1(), external_logic() );
            BOOST_UBLAS_CHECK( r.size() >= A.size1(), external_logic() );
  
            typedef typename V::size_type size_type;
            typedef typename V::value_type value_type;
  
            for (size_type i = 0; i < A.size1 (); ++ i) {
                size_type begin = A.index1_data () [i];
                size_type end = A.index1_data () [i + 1];
                value_type t (b() (i));
                for (size_type j = begin; j < end; ++ j)
                    t -= A.value_data () [j] * x () (A.index2_data () [j]);
                r (i) = t;
            }
            return;
        }
  
    }}}
  
    template <class MAT>
    void matrix_fill(MAT& A)
    {
      typedef typename MAT::size_type  size_type;
  
      for (size_type i=0; i<A.size1(); ++i)|{
        if (i>=1) { A.push_back(i,i-1,-1.0); }
        A.push_back(i,i,4);
        if (i+1<A.size2()) { A.push_back(i,i+1,-1.0); }
      }
    }
  
    int main(int argc, char *argv[])
    {
  
      ifstream in("sparse_pattern.dat");
      typedef size_t  size_type;
      size_type size, k;
      in >> size >> k;
  
      compressed_matrix<double, row_major> A(size,size);
      coordinate_matrix<double, row_major> Ah(size,size);
      vector<double>                       x(size), b1(size), b2(size), r(size);
      {
        boost::timer t;
        t.restart();
        for (size_type i=1; i<=k*size; ++i) {
          int row, col, incr;
          in >> row >> col >> incr;
          Ah.push_back(row-1, col-1, incr);
        }
        cout << "fill " << t.elapsed() << endl;
  
        for (size_type i=0; i<size; ++i) {
          int x_, b1_, b2_;
          in >> x_ >> b1_ >> b2_;
          x(i) = x_;
          b1(i) = b1_;
          b2(i) = b2_;
        }
      }
  
      {
        boost::timer t;
        t.restart();
        A = Ah;
        cout << "convert " << t.elapsed() << endl;
      }
  
      {
        boost::timer t;
        t.restart();
        vector<double>                       r(size);
        for (int k=1; k<=20; ++k) {
          residual(A,x,b1,r,row_major_tag());
        }
        cout << "residual(A,x,b,r,row_major_tag()) : " << t.elapsed()
             << " (" << norm_2(r) << ")" << endl;
      }
  
      return EXIT_SUCCESS;
    }

However, this example does not scale that well:

  lehn@node31:~/ublas2_test> ./ex_residual2
  fill 0.03
  convert 0.33
  residual(A,x,b,r,row_major_tag()) : 0 (550313)
  lehn@node31:~/ublas2_test> ../flens_new/FLENS-lite/examples/create_sparse_pattern 50000 5
  lehn@node31:~/ublas2_test> ./ex_residual2
  fill 0.18
  convert 82.8
  residual(A,x,b,r,row_major_tag()) : 0.05 (5.05958e+06)
  lehn@node31:~/ublas2_test> ../flens_new/FLENS-lite/examples/create_sparse_pattern 100000 5
  lehn@node31:~/ublas2_test> ./ex_residual2
  fill 0.44

...the last run did not finish after hours.