@comment{
  Two conference papers by Kasmi et al. on sparse linear algebra on GPUs.
  The url fields point to the corresponding Scopus records.
  Author fields hold only surname and initials; affiliation markers are
  not part of a name and must not be appended to the initials.
}

@inproceedings{Kasmi2015,
  author    = {Kasmi, N. and Zbakh, M. and Mahmoudi, S. A. and Manneback, P.},
  title     = {Taking advantage of {GPU/CPU} architectures for sparse Conjugate Gradient solver computation},
  booktitle = {Proceedings of 2015 {IEEE} World Conference on Complex Systems, {WCCS} 2015},
  year      = {2015},
  doi       = {10.1109/ICoCS.2015.7483268},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84978427957\&doi=10.1109\%2fICoCS.2015.7483268\&partnerID=40\&md5=af973d4adddb0778d096c29e4a71e051},
  note      = {cited By 0},
  abstract  = {Solving large sparse linear systems is a time and energy consuming process. This paper presents an efficient exploitation of graphic processing units (GPUs) for accelerating Conjugate Gradient iterative solver (CG). We use the high-level software library PARALUTION for sparse linear algebra on multi/many-core systems, which supports GPU (with CUDA and OpenCL) and Multi-CPU implementations of CG method using different storage formats. We discuss and compare performance using three platforms. {\textcopyright} 2015 IEEE.},
}

@inproceedings{Kasmi201423,
  author    = {Kasmi, N. and Mahmoudi, S. A. and Zbakh, M. and Manneback, P.},
  title     = {Performance evaluation of sparse matrix-vector product ({SpMV}) computation on {GPU} architecture},
  booktitle = {2014 2nd World Conference on Complex Systems, {WCCS} 2014},
  year      = {2014},
  pages     = {23--27},
  doi       = {10.1109/ICoCS.2014.7060964},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84929179718\&doi=10.1109\%2fICoCS.2014.7060964\&partnerID=40\&md5=5a761f6d02607bb6f754f683b3a7878f},
  note      = {cited By 1},
  abstract  = {Sparse matrices are entailed in many linear algebra problems such as linear systems resolution, matrix eigen-values/vectors computation and partial differential equations, wherefore sparse matrix vector product (SpMV) constitutes a basic kernel for solving many scientific and engineering applications problems. With the appearance of Graphics Processing Units (GPUs) as platforms that provides important acceleration factors, the optimization of SpMV on GPUs and its implementation has been a subject of broad research for the last few years. In this work we present a comparative evaluation of sparse matrix vector product (SpMV) on different platforms. We use Cusp library on CUDA architecture GPUs and MKL Intel library as reference on CPUs. Experimental results have been conducted using a set of matrices from matrix market repository, comparing performance between GPU-based Cusp and CPU-based MKL libraries. The results showed a global speedup, obtained with GPU, ranging from 1.1 x to 4.6 x compared to CPU implementations. An analysis and evaluation of these results is discussed. {\textcopyright} 2014 IEEE.},
}