Marco Edoardo Santimaria
Parallel Computing group
Via Pessinetto 12, 10149 Torino – Italy
Email: marcoedoardo.santimaria@unito.it
Short Bio
Marco Edoardo Santimaria is a PhD student at the University of Turin. He is the main developer of CAPIO, and his interests are in scientific workflows, low-level system programming, and network science.
Fields of interest
- HPC
- Parallel Computing
- Networking
- Scientific workflows
- Network science
Publications
2024
Marco Edoardo Santimaria, Samuele Fonio, Giulio Malenza, Iacopo Colonnelli, Marco Aldinucci
Benchmarking Parallelization Models through Karmarkar Interior-point method Proceedings Article
In: González-Vélez, Horacio; Chis, Adriana E. (Eds.): Proc. of the 32nd Euromicro Intl. Conference on Parallel, Distributed and Network-based Processing (PDP), pp. 1-8, IEEE, Dublin, Ireland, 2024, ISSN: 2377-5750.
Tags: HPC, icsc
@inproceedings{24:pdp:karmarkar,
title = {Benchmarking Parallelization Models through Karmarkar Interior-point method},
author = {Marco Edoardo Santimaria and Samuele Fonio and Giulio Malenza and Iacopo Colonnelli and Marco Aldinucci},
editor = {Horacio González-Vélez and Adriana E. Chis},
url = {https://hdl.handle.net/2318/1964571},
doi = {10.1109/PDP62718.2024.00010},
issn = {2377-5750},
year = {2024},
date = {2024-03-01},
booktitle = {Proc. of 32nd Euromicro intl. Conference on Parallel, Distributed and Network-based Processing (PDP)},
pages = {1-8},
publisher = {IEEE},
address = {Dublin, Ireland},
abstract = {Optimization problems are one of the main focus of scientific research. Their computational-intensive nature makes them prone to be parallelized with consistent improvements in performance. This paper sheds light on different parallel models for accelerating Karmarkar's Interior-point method. To do so, we assess parallelization strategies for individual operations within the aforementioned Karmarkar's algorithm using OpenMP, GPU acceleration with CUDA, and the recent Parallel Standard C++ Linear Algebra library (PSTL) executing both on GPU and CPU. Our different implementations yield interesting benchmark results that show the optimal approach for parallelizing interior point algorithms for general Linear Programming (LP) problems. In addition, we propose a more theoretical perspective of the parallelization of this algorithm, with a detailed study of our OpenMP implementation, showing the limits of optimizing the single operations},
keywords = {HPC, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
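The paper's comparison comes down to writing the same linear-algebra kernels once per programming model. As a rough illustration of that idea (a sketch under assumed names and a row-major layout, not the benchmark code from the paper), the fragment below expresses a dense matrix-vector product, the kind of per-operation kernel an interior-point iteration relies on, first with an OpenMP pragma and then with the C++ parallel algorithms (PSTL) and an execution policy:

// Hypothetical kernel y = A x written under two of the models compared above.
// Illustrative only; function names and data layout are assumptions.
#include <algorithm>
#include <cstddef>
#include <execution>
#include <numeric>
#include <vector>

// OpenMP variant: parallelize the outer loop over the rows of A.
void matvec_omp(const std::vector<double>& A, const std::vector<double>& x,
                std::vector<double>& y, std::size_t n) {
  #pragma omp parallel for
  for (std::size_t i = 0; i < n; ++i)
    y[i] = std::inner_product(A.begin() + i * n, A.begin() + (i + 1) * n,
                              x.begin(), 0.0);
}

// PSTL variant: the same kernel through std::transform plus an execution
// policy, which a suitable compiler can run on CPU threads or offload to a GPU.
void matvec_pstl(const std::vector<double>& A, const std::vector<double>& x,
                 std::vector<double>& y, std::size_t n) {
  std::vector<std::size_t> rows(n);
  std::iota(rows.begin(), rows.end(), 0);
  std::transform(std::execution::par_unseq, rows.begin(), rows.end(), y.begin(),
                 [&](std::size_t i) {
                   return std::transform_reduce(A.begin() + i * n,
                                                A.begin() + (i + 1) * n,
                                                x.begin(), 0.0);
                 });
}

The CUDA port in the study plays the same role: one more rendering of the same per-operation kernels, which is what makes the benchmark results directly comparable across models.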
Giulio Malenza, Valentina Cesare, Marco Edoardo Santimaria, Robert Birke, Alberto Vecchiato, Ugo Becciani, Marco Aldinucci
Performance portability via C++ PSTL, SYCL, OpenMP, and HIP: the Gaia AVU-GSR case study Proceedings Article
In: SC24-W: Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1152-1163, IEEE, 2024, ISBN: 979-8-3503-5554-3.
Tags: eupex, icsc
@inproceedings{Malenza_P3HPC_24,
title = {Performance portability via C++ PSTL, SYCL, OpenMP, and HIP: the Gaia AVU-GSR case study},
author = {Giulio Malenza and Valentina Cesare and Marco Edoardo Santimaria and Robert Birke and Alberto Vecchiato and Ugo Becciani and Marco Aldinucci},
url = {https://conferences.computer.org/sc-wpub/pdfs/SC-W2024-6oZmigAQfgJ1GhPL0yE3pS/555400b152/555400b152.pdf},
doi = {10.1109/SCW63240.2024.00157},
isbn = {979-8-3503-5554-3},
year = {2024},
date = {2024-01-01},
booktitle = {SC24-W: Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis},
pages = {1152-1163},
publisher = {IEEE},
abstract = {Applications that analyze data from modern scientific experiments will soon require a computing capacity of ExaFLOPs. The current trend to achieve such performance is to employ GPU-accelerated supercomputers and design applications to optimally exploit this hardware. Since each supercomputer is typically a one-off project, the necessity of having computational languages portable across diverse CPU and GPU architectures without performance losses is increasingly compelling. Here, we study the performance portability of the LSQR algorithm as found in the AVU-GSR code of the ESA Gaia mission. This code computes the astrometric parameters of the ~108 stars in our Galaxy. The LSQR algorithm is widely used across a broad range of high-performance computing (HPC) applications, elevating the study's relevance beyond the astrophysical domain. We developed different GPU-accelerated ports based on CUDA, C++ PSTL, SYCL, OpenMP, and HIP. We carefully verified the correctness of each port and tuned them to five different GPU-accelerated platforms from NVIDIA and AMD to evaluate the performance portability (PP) in terms of the harmonic mean of the application's performance efficiency across the tested hardware. HIP was demonstrated to be the most portable solution with a 0.94 average PP across the tested problem sizes, closely followed by SYCL coupled with AdaptiveCpp (ACPP) with 0.93. If we only consider NVIDIA platforms, CUDA would be the winner with 0.97. The tuning-oblivious C++ PSTL achieves 0.62 when coupled with vendor-specific compilers.},
keywords = {eupex, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
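The PP figures quoted in the abstract (0.94 for HIP, 0.93 for SYCL with ACPP, 0.97 for CUDA on NVIDIA platforms only, 0.62 for tuning-oblivious C++ PSTL) follow the usual harmonic-mean definition of performance portability. Restated here for clarity, with e_i(a,p) the performance efficiency of application a on problem p for platform i drawn from the platform set H (the efficiency baseline is the paper's choice and is not spelled out here):

\mathrm{PP}(a, p, H) =
\begin{cases}
\dfrac{|H|}{\sum_{i \in H} \dfrac{1}{e_i(a, p)}} & \text{if } a \text{ runs on every platform } i \in H, \\
0 & \text{otherwise,}
\end{cases}

so a single platform where a port is slow, or unsupported, drags the whole score down, which is exactly the behaviour a harmonic mean is meant to capture.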
Talks
2024
Marco Edoardo Santimaria, Iacopo Colonnelli, Marco Aldinucci
Releasing the CAPIO middleware from MPI derived constraints Miscellaneous
2024.
Tags: across, admire, capio, capiocl, eupex, icsc
@misc{24:santimaria:bighpc,
title = {Releasing the CAPIO middleware from MPI derived constraints},
author = {Marco Edoardo Santimaria and Iacopo Colonnelli and Marco Aldinucci},
url = {https://datacloud.di.unito.it/index.php/s/zrJGD4i36fWdp5g},
year = {2024},
date = {2024-09-01},
address = {Pisa, Italy},
abstract = {CAPIO is a middleware that transparently injects streaming capabilities into file-based workflows. However, its implementation is limited to HPC environments based on the MPI framework, significantly limiting its applications. This paper will illustrate a proposed architecture and some preliminary results aimed at investigating the usage of a distributed files system as a communication media for the CAPIO middleware, with the ultimate goal of supporting both CLOUD-based and HPC-based workflows.},
keywords = {across, admire, capio, capiocl, eupex, icsc},
pubstate = {published},
tppubtype = {misc}
}
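The transparency claimed above rests on intercepting the applications' file I/O rather than modifying their code. As a generic illustration of that mechanism (a hedged sketch: an LD_PRELOAD-style shim with a made-up notify_middleware() hook, not CAPIO's actual interception layer), the fragment below wraps POSIX open() so a middleware can learn about a file before the application touches it:

// Hypothetical interposer built as a shared library and activated with
// LD_PRELOAD; illustrates transparent I/O interception in general, not CAPIO.
#include <cstdarg>
#include <cstdio>
#include <dlfcn.h>
#include <fcntl.h>
#include <sys/types.h>

static void notify_middleware(const char* path) {
  // Placeholder hook: a real middleware would register the file here,
  // e.g. to start streaming its contents to a consumer step of the workflow.
  std::fprintf(stderr, "[shim] open() intercepted: %s\n", path);
}

extern "C" int open(const char* path, int flags, ...) {
  // Resolve the real open() so the unmodified application keeps working.
  using open_fn = int (*)(const char*, int, ...);
  static open_fn real_open =
      reinterpret_cast<open_fn>(dlsym(RTLD_NEXT, "open"));

  notify_middleware(path);

  if (flags & O_CREAT) {  // forward the optional mode argument when present
    va_list args;
    va_start(args, flags);
    mode_t mode = va_arg(args, mode_t);
    va_end(args);
    return real_open(path, flags, mode);
  }
  return real_open(path, flags);
}

// Build and run (illustrative):
//   g++ -shared -fPIC shim.cpp -o libshim.so -ldl
//   LD_PRELOAD=./libshim.so ./workflow_step

Propagating what the intercepted calls mean across nodes is then a transport question, which is where the talk's move from MPI to a distributed file system as the communication medium comes in.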
Marco Edoardo Santimaria, Iacopo Colonnelli, Massimo Torquati, Marco Aldinucci
CAPIO: Cross Application Programmable IO Miscellaneous
2024.
Tags: across, admire, capio, capiocl, eupex, icsc
@misc{24:santimaria:itadata:shpcpee,
title = {CAPIO: Cross Application Programmable IO},
author = {Marco Edoardo Santimaria and Iacopo Colonnelli and Massimo Torquati and Marco Aldinucci},
url = {https://datacloud.di.unito.it/index.php/s/rg6LWwrZXi6tTXm},
year = {2024},
date = {2024-09-01},
address = {Pisa, Italy},
abstract = {With the increasing amount of digital data available for analysis and simulation, the class of I/O-intensive HPC workflows is fated to expand, further exacerbating quickly the performance gap between computing, memory, and storage technologies. CAPIO (Cross-Application Programmable I/O), is a middleware capable of injecting I/O streaming capabilities into file-based workflows, improving the computation-I/O overlap without the need to change the application code. In this presentation, we will introduce the CAPIO-CL language with its semantics, as well as the implementation of the CAPIO-CL language through the CAPIO middleware. We will also provide some case studies of how CAPIO has been employed to improve workflow execution time as well as some future directions.},
keywords = {across, admire, capio, capiocl, eupex, icsc},
pubstate = {published},
tppubtype = {misc}
}
Marco Edoardo Santimaria
CAPIO-CL: Cross Application Programmable IO - Coordination Language Miscellaneous
2024.
Tags: across, admire, capio, eupex, icsc
@misc{24:santimaria:hlpp:capiocl,
title = {CAPIO-CL: Cross Application Programmable IO - Coordination Language},
author = {Marco Edoardo Santimaria},
url = {https://datacloud.di.unito.it/index.php/s/zsKY3PWzX5NFCiX},
year = {2024},
date = {2024-07-01},
address = {Pisa, Italy},
abstract = {The performance bottleneck in file-based workflows remains a pressing issue in the realm of I/O-based workflows. To address this challenge, a novel annotation language has been developed. CAPIO-CL is positioned as an innovative I/O coordination language, enabling users to annotate data dependencies within file-based workflows with synchronization semantics pertinent to the involved files and directories. Through the information provided by the language, optimization opportunities arise in streaming and preemptive data movement. This paper serves to illustrate the semantics and syntax enabling CAPIO-CL to enhance the performance of in situ workflows without necessitating the rewriting or modification of the original workflow application steps. Finally, an analysis of CAPIO-CL is provided, taking into consideration both language expressiveness and application performance enhancement.},
keywords = {across, admire, capio, eupex, icsc},
pubstate = {published},
tppubtype = {misc}
}
Giulio Malenza, Marco Edoardo Santimaria
Benchmarking Parallelization Models through Karmarkar's algorithm Miscellaneous
2024.
Tags: HPC, icsc
@misc{24:pdp:karmarkartalk,
title = {Benchmarking Parallelization Models through Karmarkar's algorithm},
author = {Giulio Malenza and Marco Edoardo Santimaria},
url = {https://datacloud.di.unito.it/index.php/s/JjKcAJpYS7ctX9r},
year = {2024},
date = {2024-03-01},
address = {Dublin, Ireland},
abstract = {Optimization problems are one of the main focus of scientific research. Their computational-intensive nature makes them prone to be parallelized with consistent improvements in performance. This paper sheds light on different parallel models for accelerating Karmarkar’s Interior-point method. To do so, we assess parallelization strategies for individual operations within the aforementioned Karmarkar’s algorithm using OpenMP, GPU acceleration with CUDA, and the recent Parallel Standard C++ Linear Algebra library (PSTL) executing both on GPU and CPU. Our different implementations yield interesting benchmark results that show the optimal approach for parallelizing interior point algorithms for general Linear Programming (LP) problems. In addition, we propose a more theoretical perspective of the parallelization of this algorithm, with a detailed study of our OpenMP implementation, showing the limits of optimizing the single operations},
keywords = {HPC, icsc},
pubstate = {published},
tppubtype = {misc}
}