% Conference paper published in the COMPSAC 2023 proceedings (IEEE).
% Typed as inproceedings so that booktitle, editor, pages and publisher are
% picked up by bibliography styles; acronyms in the title are brace-protected
% against sentence-casing, and the page range uses the canonical double hyphen.
@inproceedings{23:mulone:wide:vcp,
title = {Porting the Variant Calling Pipeline for {NGS} data in cloud-{HPC} environment},
author = {Alberto Mulone and Sherine Awad and Davide Chiarugi and Marco Aldinucci},
editor = {Hossain Shahriar and Yuuichi Teranishi and Alfredo Cuzzocrea and Moushumi Sharmin and Dave Towey and A. K. M. Jahangir Alam Majumder and Hiroki Kashiwazaki and Ji-Jiang Yang and Michiharu Takemoto and Nazmus Sakib and Ryohei Banno and Sheikh Iqbal Ahamed},
url = {https://datacloud.di.unito.it/index.php/s/zNLj3LCZNsNxHwy},
doi = {10.1109/COMPSAC57700.2023.00288},
year = {2023},
date = {2023-06-01},
booktitle = {47th IEEE Annual Computers, Software, and Applications Conference, COMPSAC 2023},
pages = {1858--1863},
publisher = {IEEE},
address = {Torino, Italy},
abstract = {In recent years we have understood the importance of analyzing and sequencing human genetic variation. A relevant aspect that emerged from the Covid-19 pandemic was the need to obtain results very quickly; this involved using High-Performance Computing (HPC) environments to execute the Next Generation Sequencing (NGS) pipeline. However, HPC is not always the most suitable environment for the entire execution of a pipeline, especially when it involves many heterogeneous tools. The ability to execute parts of the pipeline on different environments can lead to higher performance but also cheaper executions. This work shows the design and optimization process that led us to a state-of-the-art Variant Calling hybrid workflow based on the StreamFlow Workflow Management System (WfMS). We also compare StreamFlow with Snakemake, an established WfMS targeting HPC facilities, observing comparable performance on single environments and satisfactory improvements with a hybrid cloud-HPC configuration.},
keywords = {across, icsc, streamflow},
pubstate = {published},
tppubtype = {inproceedings}
}