Papers | Parallel Computing
2019
Ivan Merelli, Federico Fornari, Fabio Tordini, Daniele D'Agostino, Marco Aldinucci, Daniele Cesini
Exploiting Docker containers over Grid computing for a comprehensive study of chromatin conformation in different cell types Journal Article
In: Journal of Parallel and Distributed Computing, vol. 134, pp. 116–127, 2019, ISSN: 0743-7315.
Abstract | Links | BibTeX | Tags: bioinformatics
@article{19:merelli:jpdc,
title = {Exploiting Docker containers over Grid computing for a comprehensive study of chromatin conformation in different cell types},
author = {Ivan Merelli and Federico Fornari and Fabio Tordini and Daniele D'Agostino and Marco Aldinucci and Daniele Cesini},
url = {https://iris.unito.it/retrieve/handle/2318/1711684/532767/2019_Nuchart_JPDC_open.pdf},
doi = {10.1016/j.jpdc.2019.08.002},
issn = {0743-7315},
year = {2019},
date = {2019-01-01},
journal = {Journal of Parallel and Distributed Computing},
volume = {134},
pages = {116--127},
abstract = {Many bioinformatic applications require to exploit the capabilities of several computational resources to effectively access and process large and distributed datasets. In this context, Grid computing has been largely used to face unprecedented challenges in Computational Biology, at the cost of complex workarounds needed to make applications successfully running. The Grid computing paradigm, in fact, has always suffered from a lack of flexibility. Although this has been partially solved by Cloud computing, the on-demand approach is way distant from the original idea of volunteering computing that boosted the Grid paradigm. A solution to outpace the impossibility of creating custom environments for running applications in Grid is represented by the containerization technology. In this paper, we describe our experience in exploiting a Docker-based approach to run in a Grid environment a novel, computationally intensive, bioinformatic application, which models the DNA spatial conformation inside the nucleus of eukaryotic cells. Results assess the feasibility of this approach in terms of performance and efforts to run large experiments.},
keywords = {bioinformatics},
pubstate = {published},
tppubtype = {article}
}
2017
Fabio Tordini, Maurizio Drocco, Claudia Misale, Luciano Milanesi, Pietro Liò, Ivan Merelli, Massimo Torquati, Marco Aldinucci
NuChart-II: the road to a fast and scalable tool for Hi-C data analysis Journal Article
In: International Journal of High Performance Computing Applications, vol. 31, no. 3, pp. 196–211, 2017.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow, repara, rephrase
@article{16:ijhpca:nuchart,
title = {NuChart-II: the road to a fast and scalable tool for Hi-C data analysis},
author = {Fabio Tordini and Maurizio Drocco and Claudia Misale and Luciano Milanesi and Pietro Liò and Ivan Merelli and Massimo Torquati and Marco Aldinucci},
url = {https://iris.unito.it/retrieve/handle/2318/1607126/238747/main.pdf},
doi = {10.1177/1094342016668567},
year = {2017},
date = {2017-01-01},
journal = {International Journal of High Performance Computing Applications},
volume = {31},
number = {3},
pages = {196--211},
abstract = {Recent advances in molecular biology and bioinformatics techniques brought to an explosion of the information about the spatial organisation of the DNA in the nucleus of a cell. High-throughput molecular biology techniques provide a genome-wide capture of the spatial organization of chromosomes at unprecedented scales, which permit to identify physical interactions between genetic elements located throughout a genome. Recent results have shown that there is a large correlation between co-localization and co-regulation of genes, but these important information are hampered by the lack of biologists-friendly analysis and visualisation software. In this work we present NuChart-II, an efficient and highly optimized tool for genomic data analysis that provides a gene-centric, graph-based representation of genomic information. While designing NuChart-II we addressed several common issues in the parallelisation of memory bound algorithms for shared-memory systems. With performance and usability in mind, NuChart-II is a R package that embeds a C++ engine: computing capabilities and memory hierarchy of multi-core architectures are fully exploited, while the versatile R environment for statistical analysis and data visualisation rises the level of abstraction and permits to orchestrate analysis and visualisation of genomic data.},
keywords = {bioinformatics, fastflow, repara, rephrase},
pubstate = {published},
tppubtype = {article}
}
2016
Fabio Tordini
The road towards a Cloud-based High-Performance solution for genomic data analysis PhD Thesis
Computer Science Department, University of Torino, Italy, 2016.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow
@phdthesis{tordiniThesis16,
  author     = {Fabio Tordini},
  title      = {The road towards a Cloud-based High-Performance solution for genomic data analysis},
  school     = {Computer Science Department, University of Torino, Italy},
  year       = {2016},
  date       = {2016-04-01},
  url        = {http://calvados.di.unipi.it/storage/paper_files/2016_tordini_phdthesis.pdf},
  abstract   = {Nowadays, molecular biology laboratories are delivering more and more data about DNA organisation, at increasing resolution and in a large number of samples. So much that genomic research is now facing many of the scale-out issues that high-performance computing has been addressing for years: they require powerful infrastructures with fast computing and storage capabilities, with substantial challenges in terms of data processing, statistical analysis and data representation. With this thesis we propose a high-performance pipeline for the analysis and interpretation of heterogeneous genomic information: beside performance, usability and availability are two essential requirements that novel Bioinformatics tools should satisfy. In this perspective, we propose and discuss our efforts towards a solid infrastructure for data processing and storage, where software that operates over data is exposed as a service, and is accessible by users through the Internet. We begin by presenting NuChart-II, a tool for the analysis and interpretation of spatial genomic information. With NuChart-II we propose a graph-based representation of genomic data, which can provide insights on the disposition of genomic elements in the DNA. We also discuss our approach for the normalisation of biases that affect raw sequenced data. We believe that many currently available tools for genomic data analysis are perceived as tricky and troublesome applications, that require highly specialised skills to obtain the desired outcomes. Concerning usability, we want to rise the level of abstraction perceived by the user, but maintain high performance and correctness while providing an exhaustive solution for data visualisation. We also intend to foster the availability of novel tools: in this work we also discuss a cloud solution that delivers computation and storage as dynamically allocated virtual resources via the Internet, while needed software is provided as a service. 
In this way, the computational demand of genomic research can be satisfied more economically by using lab-scale and enterprise-oriented technologies. Here we discuss our idea of a task farm for the integration of heterogeneous data resulting from different sequencing experiments: we believe that the integration of multi-omic features on a nuclear map can be a valuable mean for studying the interactions among genetic elements. This can reveal insights on biological mechanisms, such as genes regulation, translocations and epigenetic patterns.},
  keywords   = {bioinformatics, fastflow},
  pubstate   = {published},
  tppubtype  = {phdthesis}
}
Fabio Tordini, Ivan Merelli, Pietro Liò, Luciano Milanesi, Marco Aldinucci
NuchaRt: embedding high-level parallel computing in R for augmented Hi-C data analysis Book Section
In: Springer International Publishing (Ed.): Computational Intelligence Methods for Bioinformatics and Biostatistics, vol. 9874, pp. 259–272, Springer International Publishing, Cham (ZG), 2016, ISBN: 978-3-319-44331-7.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow, repara
@incollection{15:lnbi:nuchaRt,
title = {NuchaRt: embedding high-level parallel computing in R for augmented Hi-C data analysis},
author = {Fabio Tordini and Ivan Merelli and Pietro Liò and Luciano Milanesi and Marco Aldinucci},
editor = {{Springer International Publishing}},
url = {https://iris.unito.it/retrieve/handle/2318/1608281/253372/rnuchart.pdf},
doi = {10.1007/978-3-319-44332-4},
isbn = {978-3-319-44331-7},
year = {2016},
date = {2016-01-01},
booktitle = {Computational Intelligence Methods for Bioinformatics and Biostatistics},
volume = {9874},
pages = {259--272},
publisher = {Springer International Publishing},
address = {Cham (ZG)},
series = {Lecture Notes in Computer Science},
abstract = {Recent advances in molecular biology and Bioinformatics techniques brought to an explosion of the information about the spatial organisation of the DNA in the nucleus. High-throughput chromosome conformation capture techniques provide a genome-wide capture of chromatin contacts at unprecedented scales, which permit to identify physical interactions between genetic elements located throughout the human genome. These important studies are hampered by the lack of biologists-friendly software. In this work we present NuchaRt, an R package that wraps NuChart-II, an efficient and highly optimized C++ tool for the exploration of Hi-C data. By rising the level of abstraction, NuchaRt proposes a high-performance pipeline that allows users to orchestrate analysis and visualisation of multi-omics data, making optimal use of the computing capabilities offered by modern multi-core architectures, combined with the versatile and well known R environment for statistical analysis and data visualisation.},
keywords = {bioinformatics, fastflow, repara},
pubstate = {published},
tppubtype = {incollection}
}
Fabio Tordini
A cloud solution for multi-omics data integration Proceedings Article
In: Proceedings of the 16th IEEE International Conference on Scalable Computing and Communication, pp. 559–566, IEEE Computer Society, 2016, (Best paper award).
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow, rephrase
@inproceedings{16:scalcom:cloud,
title = {A cloud solution for multi-omics data integration},
author = {Fabio Tordini},
url = {http://calvados.di.unipi.it/storage/paper_files/2016_cloudpipeline_scalcom.pdf},
doi = {10.1109/UIC-ATC-ScalCom-CBDCom-IoP-SmartWorld.2016.131},
year = {2016},
date = {2016-01-01},
booktitle = {Proceedings of the 16th IEEE International Conference on Scalable Computing and Communication},
pages = {559--566},
publisher = {IEEE Computer Society},
abstract = {Recent advances in molecular biology and Bioinformatics techniques have brought to an explosion of the information about the spatial organisation of the DNA inside the nucleus. In particular, 3C-based techniques are revealing the genome folding for many different cell types, and permit to create a more effective representation of the disposition of genes in the three-dimensional space. This information can be used to re-interpret heterogeneous genomic data (multi-omic) relying on 3D maps of the chromosome. The storage and computational requirements needed to accomplish such operations on raw sequenced data have to be fulfilled using HPC solutions, and the the Cloud paradigm is a valuable and convenient mean for delivering HPC to Bioinformatics. In this work we describe a data analysis work-flow that allows the integration and the interpretation of multi-omic data on a sort of ``topographical'' nuclear map, capable of representing the effective disposition of genes in a graph-based representation. We propose a cloud-based task farm pattern to orchestrate the services needed to accomplish genomic data analysis, where each service represents a special-purpose tool, playing a part in well known data analysis pipelines.},
note = {Best paper award},
keywords = {bioinformatics, fastflow, rephrase},
pubstate = {published},
tppubtype = {inproceedings}
}
2015
Fabio Tordini, Maurizio Drocco, Ivan Merelli, Luciano Milanesi, Pietro Liò, Marco Aldinucci
NuChart-II: a graph-based approach for the analysis and interpretation of Hi-C data Proceedings Article
In: Di Serio, Clelia, Liò, Pietro, Nonis, Alessandro, Tagliaferri, Roberto (Ed.): Proc. of 11th Intl. Meeting on Computational Intelligence Methods for Bioinformatics and Biostatistics (CIBB), pp. 298–311, Springer, Cambridge, UK, 2015, ISBN: 978-3-319-24461-7.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow, paraphrase, repara
@inproceedings{14:ff:nuchart:cibb,
title = {NuChart-II: a graph-based approach for the analysis and interpretation of Hi-C data},
author = {Fabio Tordini and Maurizio Drocco and Ivan Merelli and Luciano Milanesi and Pietro Liò and Marco Aldinucci},
editor = {Di Serio, Clelia and Liò, Pietro and Nonis, Alessandro and Tagliaferri, Roberto},
url = {http://calvados.di.unipi.it/storage/paper_files/2014_nuchart_cibb.pdf},
doi = {10.1007/978-3-319-24462-4_25},
isbn = {978-3-319-24461-7},
year = {2015},
date = {2015-06-01},
booktitle = {Proc. of 11th Intl. Meeting on Computational Intelligence Methods for Bioinformatics and Biostatistics (CIBB)},
volume = {8623},
pages = {298--311},
publisher = {Springer},
address = {Cambridge, UK},
series = {Lecture Notes in Computer Science},
abstract = {Long-range chromosomal associations between genomic regions, and their repositioning in the 3D space of the nucleus, are now considered to be key contributors to the regulation of gene expressions, and important links have been highlighted with other genomic features involved in DNA rearrangements. Recent Chromosome Conformation Capture (3C) measurements performed with high throughput sequencing (Hi-C) and molecular dynamics studies show that there is a large correlation between co-localization and co-regulation of genes, but these important researches are hampered by the lack of biologists-friendly analysis and visualisation software. In this work we present NuChart-II, a software that allows the user to annotate and visualize a list of input genes with information relying on Hi-C data, integrating knowledge data about genomic features that are involved in the chromosome spatial organization. This software works directly with sequenced reads to identify related Hi-C fragments, with the aim of creating gene-centric neighbourhood graphs on which multi-omics features can be mapped. NuChart-II is a highly optimized implementation of a previous prototype package developed in R, in which the graph-based representation of Hi-C data was tested. The prototype showed inevitable problems of scalability while working genome-wide on large datasets: particular attention has been paid in optimizing the data structures employed while constructing the neighbourhood graph, so as to foster an efficient parallel implementation of the software. The normalization of Hi-C data has been modified and improved, in order to provide a reliable estimation of proximity likelihood for the genes.},
keywords = {bioinformatics, fastflow, paraphrase, repara},
pubstate = {published},
tppubtype = {inproceedings}
}
Maurizio Drocco, Claudia Misale, Guilherme Peretti Pezzi, Fabio Tordini, Marco Aldinucci
Memory-Optimised Parallel Processing of Hi-C Data Proceedings Article
In: Proc. of 23rd Euromicro Intl. Conference on Parallel Distributed and network-based Processing (PDP), pp. 1–8, IEEE, 2015.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow, impact, paraphrase, repara
@inproceedings{nuchart:speedup:15,
title = {Memory-Optimised Parallel Processing of Hi-C Data},
author = {Maurizio Drocco and Claudia Misale and Guilherme Peretti Pezzi and Fabio Tordini and Marco Aldinucci},
url = {https://iris.unito.it/retrieve/handle/2318/1521910/40615/2015_pdp_memopt.pdf},
doi = {10.1109/PDP.2015.63},
year = {2015},
date = {2015-03-01},
booktitle = {Proc. of 23rd Euromicro Intl. Conference on Parallel Distributed and network-based Processing (PDP)},
pages = {1--8},
publisher = {IEEE},
abstract = {This paper presents the optimisation efforts on the creation of a graph-based mapping representation of gene adjacency. The method is based on the Hi-C process, starting from Next Generation Sequencing data, and it analyses a huge amount of static data in order to produce maps for one or more genes. Straightforward parallelisation of this scheme does not yield acceptable performance on multicore architectures since the scalability is rather limited due to the memory bound nature of the problem. This work focuses on the memory optimisations that can be applied to the graph construction algorithm and its (complex) data structures to derive a cache-oblivious algorithm and eventually to improve the memory bandwidth utilisation. We used as running example NuChart-II, a tool for annotation and statistic analysis of Hi-C data that creates a gene-centric neighborhood graph. The proposed approach, which is exemplified for Hi-C, addresses several common issue in the parallelisation of memory bound algorithms for multicore. Results show that the proposed approach is able to increase the parallel speedup from 7x to 22x (on a 32-core platform). Finally, the proposed C++ implementation outperforms the first R NuChart prototype, by which it was not possible to complete the graph generation because of strong memory-saturation problems.},
keywords = {bioinformatics, fastflow, impact, paraphrase, repara},
pubstate = {published},
tppubtype = {inproceedings}
}
Fabio Tordini, Maurizio Drocco, Claudia Misale, Luciano Milanesi, Pietro Liò, Ivan Merelli, Marco Aldinucci
Parallel Exploration of the Nuclear Chromosome Conformation with NuChart-II Proceedings Article
In: Proc. of 23rd Euromicro Intl. Conference on Parallel Distributed and network-based Processing (PDP), IEEE, 2015.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow, impact, paraphrase, repara
@inproceedings{nuchar:tool:15,
title = {Parallel Exploration of the Nuclear Chromosome Conformation with NuChart-II},
author = {Fabio Tordini and Maurizio Drocco and Claudia Misale and Luciano Milanesi and Pietro Liò and Ivan Merelli and Marco Aldinucci},
url = {https://iris.unito.it/retrieve/handle/2318/1522038/40619/2015_pdp_nuchartff.pdf},
doi = {10.1109/PDP.2015.104},
year = {2015},
date = {2015-03-01},
booktitle = {Proc. of 23rd Euromicro Intl. Conference on Parallel Distributed and network-based Processing (PDP)},
publisher = {IEEE},
abstract = {High-throughput molecular biology techniques are widely used to identify physical interactions between genetic elements located throughout the human genome. Chromosome Conformation Capture (3C) and other related techniques allow to investigate the spatial organisation of chromosomes in the cell's natural state. Recent results have shown that there is a large correlation between co-localization and co-regulation of genes, but these important information are hampered by the lack of biologists-friendly analysis and visualisation software. In this work we introduce NuChart-II, a tool for Hi-C data analysis that provides a gene-centric view of the chromosomal neighbourhood in a graph-based manner. NuChart-II is an efficient and highly optimized C++ re-implementation of a previous prototype package developed in R. Representing Hi-C data using a graph-based approach overcomes the common view relying on genomic coordinates and permits the use of graph analysis techniques to explore the spatial conformation of a gene neighbourhood.},
keywords = {bioinformatics, fastflow, impact, paraphrase, repara},
pubstate = {published},
tppubtype = {inproceedings}
}
Ivan Merelli, Fabio Tordini, Maurizio Drocco, Marco Aldinucci, Pietro Liò, Luciano Milanesi
Integrating Multi-omic features exploiting Chromosome Conformation Capture data Journal Article
In: Frontiers in Genetics, vol. 6, no. 40, 2015, ISSN: 1664-8021.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow
@article{nuchart:frontiers:15,
  author     = {Ivan Merelli and Fabio Tordini and Maurizio Drocco and Marco Aldinucci and Pietro Liò and Luciano Milanesi},
  title      = {Integrating Multi-omic features exploiting Chromosome Conformation Capture data},
  journal    = {Frontiers in Genetics},
  volume     = {6},
  number     = {40},
  year       = {2015},
  date       = {2015-01-01},
  issn       = {1664-8021},
  doi        = {10.3389/fgene.2015.00040},
  url        = {http://journal.frontiersin.org/Journal/10.3389/fgene.2015.00040/pdf},
  abstract   = {The representation, integration and interpretation of omic data is a complex task, in particular considering the huge amount of information that is daily produced in molecular biology laboratories all around the world. The reason is that sequencing data regarding expression profiles, methylation patterns, and chromatin domains is difficult to harmonize in a systems biology view, since genome browsers only allow coordinate-based representations, discarding functional clusters created by the spatial conformation of the DNA in the nucleus. In this context, recent progresses in high throughput molecular biology techniques and bioinformatics have provided insights into chromatin interactions on a larger scale and offer a formidable support for the interpretation of multi-omic data. In particular, a novel sequencing technique called Chromosome Conformation Capture (3C) allows the analysis of the chromosome organization in the cell's natural state. While performed genome wide, this technique is usually called Hi-C. Inspired by service applications such as Google Maps, we developed NuChart, an R package that integrates Hi-C data to describe the chromosomal neighbourhood starting from the information about gene positions, with the possibility of mapping on the achieved graphs genomic features such as methylation patterns and histone modifications, along with expression profiles. In this paper we show the importance of the NuChart application for the integration of multi-omic data in a systems biology fashion, with particular interest in cytogenetic applications of these techniques. Moreover, we demonstrate how the integration of multi-omic data can provide useful information in understanding why genes are in certain specific positions inside the nucleus and how epigenetic patterns correlate with their expression.},
  keywords   = {bioinformatics, fastflow},
  pubstate   = {published},
  tppubtype  = {article}
}
Marco Aldinucci, Andrea Bracciali, Tobias Marschall, Murray Patterson, Nadia Pisanti, Massimo Torquati
High-Performance Haplotype Assembly Proceedings Article
In: Di Serio, Clelia, Liò, Pietro, Nonis, Alessandro, Tagliaferri, Roberto (Ed.): Computational Intelligence Methods for Bioinformatics and Biostatistics - 11th International Meeting, CIBB 2014, Cambridge, UK, June 26-28, 2014, Revised Selected Papers, pp. 245–258, Springer, Cambridge, UK, 2015.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow
@inproceedings{14:ff:whatsapp:cibb,
title = {High-Performance Haplotype Assembly},
author = {Marco Aldinucci and Andrea Bracciali and Tobias Marschall and Murray Patterson and Nadia Pisanti and Massimo Torquati},
editor = {Di Serio, Clelia and Liò, Pietro and Nonis, Alessandro and Tagliaferri, Roberto},
url = {https://iris.unito.it/retrieve/handle/2318/1523292/46714/2014_pHaplo_cibb.pdf},
doi = {10.1007/978-3-319-24462-4_21},
year = {2015},
date = {2015-01-01},
booktitle = {Computational Intelligence Methods for Bioinformatics and Biostatistics - 11th International Meeting, CIBB 2014, Cambridge, UK, June 26-28, 2014, Revised Selected Papers},
volume = {8623},
pages = {245--258},
publisher = {Springer},
address = {Cambridge, UK},
series = {Lecture Notes in Computer Science},
abstract = {The problem of Haplotype Assembly is an essential step in human genome analysis. It is typically formalised as the Minimum Error Correction (MEC) problem which is NP-hard. MEC has been approached using heuristics, integer linear programming, and fixed-parameter tractability (FPT), including approaches whose runtime is exponential in the length of the DNA fragments obtained by the sequencing process. Technological improvements are currently increasing fragment length, which drastically elevates computational costs for such methods. We present pWhatsHap, a multi-core parallelisation of WhatsHap, a recent FPT optimal approach to MEC. WhatsHap moves complexity from fragment length to fragment overlap and is hence of particular interest when considering sequencing technology's current trends. pWhatsHap further improves the efficiency in solving the MEC problem, as shown by experiments performed on datasets with high coverage.},
keywords = {bioinformatics, fastflow},
pubstate = {published},
tppubtype = {inproceedings}
}
2014
Marco Aldinucci, Massimo Torquati, Concetto Spampinato, Maurizio Drocco, Claudia Misale, Cristina Calcagno, Mario Coppo
Parallel stochastic systems biology in the cloud Journal Article
In: Briefings in Bioinformatics, vol. 15, no. 5, pp. 798–813, 2014, ISSN: 1467-5463.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow, impact, paraphrase
@article{cwc:cloud:bib:13,
title = {Parallel stochastic systems biology in the cloud},
author = {Marco Aldinucci and Massimo Torquati and Concetto Spampinato and Maurizio Drocco and Claudia Misale and Cristina Calcagno and Mario Coppo},
url = {https://iris.unito.it/retrieve/handle/2318/140080/22528/FF_Cloud_briefings_final_submitted_copy.pdf},
doi = {10.1093/bib/bbt040},
issn = {1467-5463},
year = {2014},
date = {2014-01-01},
journal = {Briefings in Bioinformatics},
volume = {15},
number = {5},
pages = {798--813},
abstract = {The stochastic modelling of biological systems, coupled with Monte Carlo simulation of models, is an increasingly popular technique in bioinformatics. The simulation-analysis workflow may result computationally expensive reducing the interactivity required in the model tuning. In this work, we advocate the high-level software design as a vehicle for building efficient and portable parallel simulators for the cloud. In particular, the Calculus of Wrapped Components (CWC) simulator for systems biology, which is designed according to the FastFlow pattern-based approach, is presented and discussed. Thanks to the FastFlow framework, the CWC simulator is designed as a high-level workflow that can simulate CWC models, merge simulation results and statistically analyse them in a single parallel workflow in the cloud. To improve interactivity, successive phases are pipelined in such a way that the workflow begins to output a stream of analysis results immediately after simulation is started. Performance and effectiveness of the CWC simulator are validated on the Amazon Elastic Compute Cloud.},
keywords = {bioinformatics, fastflow, impact, paraphrase},
pubstate = {published},
tppubtype = {article}
}
Marco Aldinucci, Cristina Calcagno, Mario Coppo, Ferruccio Damiani, Maurizio Drocco, Eva Sciacca, Salvatore Spinella, Massimo Torquati, Angelo Troina
On designing multicore-aware simulators for systems biology endowed with on-line statistics Journal Article
In: BioMed Research International, 2014.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow, paraphrase
@article{cwcsim:ff:multicore:biomed:14,
title = {On designing multicore-aware simulators for systems biology endowed with on-line statistics},
author = {Marco Aldinucci and Cristina Calcagno and Mario Coppo and Ferruccio Damiani and Maurizio Drocco and Eva Sciacca and Salvatore Spinella and Massimo Torquati and Angelo Troina},
url = {http://downloads.hindawi.com/journals/bmri/2014/207041.pdf},
doi = {10.1155/2014/207041},
year = {2014},
date = {2014-01-01},
journal = {BioMed Research International},
abstract = {The paper arguments are on enabling methodologies for the design of a fully parallel, online, interactive tool aiming to support the bioinformatics scientists. In particular, the features of these methodologies, supported by the FastFlow parallel programming framework, are shown on a simulation tool to perform the modeling, the tuning, and the sensitivity analysis of stochastic biological models. A stochastic simulation needs thousands of independent simulation trajectories turning into big data that should be analysed by statistic and data mining tools. In the considered approach the two stages are pipelined in such a way that the simulation stage streams out the partial results of all simulation trajectories to the analysis stage that immediately produces a partial result. The simulation-analysis workflow is validated for performance and effectiveness of the online analysis in capturing biological systems behavior on a multicore platform and representative proof-of-concept biological systems. The exploited methodologies include pattern-based parallel programming and data streaming that provide key features to the software designers such as performance portability and efficient in-memory (big) data management and movement. Two paradigmatic classes of biological systems exhibiting multistable and oscillatory behavior are used as a testbed.},
keywords = {bioinformatics, fastflow, paraphrase},
pubstate = {published},
tppubtype = {article}
}
Marco Aldinucci, Maurizio Drocco, Guilherme Peretti Pezzi, Claudia Misale, Fabio Tordini, Massimo Torquati
Exercising high-level parallel programming on streams: a systems biology use case Proceedings Article
In: Proc. of 34th IEEE Intl. Conference on Distributed Computing Systems Workshops (ICDCSW), IEEE, Madrid, Spain, 2014.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow, impact, paraphrase
@inproceedings{cwc:gpu:dcperf:14,
title = {Exercising high-level parallel programming on streams: a systems biology use case},
author = {Marco Aldinucci and Maurizio Drocco and Guilherme Peretti Pezzi and Claudia Misale and Fabio Tordini and Massimo Torquati},
url = {https://iris.unito.it/retrieve/handle/2318/154516/26657/2014_dcperf_cwc_gpu.pdf},
doi = {10.1109/ICDCSW.2014.38},
year = {2014},
date = {2014-01-01},
booktitle = {Proc. of 34th IEEE Intl. Conference on Distributed Computing Systems Workshops (ICDCSW)},
publisher = {IEEE},
address = {Madrid, Spain},
abstract = {The stochastic modelling of biological systems, coupled with Monte Carlo simulation of models, is an increasingly popular technique in Bioinformatics. The simulation-analysis workflow may result into a computationally expensive task reducing the interactivity required in the model tuning. In this work, we advocate high-level software design as a vehicle for building efficient and portable parallel simulators for a variety of platforms, ranging from multi-core platforms to GPGPUs to cloud. In particular, the Calculus of Wrapped Compartments (CWC) parallel simulator for systems biology equipped with on-line mining of results, which is designed according to the FastFlow pattern-based approach, is discussed as a running example. In this work, the CWC simulator is used as a paradigmatic example of a complex C++ application where the quality of results is correlated with both computation and I/O bounds, and where high-quality results might turn into big data. The FastFlow parallel programming framework, which advocates C++ pattern-based parallel programming makes it possible to develop portable parallel code without relinquish neither run-time efficiency nor performance tuning opportunities. Performance and effectiveness of the approach are validated on a variety of platforms, inter-alia cache-coherent multi-cores, cluster of multi-core (Ethernet and Infiniband) and the Amazon Elastic Compute Cloud.},
keywords = {bioinformatics, fastflow, impact, paraphrase},
pubstate = {published},
tppubtype = {inproceedings}
}
Claudia Misale, Giulio Ferrero, Massimo Torquati, Marco Aldinucci
Sequence alignment tools: one parallel pattern to rule them all? Journal Article
In: BioMed Research International, 2014.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow, paraphrase, repara
@article{bowtie-bwa:ff:multicore:biomed:14,
  author    = {Claudia Misale and Giulio Ferrero and Massimo Torquati and Marco Aldinucci},
  title     = {Sequence alignment tools: one parallel pattern to rule them all?},
  journal   = {BioMed Research International},
  year      = {2014},
  date      = {2014-01-01},
  doi       = {10.1155/2014/539410},
  url       = {http://downloads.hindawi.com/journals/bmri/2014/539410.pdf},
  abstract  = {In this paper we advocate high-level programming methodology for Next Generation Sequencers (NGS) alignment tools for both productivity and absolute performance. We analyse the problem of parallel alignment and review the parallelisation strategies of the most popular alignment tools, which can all be abstracted to a single parallel paradigm. We compare these tools against their porting onto the FastFlow pattern-based programming framework, which provides programmers with high-level parallel patterns. By using a high-level approach, programmers are liberated from all complex aspects of parallel programming, such as synchronisation protocols and task scheduling, gaining more possibility for seamless performance tuning. In this work we show some use case in which, by using a high-level approach for parallelising NGS tools, it is possible to obtain comparable or even better absolute performance for all used datasets.},
  keywords  = {bioinformatics, fastflow, paraphrase, repara},
  pubstate  = {published},
  tppubtype = {article}
}
2013
Marco Aldinucci, Fabio Tordini, Maurizio Drocco, Massimo Torquati, Mario Coppo
Parallel stochastic simulators in system biology: the evolution of the species Proceedings Article
In: Proc. of 21st Euromicro Intl. Conference on Parallel Distributed and network-based Processing (PDP), IEEE, Belfast, Northern Ireland, U.K., 2013.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow
@inproceedings{ff_cwc_distr:pdp:13,
title = {Parallel stochastic simulators in system biology: the evolution of the species},
author = {Marco Aldinucci and Fabio Tordini and Maurizio Drocco and Massimo Torquati and Mario Coppo},
url = {http://calvados.di.unipi.it/storage/paper_files/2013_cwc_d_PDP.pdf},
doi = {10.1109/PDP.2013.66},
year = {2013},
date = {2013-02-01},
booktitle = {Proc. of 21st Euromicro Intl. Conference on Parallel Distributed and network-based Processing (PDP)},
publisher = {IEEE},
address = {Belfast, Northern Ireland, U.K.},
abstract = {The stochastic simulation of biological systems is an increasingly popular technique in Bioinformatics. It is often an enlightening technique, especially for multi-stable systems which dynamics can be hardly captured with ordinary differential equations. To be effective, stochastic simulations should be supported by powerful statistical analysis tools. The simulation-analysis workflow may however result in being computationally expensive, thus compromising the interactivity required in model tuning. In this work we advocate the high-level design of simulators for stochastic systems as a vehicle for building efficient and portable parallel simulators. In particular, the Calculus of Wrapped Components (CWC) simulator, which is designed according to the FastFlow's pattern-based approach, is presented and discussed in this work. FastFlow has been extended to support also clusters of multi-cores with minimal coding effort, assessing the portability of the approach.},
keywords = {bioinformatics, fastflow},
pubstate = {published},
tppubtype = {inproceedings}
}
2012
Marco Aldinucci, Mario Coppo, Ferruccio Damiani, Maurizio Drocco, Eva Sciacca, Salvatore Spinella, Massimo Torquati, Angelo Troina
On Parallelizing On-Line Statistics for Stochastic Biological Simulations Proceedings Article
In: Alexander, Michael, D'Ambra, Pasqua, Belloum, Adam, Bosilca, George, Cannataro, Mario, Danelutto, Marco, Martino, Beniamino Di, Gerndt, Michael, Jeannot, Emmanuel, Namyst, Raymond, Roman, Jean, Scott, Stephen L., Träff, Jesper Larsson, Vallée, Geoffroy, Weidendorfer, Josef (Ed.): Proc. of Euro-Par Workshops: 2nd Workshop on High Performance Bioinformatics and Biomedicine (HiBB), pp. 3–12, Springer, Bordeaux, France, 2012.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow
@inproceedings{cwcsim:onlinestats:ff:hibb:11,
title = {On Parallelizing On-Line Statistics for Stochastic Biological Simulations},
author = {Marco Aldinucci and Mario Coppo and Ferruccio Damiani and Maurizio Drocco and Eva Sciacca and Salvatore Spinella and Massimo Torquati and Angelo Troina},
editor = {Michael Alexander and Pasqua D'Ambra and Adam Belloum and George Bosilca and Mario Cannataro and Marco Danelutto and Beniamino Di Martino and Michael Gerndt and Emmanuel Jeannot and Raymond Namyst and Jean Roman and Stephen L. Scott and Jesper Larsson Träff and Geoffroy Vallée and Josef Weidendorfer},
url = {http://calvados.di.unipi.it/storage/paper_files/2012_onlinestat_HiBB2011.pdf},
doi = {10.1007/978-3-642-29740-3_2},
year = {2012},
date = {2012-01-01},
booktitle = {Proc. of Euro-Par Workshops: 2nd Workshop on High Performance Bioinformatics and Biomedicine (HiBB)},
volume = {7156},
pages = {3--12},
publisher = {Springer},
address = {Bordeaux, France},
series = {LNCS},
abstract = {This work concerns a general technique to enrich parallel version of stochastic simulators for biological systems with tools for on-line statistical analysis of the results. In particular, within the FastFlow parallel programming framework, we describe the methodology and the implementation of a parallel Monte Carlo simulation infrastructure extended with user-defined on-line data filtering and mining functions. The simulator and the on-line analysis were validated on large multi-core platforms and representative proof-of-concept biological systems.},
keywords = {bioinformatics, fastflow},
pubstate = {published},
tppubtype = {inproceedings}
}
2011
Marco Aldinucci, Andrea Bracciali, Pietro Liò, Anil Sorathiya, Massimo Torquati
StochKit-FF: Efficient Systems Biology on Multicore Architectures Proceedings Article
In: Guarracino, M. R., Vivien, F., Träff, J. L., Cannataro, M., Danelutto, M., Hast, A., Perla, F., Knüpfer, A., Martino, B. Di, Alexander, M. (Ed.): Euro-Par 2010 Workshops, Proc. of the 1st Workshop on High Performance Bioinformatics and Biomedicine (HiBB), pp. 167–175, Springer, Ischia, Italy, 2011.
Abstract | Links | BibTeX | Tags: bioinformatics
@inproceedings{stochkit-ff:hibb:10,
title = {StochKit-FF: Efficient Systems Biology on Multicore Architectures},
author = {Marco Aldinucci and Andrea Bracciali and Pietro Liò and Anil Sorathiya and Massimo Torquati},
editor = {M. R. Guarracino and F. Vivien and J. L. Träff and M. Cannataro and M. Danelutto and A. Hast and F. Perla and A. Knüpfer and B. Di Martino and M. Alexander},
url = {http://calvados.di.unipi.it/storage/paper_files/2010_stochkit-ff_hibb.pdf},
doi = {10.1007/978-3-642-21878-1_21},
year = {2011},
date = {2011-08-01},
booktitle = {Euro-Par 2010 Workshops, Proc. of the 1st Workshop on High Performance Bioinformatics and Biomedicine (HiBB)},
volume = {6586},
pages = {167--175},
publisher = {Springer},
address = {Ischia, Italy},
series = {LNCS},
abstract = {The stochastic modelling of biological systems is an informative, and in some cases, very adequate technique, which may however result in being more expensive than other modelling approaches, such as differential equations. We present StochKit-FF, a parallel version of StochKit, a reference toolkit for stochastic simulations. StochKit-FF is based on the FastFlow programming toolkit for multicores and exploits the novel concept of selective memory. We experiment StochKit-FF on a model of HIV infection dynamics, with the aim of extracting information from efficiently run experiments, here in terms of average and variance and, on a longer term, of more structured data.},
keywords = {bioinformatics},
pubstate = {published},
tppubtype = {inproceedings}
}
2010
Marco Aldinucci, Andrea Bracciali, Pietro Liò
Formal Synthetic Immunology Journal Article
In: ERCIM News, vol. 82, pp. 40–41, 2010, ISSN: 0926-4981.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow
@article{stochkitff:ercimnews:10,
title = {Formal Synthetic Immunology},
author = {Marco Aldinucci and Andrea Bracciali and Pietro Liò},
url = {http://ercim-news.ercim.eu/images/stories/EN82/EN82-web.pdf},
issn = {0926-4981},
year = {2010},
date = {2010-07-01},
journal = {ERCIM News},
volume = {82},
pages = {40--41},
abstract = {The human immune system fights pathogens using an articulated set of strategies whose function is to maintain in health the organism. A large effort to formally model such a complex system using a computational approach is currently underway, with the goal of developing a discipline for engineering "synthetic" immune responses. This requires the integration of a range of analysis techniques developed for formally reasoning about the behaviour of complex dynamical systems. Furthermore, a novel class of software tools has to be developed, capable of efficiently analysing these systems on widely accessible computing platforms, such as commodity multi-core architectures.},
keywords = {bioinformatics, fastflow},
pubstate = {published},
tppubtype = {article}
}
Marco Aldinucci
Efficient Parallel MonteCarlo with FastFlow Book Section
In: HPC-Europa2: Science and Supercomputing in Europe, research highlights 2010, Cineca, 2010.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow
@incollection{ff:hpc-europa:10,
title = {Efficient Parallel MonteCarlo with FastFlow},
author = {Marco Aldinucci},
url = {http://calvados.di.unipi.it/storage/paper_files/2010-ff_hpceuropa2_092-inform-Aldinucci.pdf},
year = {2010},
date = {2010-01-01},
booktitle = {HPC-Europa2: Science and Supercomputing in Europe, research highlights 2010},
publisher = {Cineca},
abstract = {The stochastic simulation of natural systems is very informative but happens to be computationally expensive. We present StochKit-FF, a parallel version of StochKit, a reference toolkit for stochastic simulations that substantially improves StochKit performances on multi-core platforms.},
keywords = {bioinformatics, fastflow},
pubstate = {published},
tppubtype = {incollection}
}