Papers | Parallel Computing
2015
Maurizio Drocco, Claudia Misale, Guilherme Peretti Pezzi, Fabio Tordini, Marco Aldinucci
Memory-Optimised Parallel Processing of Hi-C Data Proceedings Article
In: Proc. of 23rd Euromicro Intl. Conference on Parallel Distributed and network-based Processing (PDP), pp. 1–8, IEEE, 2015.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow, impact, paraphrase, repara
% Conference paper (Euromicro PDP 2015): memory optimisation of the Hi-C graph construction used by NuChart-II.
% The page range uses the BibTeX "--" form; braces in the title keep "Hi-C" from being re-cased by styles.
@inproceedings{nuchart:speedup:15,
  title      = {Memory-Optimised Parallel Processing of {Hi-C} Data},
  author     = {Maurizio Drocco and Claudia Misale and Guilherme Peretti Pezzi and Fabio Tordini and Marco Aldinucci},
  url        = {https://iris.unito.it/retrieve/handle/2318/1521910/40615/2015_pdp_memopt.pdf},
  doi        = {10.1109/PDP.2015.63},
  year       = {2015},
  date       = {2015-03-01},
  booktitle  = {Proc. of 23rd Euromicro Intl. Conference on Parallel Distributed and network-based Processing (PDP)},
  pages      = {1--8},
  publisher  = {IEEE},
  abstract   = {This paper presents the optimisation efforts on the creation of a graph-based mapping representation of gene adjacency. The method is based on the Hi-C process, starting from Next Generation Sequencing data, and it analyses a huge amount of static data in order to produce maps for one or more genes. Straightforward parallelisation of this scheme does not yield acceptable performance on multicore architectures since the scalability is rather limited due to the memory bound nature of the problem. This work focuses on the memory optimisations that can be applied to the graph construction algorithm and its (complex) data structures to derive a cache-oblivious algorithm and eventually to improve the memory bandwidth utilisation. We used as running example NuChart-II, a tool for annotation and statistic analysis of Hi-C data that creates a gene-centric neighborhood graph. The proposed approach, which is exemplified for Hi-C, addresses several common issue in the parallelisation of memory bound algorithms for multicore. Results show that the proposed approach is able to increase the parallel speedup from 7x to 22x (on a 32-core platform). Finally, the proposed C++ implementation outperforms the first R NuChart prototype, by which it was not possible to complete the graph generation because of strong memory-saturation problems.},
  keywords   = {bioinformatics, fastflow, impact, paraphrase, repara},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Fabio Tordini, Maurizio Drocco, Claudia Misale, Luciano Milanesi, Pietro Liò, Ivan Merelli, Marco Aldinucci
Parallel Exploration of the Nuclear Chromosome Conformation with NuChart-II Proceedings Article
In: Proc. of 23rd Euromicro Intl. Conference on Parallel Distributed and network-based Processing (PDP), IEEE, 2015.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow, impact, paraphrase, repara
% Conference paper (Euromicro PDP 2015) introducing NuChart-II, a graph-based tool for Hi-C data analysis.
% The citation key is kept exactly as-is so existing citations keep resolving.
@inproceedings{nuchar:tool:15,
  title      = {Parallel Exploration of the Nuclear Chromosome Conformation with {NuChart-II}},
  author     = {Fabio Tordini and Maurizio Drocco and Claudia Misale and Luciano Milanesi and Pietro Liò and Ivan Merelli and Marco Aldinucci},
  url        = {https://iris.unito.it/retrieve/handle/2318/1522038/40619/2015_pdp_nuchartff.pdf},
  doi        = {10.1109/PDP.2015.104},
  year       = {2015},
  date       = {2015-03-01},
  booktitle  = {Proc. of 23rd Euromicro Intl. Conference on Parallel Distributed and network-based Processing (PDP)},
  publisher  = {IEEE},
  abstract   = {High-throughput molecular biology techniques are widely used to identify physical interactions between genetic elements located throughout the human genome. Chromosome Conformation Capture (3C) and other related techniques allow to investigate the spatial organisation of chromosomes in the cell's natural state. Recent results have shown that there is a large correlation between co-localization and co-regulation of genes, but these important information are hampered by the lack of biologists-friendly analysis and visualisation software. In this work we introduce NuChart-II, a tool for Hi-C data analysis that provides a gene-centric view of the chromosomal neighbourhood in a graph-based manner. NuChart-II is an efficient and highly optimized C++ re-implementation of a previous prototype package developed in R. Representing Hi-C data using a graph-based approach overcomes the common view relying on genomic coordinates and permits the use of graph analysis techniques to explore the spatial conformation of a gene neighbourhood.},
  keywords   = {bioinformatics, fastflow, impact, paraphrase, repara},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Paolo Viviani
Parallel Computing Techniques for High Energy Physics Masters Thesis
Physics Department, University of Torino, 2015.
Abstract | BibTeX | Tags: fastflow, impact
% Master's thesis (Physics Department, University of Torino, 2015) on parallel computing for high-energy-physics calculations.
@mastersthesis{tesi:viviani:15,
  title      = {Parallel Computing Techniques for High Energy Physics},
  author     = {Paolo Viviani},
  year       = {2015},
  date       = {2015-01-01},
  school     = {Physics Department, University of Torino},
  abstract   = {Modern experimental achievements, with LHC results as a prominent but not exclusive representative, have undisclosed a new range of challenges concerning theoretical computations. Tree level QED calculation are no more satisfactory due to the very small experimental uncertainty of precision e+ e- measurements, so Next To Leading and Next to Next to Leading Order calculations are required. At the same time many-legs, high-order QCD processes needed to simulate LHC events are raising even more the bar of computational complexity. The drive for the present work has been the interest in calculating high multiplicity Higgs boson processes with a dedicated software library (RECOLA) currently under development at the University of Torino, as well as the related technological challenges. This thesis undertakes the task of exploring the possibilities offered by present and upcoming computing technologies in order to face these challenges properly. The first two chapters outlines the theoretical context and the available technologies. In chapter 3 a case study is examined in full detail, in order to explore the suitability of different parallel computing solutions. In the chapter 4, some of those solutions are implemented in the context of the RECOLA library, allowing it to handle processes at a previously unexplored scale of complexity. Alongside, the potential of new, cost-effective parallel architectures is tested.},
  keywords   = {fastflow, impact},
  pubstate   = {published},
  tppubtype  = {mastersthesis}
}
Marco Aldinucci, Guilherme Peretti Pezzi, Maurizio Drocco, Concetto Spampinato, Massimo Torquati
Parallel Visual Data Restoration on Multi-GPGPUs using Stencil-Reduce Pattern Journal Article
In: International Journal of High Performance Computing Applications, vol. 29, no. 4, pp. 461–472, 2015.
Abstract | Links | BibTeX | Tags: fastflow, HPC, impact, paraphrase
% Journal article (IJHPCA vol. 29, no. 4, 2015): FastFlow stencil-reduce filter for visual data restoration.
% The page range uses the BibTeX "--" form; braces in the title protect the acronym from style re-casing.
@article{ff:denoiser:ijhpca:15,
  title      = {Parallel Visual Data Restoration on {Multi-GPGPUs} using Stencil-Reduce Pattern},
  author     = {Marco Aldinucci and Guilherme Peretti Pezzi and Maurizio Drocco and Concetto Spampinato and Massimo Torquati},
  url        = {https://iris.unito.it/retrieve/handle/2318/1522073/299200/ijhpca_4aperto.pdf},
  doi        = {10.1177/1094342014567907},
  year       = {2015},
  date       = {2015-01-01},
  journal    = {International Journal of High Performance Computing Applications},
  volume     = {29},
  number     = {4},
  pages      = {461--472},
  abstract   = {In this paper, a highly effective parallel filter for visual data restoration is presented. The filter is designed following a skeletal approach, using a newly proposed stencil-reduce, and has been implemented by way of the FastFlow parallel programming library. As a result of its high-level design, it is possible to run the filter seamlessly on a multicore machine, on multi-GPGPUs, or on both. The design and implementation of the filter are discussed, and an experimental evaluation is presented.},
  keywords   = {fastflow, HPC, impact, paraphrase},
  pubstate   = {published},
  tppubtype  = {article}
}
Guilherme Peretti Pezzi, Evelyne Vaissié, Yann Viala, Denis Caromel, Philippe Gourbesville
Parallel profiling of water distribution networks using the Clément formula Journal Article
In: Applied Mathematics and Computation, vol. 267, pp. 83–95, 2015, ISSN: 0096-3003, (The Fourth European Seminar on Computing (ESCO 2014)).
Abstract | Links | BibTeX | Tags: impact
% Journal article (Applied Mathematics and Computation vol. 267, 2015): distributed redesign of the IRMA hydraulic simulator.
% The page range uses the BibTeX "--" form; the abstract is stored without the "Abstract" label and stray "SAC" token left by extraction.
@article{PerettiPezzi201583,
  title      = {Parallel profiling of water distribution networks using the {Clément} formula},
  author     = {Guilherme Peretti Pezzi and Evelyne Vaissié and Yann Viala and Denis Caromel and Philippe Gourbesville},
  url        = {http://www.sciencedirect.com/science/article/pii/S0096300315007080},
  doi        = {10.1016/j.amc.2015.05.084},
  issn       = {0096-3003},
  year       = {2015},
  date       = {2015-01-01},
  journal    = {Applied Mathematics and Computation},
  volume     = {267},
  pages      = {83--95},
  abstract   = {Optimization of water distribution is a crucial issue which has been targeted by many modeling tools. Useful models, implemented several decades ago, need to be updated and implemented in more powerful computing environments. This paper presents the distributed and redesigned version of a legacy hydraulic simulation software written in Fortran (IRMA) that has been used for over 30 years by the Société du Canal de Provence in order to design and to maintain water distribution networks. IRMA was developed aiming mainly at the treatment of irrigation networks – by using the Clément demand model and is now used to manage more than 6000 km of piped networks. The complexity and size of networks have been growing since the creation of IRMA and the legacy software could not handle the simulation of very large networks in terms of performance. This limitation has finally imposed to redesign the code by using modern tools and language (Java), and also to run distributed simulations by using the ProActive Parallel Suite.},
  note       = {The Fourth European Seminar on Computing (ESCO 2014)},
  keywords   = {impact},
  pubstate   = {published},
  tppubtype  = {article}
}
2014
Marco Aldinucci, Massimo Torquati, Maurizio Drocco, Guilherme Peretti Pezzi, Concetto Spampinato
FastFlow: Combining Pattern-Level Abstraction and Efficiency in GPGPUs Proceedings Article
In: GPU Technology Conference (GTC), San Jose, CA, USA, 2014.
Abstract | Links | BibTeX | Tags: fastflow, HPC, impact, paraphrase
% Conference talk (GPU Technology Conference 2014, San Jose): FastFlow parallel patterns for CPUs and GPGPUs.
% Braces in the title keep "FastFlow" and "GPGPUs" intact under styles that sentence-case titles.
@inproceedings{ff:gtc:2014,
  title      = {{FastFlow}: Combining Pattern-Level Abstraction and Efficiency in {GPGPUs}},
  author     = {Marco Aldinucci and Massimo Torquati and Maurizio Drocco and Guilherme Peretti Pezzi and Concetto Spampinato},
  url        = {http://calvados.di.unipi.it/storage/talks/2014_S4729-Marco-Aldinucci.pdf},
  year       = {2014},
  date       = {2014-03-01},
  booktitle  = {GPU Technology Conference (GTC)},
  address    = {San Jose, CA, USA},
  abstract   = {Learn how FastFlow's parallel patterns can be used to design parallel applications for execution on both CPUs and GPGPUs while avoiding most of the complex low-level detail needed to make them efficient, portable and rapid to prototype. As use case, we will show the design and effectiveness of a novel universal image filtering template based on the variational approach.},
  keywords   = {fastflow, HPC, impact, paraphrase},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Marco Aldinucci, Massimo Torquati, Maurizio Drocco, Guilherme Peretti Pezzi, Concetto Spampinato
An Overview of FastFlow: Combining Pattern-Level Abstraction and Efficiency in GPGPUs Proceedings Article
In: GPU Technology Conference (GTC), San Jose, CA, USA, 2014.
Abstract | Links | BibTeX | Tags: fastflow, HPC, impact, paraphrase
% Conference talk (GPU Technology Conference 2014, San Jose): overview session on FastFlow parallel patterns.
% Braces in the title keep "FastFlow" and "GPGPUs" intact under styles that sentence-case titles.
@inproceedings{ff:gtc:2014:short,
  title      = {An Overview of {FastFlow}: Combining Pattern-Level Abstraction and Efficiency in {GPGPUs}},
  author     = {Marco Aldinucci and Massimo Torquati and Maurizio Drocco and Guilherme Peretti Pezzi and Concetto Spampinato},
  url        = {http://calvados.di.unipi.it/storage/talks/2014_S4585-Marco-Aldinucci.pdf},
  year       = {2014},
  date       = {2014-03-01},
  booktitle  = {GPU Technology Conference (GTC)},
  address    = {San Jose, CA, USA},
  abstract   = {Get an overview of FastFlow's parallel patterns can be used to design parallel applications for execution on both CPUs and GPGPUs while avoiding most of the complex low-level detail needed to make them efficient, portable and rapid to prototype. For a more detailed and technical review of FastFlow's parallel patterns as well as a use case where we will show the design and effectiveness of a novel universal image filtering template based on the variational approach.},
  keywords   = {fastflow, HPC, impact, paraphrase},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Marco Aldinucci, Massimo Torquati, Concetto Spampinato, Maurizio Drocco, Claudia Misale, Cristina Calcagno, Mario Coppo
Parallel stochastic systems biology in the cloud Journal Article
In: Briefings in Bioinformatics, vol. 15, no. 5, pp. 798–813, 2014, ISSN: 1467-5463.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow, impact, paraphrase
% Journal article (Briefings in Bioinformatics vol. 15, no. 5, 2014): the CWC simulator as a FastFlow workflow in the cloud.
% The page range uses the BibTeX "--" form rather than a literal en dash.
@article{cwc:cloud:bib:13,
  title      = {Parallel stochastic systems biology in the cloud},
  author     = {Marco Aldinucci and Massimo Torquati and Concetto Spampinato and Maurizio Drocco and Claudia Misale and Cristina Calcagno and Mario Coppo},
  url        = {https://iris.unito.it/retrieve/handle/2318/140080/22528/FF_Cloud_briefings_final_submitted_copy.pdf},
  doi        = {10.1093/bib/bbt040},
  issn       = {1467-5463},
  year       = {2014},
  date       = {2014-01-01},
  journal    = {Briefings in Bioinformatics},
  volume     = {15},
  number     = {5},
  pages      = {798--813},
  abstract   = {The stochastic modelling of biological systems, coupled with Monte Carlo simulation of models, is an increasingly popular technique in bioinformatics. The simulation-analysis workflow may result computationally expensive reducing the interactivity required in the model tuning. In this work, we advocate the high-level software design as a vehicle for building efficient and portable parallel simulators for the cloud. In particular, the Calculus of Wrapped Components (CWC) simulator for systems biology, which is designed according to the FastFlow pattern-based approach, is presented and discussed. Thanks to the FastFlow framework, the CWC simulator is designed as a high-level workflow that can simulate CWC models, merge simulation results and statistically analyse them in a single parallel workflow in the cloud. To improve interactivity, successive phases are pipelined in such a way that the workflow begins to output a stream of analysis results immediately after simulation is started. Performance and effectiveness of the CWC simulator are validated on the Amazon Elastic Compute Cloud.},
  keywords   = {bioinformatics, fastflow, impact, paraphrase},
  pubstate   = {published},
  tppubtype  = {article}
}
Guilherme Peretti Pezzi, Evelyne Vaissié, Yann Viala, Denis Caromel, Philippe Gourbesville
Parallel Profiling of Water Distribution Networks Using the Clément Formula Proceedings Article
In: 4th European Seminar on Computing, 2014.
Abstract | BibTeX | Tags: impact
% Conference paper (4th European Seminar on Computing, 2014): distributed redesign of the IRMA hydraulic simulator.
% Braces in the title protect the proper noun "Clément" from style re-casing.
@inproceedings{pezzi-clement:14,
  title      = {Parallel Profiling of Water Distribution Networks Using the {Clément} Formula},
  author     = {Guilherme Peretti Pezzi and Evelyne Vaissié and Yann Viala and Denis Caromel and Philippe Gourbesville},
  year       = {2014},
  date       = {2014-01-01},
  booktitle  = {4th European Seminar on Computing},
  abstract   = {Optimization of water distribution is a crucial issue which has been targeted by many modelling tools. Useful models, implemented several decades ago, need to be updated and implemented in more powerful computing environments. This paper presents the distributed and redesigned version of a legacy hydraulic simulation software written in Fortran (IRMA) that has been used for over 30 years by the Société du Canal de Provence in order to design and to maintain water distribution networks. IRMA was developed aiming mainly the treatment of irrigation networks – by using the Clément demand model and is now used to manage more than 6.000 km of piped networks. The growing complexity and size of networks requested to redesign the code by using modern tools and language (Java) and also to run distributed simulations by using the ProActive Parallel Suite.},
  keywords   = {impact},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Marco Aldinucci, Maurizio Drocco, Guilherme Peretti Pezzi, Claudia Misale, Fabio Tordini, Massimo Torquati
Exercising high-level parallel programming on streams: a systems biology use case Proceedings Article
In: Proc. of 34th IEEE Intl. Conference on Distributed Computing Systems Workshops (ICDCSW), IEEE, Madrid, Spain, 2014.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow, impact, paraphrase
% Conference paper (ICDCS Workshops 2014, Madrid): the CWC systems-biology simulator as a FastFlow streaming use case.
% The abstract is stored without the line-break hyphenation left over from text extraction.
@inproceedings{cwc:gpu:dcperf:14,
  title      = {Exercising high-level parallel programming on streams: a systems biology use case},
  author     = {Marco Aldinucci and Maurizio Drocco and Guilherme Peretti Pezzi and Claudia Misale and Fabio Tordini and Massimo Torquati},
  url        = {https://iris.unito.it/retrieve/handle/2318/154516/26657/2014_dcperf_cwc_gpu.pdf},
  doi        = {10.1109/ICDCSW.2014.38},
  year       = {2014},
  date       = {2014-01-01},
  booktitle  = {Proc. of 34th IEEE Intl. Conference on Distributed Computing Systems Workshops (ICDCSW)},
  publisher  = {IEEE},
  address    = {Madrid, Spain},
  abstract   = {The stochastic modelling of biological systems, coupled with Monte Carlo simulation of models, is an increasingly popular technique in Bioinformatics. The simulation-analysis workflow may result into a computationally expensive task reducing the interactivity required in the model tuning. In this work, we advocate high-level software design as a vehicle for building efficient and portable parallel simulators for a variety of platforms, ranging from multi-core platforms to GPGPUs to cloud. In particular, the Calculus of Wrapped Compartments (CWC) parallel simulator for systems biology equipped with on-line mining of results, which is designed according to the FastFlow pattern-based approach, is discussed as a running example. In this work, the CWC simulator is used as a paradigmatic example of a complex C++ application where the quality of results is correlated with both computation and I/O bounds, and where high-quality results might turn into big data. The FastFlow parallel programming framework, which advocates C++ pattern-based parallel programming makes it possible to develop portable parallel code without relinquish neither run-time efficiency nor performance tuning opportunities. Performance and effectiveness of the approach are validated on a variety of platforms, inter-alia cache-coherent multi-cores, cluster of multi-core (Ethernet and Infiniband) and the Amazon Elastic Compute Cloud.},
  keywords   = {bioinformatics, fastflow, impact, paraphrase},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Marco Aldinucci, Guilherme Peretti Pezzi, Maurizio Drocco, Fabio Tordini, Peter Kilpatrick, Massimo Torquati
Parallel video denoising on heterogeneous platforms Proceedings Article
In: Proc. of Intl. Workshop on High-level Programming for Heterogeneous and Hierarchical Parallel Systems (HLPGPU), 2014.
Abstract | Links | BibTeX | Tags: fastflow, impact, paraphrase
% Workshop paper (HLPGPU 2014): FastFlow-based parallel video denoising filter for multi-core and GPGPU platforms.
@inproceedings{ff:video:hlpgpu:14,
  author     = {Marco Aldinucci and Guilherme Peretti Pezzi and Maurizio Drocco and Fabio Tordini and Peter Kilpatrick and Massimo Torquati},
  title      = {Parallel video denoising on heterogeneous platforms},
  booktitle  = {Proc. of Intl. Workshop on High-level Programming for Heterogeneous and Hierarchical Parallel Systems (HLPGPU)},
  year       = {2014},
  date       = {2014-01-01},
  url        = {http://calvados.di.unipi.it/storage/paper_files/2014_ff_video_denoiser_hlpgpu.pdf},
  abstract   = {In this paper, a highly-effective parallel filter for video denoising is presented. The filter is designed using a skeletal approach, and has been implemented by way of the FastFlow parallel programming library. As a result of its high-level design, it is possible to run the filter seamlessly on a multi-core machine, on GPGPU(s), or on both. The design and the implementation of the filter are discussed, and an experimental evaluation is presented. Various mappings of the filtering stages are comparatively discussed.},
  keywords   = {fastflow, impact, paraphrase},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Alessandro Secco, Irfan Uddin, Guilherme Peretti Pezzi, Massimo Torquati
Message passing on InfiniBand RDMA for parallel run-time supports Proceedings Article
In: Aldinucci, Marco, D'Agostino, Daniele, Kilpatrick, Peter (Ed.): Proc. of Intl. Euromicro PDP 2014: Parallel Distributed and network-based Processing, IEEE, Torino, Italy, 2014.
Abstract | Links | BibTeX | Tags: fastflow, impact, paraphrase
% Conference paper (Euromicro PDP 2014, Torino): a minimal message-passing layer over InfiniBand RDMA, integrated in FastFlow.
% The abstract is stored without line-break hyphenation artefacts; braces in the title protect "InfiniBand" and "RDMA" from re-casing.
@inproceedings{ff:infiniband:pdp:14,
  title      = {Message passing on {InfiniBand} {RDMA} for parallel run-time supports},
  author     = {Alessandro Secco and Irfan Uddin and Guilherme Peretti Pezzi and Massimo Torquati},
  editor     = {Marco Aldinucci and Daniele D'Agostino and Peter Kilpatrick},
  url        = {https://iris.unito.it/retrieve/handle/2318/151178/690885/2014_ff_infiniband_pdp.pdf},
  doi        = {10.1109/PDP.2014.23},
  year       = {2014},
  date       = {2014-01-01},
  booktitle  = {Proc. of Intl. Euromicro PDP 2014: Parallel Distributed and network-based Processing},
  publisher  = {IEEE},
  address    = {Torino, Italy},
  abstract   = {InfiniBand networks are commonly used in the high performance computing area. They offer RDMA-based operations that help to improve the performance of communication subsystems. In this paper, we propose a minimal message-passing communication layer providing the programmer with a point-to-point communication channel implemented by way of InfiniBand RDMA features. Differently from other libraries exploiting the InfiniBand features, such as the well-known Message Passing Interface (MPI), the proposed library is a communication layer only rather than a programming model, and can be easily used as building block for high-level parallel programming frameworks. Evaluated on micro-benchmarks, the proposed RDMA-based communication channel implementation achieves a comparable performance with highly optimised MPI/InfiniBand implementations. Eventually, the flexibility of the communication layer is evaluated by integrating it within the FastFlow parallel framework, currently supporting TCP/IP networks (via the ZeroMQ communication library).},
  keywords   = {fastflow, impact, paraphrase},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
2012
Marco Aldinucci, Concetto Spampinato, Maurizio Drocco, Massimo Torquati, Simone Palazzo
A Parallel Edge Preserving Algorithm for Salt and Pepper Image Denoising Proceedings Article
In: Djemal, K., Deriche, M., Puech, W., Ucan, Osman N. (Ed.): Proc. of 2nd Intl. Conference on Image Processing Theory Tools and Applications (IPTA), pp. 97–102, IEEE, Istanbul, Turkey, 2012, ISBN: 978-1-4673-2582-0.
Abstract | Links | BibTeX | Tags: fastflow, impact
% Conference paper (IPTA 2012): two-phase salt-and-pepper denoising filter parallelised with FastFlow.
% The page range uses the BibTeX "--" form; the address field spells the city as "Istanbul".
@inproceedings{denoiser:ff:ipta:12,
  title      = {A Parallel Edge Preserving Algorithm for Salt and Pepper Image Denoising},
  author     = {Marco Aldinucci and Concetto Spampinato and Maurizio Drocco and Massimo Torquati and Simone Palazzo},
  editor     = {K. Djemal and M. Deriche and W. Puech and Osman N. Ucan},
  url        = {http://calvados.di.unipi.it/storage/paper_files/2012_2phasedenoiser_ff_ipta.pdf},
  doi        = {10.1109/IPTA.2012.6469567},
  isbn       = {978-1-4673-2582-0},
  year       = {2012},
  date       = {2012-10-01},
  booktitle  = {Proc. of 2nd Intl. Conference on Image Processing Theory Tools and Applications (IPTA)},
  pages      = {97--102},
  publisher  = {IEEE},
  address    = {Istanbul, Turkey},
  abstract   = {In this paper a two-phase filter for removing ``salt and pepper'' noise is proposed. In the first phase, an adaptive median filter is used to identify the set of the noisy pixels; in the second phase, these pixels are restored according to a regularization method, which contains a data-fidelity term reflecting the impulse noise characteristics. The algorithm, which exhibits good performance both in denoising and in restoration, can be easily and effectively parallelized to exploit the full power of multi-core CPUs and GPGPUs; the proposed implementation based on the FastFlow library achieves both close-to-ideal speedup and very good wall-clock execution figures.},
  keywords   = {fastflow, impact},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}