Papers | Parallel Computing
2017
Maurizio Drocco
Parallel Programming with Global Asynchronous Memory: Models, C++ APIs and Implementations PhD Thesis
Computer Science Department, University of Torino, 2017.
Abstract | Links | BibTeX | Tags: fastflow, paraphrase, repara, rephrase, toreador
@comment{PhD thesis by Maurizio Drocco, University of Torino, 2017, on the Global Asynchronous Memory model. Abstract cleaned of extraction artifacts; percent sign escaped; title acronyms brace-protected.}
@phdthesis{17:gam:drocco:thesis,
  title = {Parallel Programming with Global Asynchronous Memory: Models, {C++} {APIs} and Implementations},
  author = {Maurizio Drocco},
  url = {https://zenodo.org/record/1037585/files/Drocco_phd_thesis.pdf},
  doi = {10.5281/zenodo.1037585},
  year = {2017},
  date = {2017-10-01},
  school = {Computer Science Department, University of Torino},
  abstract = {In the realm of High Performance Computing (HPC), message passing has been the programming paradigm of choice for over twenty years. The durable MPI (Message Passing Interface) standard, with send/receive communication, broadcast, gather/scatter, and reduction collectives is still used to construct parallel programs where each communication is orchestrated by the developer-based precise knowledge of data distribution and overheads; collective communications simplify the orchestration but might induce excessive synchronization. Early attempts to bring shared-memory programming model---with its programming advantages---to distributed computing, referred as the Distributed Shared Memory (DSM) model, faded away; one of the main issue was to combine performance and programmability with the memory consistency model. The recently proposed Partitioned Global Address Space (PGAS) model is a modern revamp of DSM that exposes data placement to enable optimizations based on locality, but it still addresses (simple) data-parallelism only and it relies on expensive sharing protocols. We advocate an alternative programming model for distributed computing based on a Global Asynchronous Memory (GAM), aiming to avoid coherency and consistency problems rather than solving them. We materialize GAM by designing and implementing a distributed smart pointers library, inspired by C++ smart pointers. In this model, public and private pointers (resembling C++ shared and unique pointers, respectively) are moved around instead of messages (i.e., data), thus alleviating the user from the burden of minimizing transfers. On top of smart pointers, we propose a high-level C++ template library for writing applications in terms of dataflow-like networks, namely GAM nets, consisting of stateful processors exchanging pointers in fully asynchronous fashion.
We demonstrate the validity of the proposed approach, from the expressiveness perspective, by showing how GAM nets can be exploited to implement higher-level parallel programming models, such as data and task parallelism. As for the performance perspective, the execution of two non-toy benchmarks on a number of different small-scale HPC clusters exhibits both close-to-ideal scalability and negligible overhead with respect to state-of-the-art benchmark implementations. For instance, the GAM implementation of a high-quality video restoration filter sustains a 100 fps throughput over 70\%-noisy high-quality video streams on a 4-node cluster of Graphics Processing Units (GPUs), with minimal programming effort.},
  keywords = {fastflow, paraphrase, repara, rephrase, toreador},
  pubstate = {published},
  tppubtype = {phdthesis}
}
Claudia Misale
PiCo: A Domain-Specific Language for Data Analytics Pipelines PhD Thesis
Computer Science Department, University of Torino, 2017.
Abstract | Links | BibTeX | Tags: fastflow, paraphrase, repara, rephrase, toreador
@comment{PhD thesis by Claudia Misale, University of Torino, 2017, introducing the PiCo DSL. Hyphenation artifact in the abstract repaired; PiCo brace-protected in the title.}
@phdthesis{17:pico:misale:thesis,
  title = {{PiCo}: A Domain-Specific Language for Data Analytics Pipelines},
  author = {Claudia Misale},
  url = {https://iris.unito.it/retrieve/handle/2318/1633743/320170/Misale_thesis.pdf},
  doi = {10.5281/zenodo.579753},
  year = {2017},
  date = {2017-05-01},
  school = {Computer Science Department, University of Torino},
  abstract = {In the world of Big Data analytics, there is a series of tools aiming at simplifying programming applications to be executed on clusters. Although each tool claims to provide better programming, data and execution models---for which only informal (and often confusing) semantics is generally provided---all share a common underlying model, namely, the Dataflow model. Using this model as a starting point, it is possible to categorize and analyze almost all aspects about Big Data analytics tools from a high level perspective. This analysis can be considered as a first step toward a formal model to be exploited in the design of a (new) framework for Big Data analytics. By putting clear separations between all levels of abstraction (i.e., from the runtime to the user API), it is easier for a programmer or software designer to avoid mixing low level with high level aspects, as we are often used to see in state-of-the-art Big Data analytics frameworks.
From the user-level perspective, we think that a clearer and simple semantics is preferable, together with a strong separation of concerns. For this reason, we use the Dataflow model as a starting point to build a programming environment with a simplified programming model implemented as a Domain-Specific Language, that is on top of a stack of layers that build a prototypical framework for Big Data analytics.
The contribution of this thesis is twofold: first, we show that the proposed model is (at least) as general as existing batch and streaming frameworks (e.g., Spark, Flink, Storm, Google Dataflow), thus making it easier to understand high-level data-processing applications written in such frameworks. As result of this analysis, we provide a layered model that can represent tools and applications following the Dataflow paradigm and we show how the analyzed tools fit in each level.
Second, we propose a programming environment based on such layered model in the form of a Domain-Specific Language (DSL) for processing data collections, called PiCo (Pipeline Composition). The main entity of this programming model is the Pipeline, basically a DAG-composition of processing elements. This model is intended to give the user an unique interface for both stream and batch processing, hiding completely data management and focusing only on operations, which are represented by Pipeline stages. Our DSL will be built on top of the FastFlow library, exploiting both shared and distributed parallelism, and implemented in C++11/14 with the aim of porting C++ into the Big Data world.},
  keywords = {fastflow, paraphrase, repara, rephrase, toreador},
  pubstate = {published},
  tppubtype = {phdthesis}
}
From the user-level perspective, we think that a clearer and simple semantics is preferable, together with a strong separation of concerns. For this reason, we use the Dataflow model as a starting point to build a programming environment with a simplified programming model implemented as a Domain-Specific Language, that is on top of a stack of layers that build a prototypical framework for Big Data analytics.
The contribution of this thesis is twofold: first, we show that the proposed model is (at least) as general as existing batch and streaming frameworks (e.g., Spark, Flink, Storm, Google Dataflow), thus making it easier to understand high-level data-processing applications written in such frameworks. As result of this analysis, we provide a layered model that can represent tools and applications following the Dataflow paradigm and we show how the analyzed tools fit in each level.
Second, we propose a programming environment based on such layered model in the form of a Domain-Specific Language (DSL) for processing data collections, called PiCo (Pipeline Composition). The main entity of this programming model is the Pipeline, basically a DAG-composition of processing elements. This model is intended to give the user an unique interface for both stream and batch processing, hiding completely data management and focusing only on operations, which are represented by Pipeline stages. Our DSL will be built on top of the FastFlow library, exploiting both shared and distributed parallelism, and implemented in C++11/14 with the aim of porting C++ into the Big Data world.
2016
Marco Aldinucci, Sonia Campa, Marco Danelutto, Peter Kilpatrick, Massimo Torquati
Pool Evolution: A Parallel Pattern for Evolutionary and Symbolic Computing Journal Article
In: International Journal of Parallel Programming, vol. 44, no. 3, pp. 531–551, 2016, ISSN: 0885-7458.
Abstract | Links | BibTeX | Tags: fastflow, paraphrase, repara
@comment{Journal version of the pool evolution pattern paper, IJPP vol. 44 no. 3, 2016. Page range written with the BibTeX double hyphen.}
@article{pool:ijpp:15,
  title = {Pool Evolution: A Parallel Pattern for Evolutionary and Symbolic Computing},
  author = {Marco Aldinucci and Sonia Campa and Marco Danelutto and Peter Kilpatrick and Massimo Torquati},
  url = {https://iris.unito.it/retrieve/handle/2318/1522392/42139/2015_ff_pool_ijpp.pdf},
  doi = {10.1007/s10766-015-0358-5},
  issn = {0885-7458},
  year = {2016},
  date = {2016-01-01},
  journal = {International Journal of Parallel Programming},
  volume = {44},
  number = {3},
  pages = {531--551},
  publisher = {Springer US},
  abstract = {We introduce a new parallel pattern derived from a specific application domain and show how it turns out to have application beyond its domain of origin. The pool evolution pattern models the parallel evolution of a population subject to mutations and evolving in such a way that a given fitness function is optimized. The pattern has been demonstrated to be suitable for capturing and modeling the parallel patterns underpinning various evolutionary algorithms, as well as other parallel patterns typical of symbolic computation. In this paper we introduce the pattern, we discuss its implementation on modern multi/many core architectures and finally present experimental results obtained with FastFlow and Erlang implementations to assess its feasibility and scalability.},
  keywords = {fastflow, paraphrase, repara},
  pubstate = {published},
  tppubtype = {article}
}
Andrea Bracciali, Marco Aldinucci, Murray Patterson, Tobias Marschall, Nadia Pisanti, Ivan Merelli, Massimo Torquati
pWhatsHap: efficient haplotyping for future generation sequencing Journal Article
In: BMC Bioinformatics, vol. 17, no. Suppl 11, pp. 342, 2016.
Abstract | Links | BibTeX | Tags: fastflow, paraphrase, rephrase
@comment{BMC Bioinformatics 2016 article on pWhatsHap. Tool name brace-protected so sentence-casing styles keep its capitalisation.}
@article{16:pwhatshap:bmc,
  title = {{pWhatsHap}: efficient haplotyping for future generation sequencing},
  author = {Andrea Bracciali and Marco Aldinucci and Murray Patterson and Tobias Marschall and Nadia Pisanti and Ivan Merelli and Massimo Torquati},
  url = {http://bmcbioinformatics.biomedcentral.com/track/pdf/10.1186/s12859-016-1170-y?site=bmcbioinformatics.biomedcentral.com},
  doi = {10.1186/s12859-016-1170-y},
  year = {2016},
  date = {2016-01-01},
  journal = {BMC Bioinformatics},
  volume = {17},
  number = {Suppl 11},
  pages = {342},
  abstract = {Background: Haplotype phasing is an important problem in the analysis of genomics information. Given a set of DNA fragments of an individual, it consists of determining which one of the possible alleles (alternative forms of a gene) each fragment comes from. Haplotype information is relevant to gene regulation, epigenetics, genome-wide association studies, evolutionary and population studies, and the study of mutations. Haplotyping is currently addressed as an optimisation problem aiming at solutions that minimise, for instance, error correction costs, where costs are a measure of the confidence in the accuracy of the information acquired from DNA sequencing. Solutions have typically an exponential computational complexity. WhatsHap is a recent optimal approach which moves computational complexity from DNA fragment length to fragment overlap, i.e., coverage, and is hence of particular interest when considering sequencing technology's current trends that are producing longer fragments. Results: Given the potential relevance of efficient haplotyping in several analysis pipelines, we have designed and engineered pWhatsHap, a parallel, high-performance version of WhatsHap. pWhatsHap is embedded in a toolkit developed in Python and supports genomics datasets in standard file formats. Building on WhatsHap, pWhatsHap exhibits the same complexity exploring a number of possible solutions which is exponential in the coverage of the dataset. The parallel implementation on multi-core architectures allows for a relevant reduction of the execution time for haplotyping, while the provided results enjoy the same high accuracy as that provided by WhatsHap, which increases with coverage. Conclusions: Due to its structure and management of the large datasets, the parallelisation of WhatsHap posed demanding technical challenges, which have been addressed exploiting a high-level parallel programming framework.
The result, pWhatsHap, is a freely available toolkit that improves the efficiency of the analysis of genomics information.},
  keywords = {fastflow, paraphrase, rephrase},
  pubstate = {published},
  tppubtype = {article}
}
2015
Fabio Tordini, Maurizio Drocco, Ivan Merelli, Luciano Milanesi, Pietro Liò, Marco Aldinucci
NuChart-II: a graph-based approach for the analysis and interpretation of Hi-C data Proceedings Article
In: Di Serio, Clelia, Liò, Pietro, Nonis, Alessandro, Tagliaferri, Roberto (Ed.): Proc. of 11th Intl. Meeting on Computational Intelligence Methods for Bioinformatics and Biostatistics (CIBB), pp. 298–311, Springer, Cambridge, UK, 2015, ISBN: 978-3-319-24461-7.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow, paraphrase, repara
@comment{CIBB proceedings paper on NuChart-II, LNCS vol. 8623, 2015. Compound surname Di Serio braced so it is not split; accented names written as BibTeX special characters; page range uses the double hyphen.}
@inproceedings{14:ff:nuchart:cibb,
  title = {{NuChart-II}: a graph-based approach for the analysis and interpretation of {Hi-C} data},
  author = {Fabio Tordini and Maurizio Drocco and Ivan Merelli and Luciano Milanesi and Pietro Li{\`o} and Marco Aldinucci},
  editor = {Clelia {Di Serio} and Pietro Li{\`o} and Alessandro Nonis and Roberto Tagliaferri},
  url = {http://calvados.di.unipi.it/storage/paper_files/2014_nuchart_cibb.pdf},
  doi = {10.1007/978-3-319-24462-4_25},
  isbn = {978-3-319-24461-7},
  year = {2015},
  date = {2015-06-01},
  booktitle = {Proc. of 11th Intl. Meeting on Computational Intelligence Methods for Bioinformatics and Biostatistics (CIBB)},
  volume = {8623},
  pages = {298--311},
  publisher = {Springer},
  address = {Cambridge, UK},
  series = {LNCS},
  abstract = {Long-range chromosomal associations between genomic regions, and their repositioning in the 3D space of the nucleus, are now considered to be key contributors to the regulation of gene expressions, and important links have been highlighted with other genomic features involved in DNA rearrangements. Recent Chromosome Conformation Capture (3C) measurements performed with high throughput sequencing (Hi-C) and molecular dynamics studies show that there is a large correlation between co-localization and co-regulation of genes, but these important researches are hampered by the lack of biologists-friendly analysis and visualisation software. In this work we present NuChart-II, a software that allows the user to annotate and visualize a list of input genes with information relying on Hi-C data, integrating knowledge data about genomic features that are involved in the chromosome spatial organization. This software works directly with sequenced reads to identify related Hi-C fragments, with the aim of creating gene-centric neighbourhood graphs on which multi-omics features can be mapped. NuChart-II is a highly optimized implementation of a previous prototype package developed in R, in which the graph-based representation of Hi-C data was tested. The prototype showed inevitable problems of scalability while working genome-wide on large datasets: particular attention has been paid in optimizing the data structures employed while constructing the neighbourhood graph, so as to foster an efficient parallel implementation of the software. The normalization of Hi-C data has been modified and improved, in order to provide a reliable estimation of proximity likelihood for the genes.},
  keywords = {bioinformatics, fastflow, paraphrase, repara},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Maurizio Drocco, Claudia Misale, Guilherme Peretti Pezzi, Fabio Tordini, Marco Aldinucci
Memory-Optimised Parallel Processing of Hi-C Data Proceedings Article
In: Proc. of 23rd Euromicro Intl. Conference on Parallel Distributed and network-based Processing (PDP), pp. 1–8, IEEE, 2015.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow, impact, paraphrase, repara
@comment{PDP 2015 paper on memory optimisation of Hi-C graph construction. Compound surname Peretti Pezzi braced so it is parsed as one last name; page range uses the double hyphen.}
@inproceedings{nuchart:speedup:15,
  title = {Memory-Optimised Parallel Processing of {Hi-C} Data},
  author = {Maurizio Drocco and Claudia Misale and Guilherme {Peretti Pezzi} and Fabio Tordini and Marco Aldinucci},
  url = {https://iris.unito.it/retrieve/handle/2318/1521910/40615/2015_pdp_memopt.pdf},
  doi = {10.1109/PDP.2015.63},
  year = {2015},
  date = {2015-03-01},
  booktitle = {Proc. of 23rd Euromicro Intl. Conference on Parallel Distributed and network-based Processing (PDP)},
  pages = {1--8},
  publisher = {IEEE},
  abstract = {This paper presents the optimisation efforts on the creation of a graph-based mapping representation of gene adjacency. The method is based on the Hi-C process, starting from Next Generation Sequencing data, and it analyses a huge amount of static data in order to produce maps for one or more genes. Straightforward parallelisation of this scheme does not yield acceptable performance on multicore architectures since the scalability is rather limited due to the memory bound nature of the problem. This work focuses on the memory optimisations that can be applied to the graph construction algorithm and its (complex) data structures to derive a cache-oblivious algorithm and eventually to improve the memory bandwidth utilisation. We used as running example NuChart-II, a tool for annotation and statistic analysis of Hi-C data that creates a gene-centric neighborhood graph. The proposed approach, which is exemplified for Hi-C, addresses several common issue in the parallelisation of memory bound algorithms for multicore. Results show that the proposed approach is able to increase the parallel speedup from 7x to 22x (on a 32-core platform). Finally, the proposed C++ implementation outperforms the first R NuChart prototype, by which it was not possible to complete the graph generation because of strong memory-saturation problems.},
  keywords = {bioinformatics, fastflow, impact, paraphrase, repara},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Fabio Tordini, Maurizio Drocco, Claudia Misale, Luciano Milanesi, Pietro Liò, Ivan Merelli, Marco Aldinucci
Parallel Exploration of the Nuclear Chromosome Conformation with NuChart-II Proceedings Article
In: Proc. of 23rd Euromicro Intl. Conference on Parallel Distributed and network-based Processing (PDP), IEEE, 2015.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow, impact, paraphrase, repara
@comment{PDP 2015 paper presenting the NuChart-II tool. Citation key kept verbatim because it may be cited elsewhere. Hyphenation artifact in the abstract repaired; accented surname written as a BibTeX special character.}
@inproceedings{nuchar:tool:15,
  title = {Parallel Exploration of the Nuclear Chromosome Conformation with {NuChart-II}},
  author = {Fabio Tordini and Maurizio Drocco and Claudia Misale and Luciano Milanesi and Pietro Li{\`o} and Ivan Merelli and Marco Aldinucci},
  url = {https://iris.unito.it/retrieve/handle/2318/1522038/40619/2015_pdp_nuchartff.pdf},
  doi = {10.1109/PDP.2015.104},
  year = {2015},
  date = {2015-03-01},
  booktitle = {Proc. of 23rd Euromicro Intl. Conference on Parallel Distributed and network-based Processing (PDP)},
  publisher = {IEEE},
  abstract = {High-throughput molecular biology techniques are widely used to identify physical interactions between genetic elements located throughout the human genome. Chromosome Conformation Capture (3C) and other related techniques allow to investigate the spatial organisation of chromosomes in the cell's natural state. Recent results have shown that there is a large correlation between co-localization and co-regulation of genes, but these important information are hampered by the lack of biologists-friendly analysis and visualisation software. In this work we introduce NuChart-II, a tool for Hi-C data analysis that provides a gene-centric view of the chromosomal neighbourhood in a graph-based manner. NuChart-II is an efficient and highly optimized C++ re-implementation of a previous prototype package developed in R. Representing Hi-C data using a graph-based approach overcomes the common view relying on genomic coordinates and permits the use of graph analysis techniques to explore the spatial conformation of a gene neighbourhood.},
  keywords = {bioinformatics, fastflow, impact, paraphrase, repara},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Marco Aldinucci, Guilherme Peretti Pezzi, Maurizio Drocco, Concetto Spampinato, Massimo Torquati
Parallel Visual Data Restoration on Multi-GPGPUs using Stencil-Reduce Pattern Journal Article
In: International Journal of High Performance Computing Applications, vol. 29, no. 4, pp. 461–472, 2015.
Abstract | Links | BibTeX | Tags: fastflow, HPC, impact, paraphrase
@comment{IJHPCA vol. 29 no. 4, 2015, article on a stencil-reduce visual restoration filter. Compound surname Peretti Pezzi braced; acronym protected in the title; page range uses the double hyphen.}
@article{ff:denoiser:ijhpca:15,
  title = {Parallel Visual Data Restoration on {Multi-GPGPUs} using Stencil-Reduce Pattern},
  author = {Marco Aldinucci and Guilherme {Peretti Pezzi} and Maurizio Drocco and Concetto Spampinato and Massimo Torquati},
  url = {https://iris.unito.it/retrieve/handle/2318/1522073/299200/ijhpca_4aperto.pdf},
  doi = {10.1177/1094342014567907},
  year = {2015},
  date = {2015-01-01},
  journal = {International Journal of High Performance Computing Applications},
  volume = {29},
  number = {4},
  pages = {461--472},
  abstract = {In this paper, a highly effective parallel filter for visual data restoration is presented. The filter is designed following a skeletal approach, using a newly proposed stencil-reduce, and has been implemented by way of the FastFlow parallel programming library. As a result of its high-level design, it is possible to run the filter seamlessly on a multicore machine, on multi-GPGPUs, or on both. The design and implementation of the filter are discussed, and an experimental evaluation is presented.},
  keywords = {fastflow, HPC, impact, paraphrase},
  pubstate = {published},
  tppubtype = {article}
}
2014
Marco Aldinucci, Sonia Campa, Marco Danelutto, Peter Kilpatrick, Massimo Torquati
Pool evolution: a domain specific parallel pattern Proceedings Article
In: Proc. of the 7th Intl. Symposium on High-level Parallel Programming and Applications (HLPP), Amsterdam, The Netherlands, 2014.
Abstract | Links | BibTeX | Tags: fastflow, paraphrase, repara
@comment{HLPP 2014 conference version of the pool evolution pattern paper. Missing space after Proc. in the booktitle restored.}
@inproceedings{2014:ff:pool:hlpp,
  title = {Pool evolution: a domain specific parallel pattern},
  author = {Marco Aldinucci and Sonia Campa and Marco Danelutto and Peter Kilpatrick and Massimo Torquati},
  url = {http://calvados.di.unipi.it/storage/paper_files/2014_hlpp_pool.pdf},
  year = {2014},
  date = {2014-07-01},
  booktitle = {Proc. of the 7th Intl. Symposium on High-level Parallel Programming and Applications (HLPP)},
  address = {Amsterdam, The Netherlands},
  abstract = {We introduce a new parallel pattern derived from a specific application domain and show how it turns out to have application beyond its domain of origin. The pool evolution pattern models the parallel evolution of a population subject to mutations and evolving in such a way that a given fitness function is optimized. The pattern has been demonstrated to be suitable for capturing and modeling the parallel patterns underpinning various evolutionary algorithms, as well as other parallel patterns typical of symbolic computation. In this paper we introduce the pattern, developed in the framework of the ParaPhrase EU-funded FP7 project, we discuss its implementation on modern multi/many core architectures and finally present experimental results obtained with FastFlow and Erlang implementations to assess its feasibility and scalability.},
  keywords = {fastflow, paraphrase, repara},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Marco Aldinucci, Massimo Torquati, Maurizio Drocco, Guilherme Peretti Pezzi, Concetto Spampinato
FastFlow: Combining Pattern-Level Abstraction and Efficiency in GPGPUs Proceedings Article
In: GPU Technology Conference (GTC), San Jose, CA, USA, 2014.
Abstract | Links | BibTeX | Tags: fastflow, HPC, impact, paraphrase
@comment{GTC 2014 talk on FastFlow parallel patterns for GPGPUs. Compound surname Peretti Pezzi braced; FastFlow and GPGPUs protected against title recasing.}
@inproceedings{ff:gtc:2014,
  title = {{FastFlow}: Combining Pattern-Level Abstraction and Efficiency in {GPGPUs}},
  author = {Marco Aldinucci and Massimo Torquati and Maurizio Drocco and Guilherme {Peretti Pezzi} and Concetto Spampinato},
  url = {http://calvados.di.unipi.it/storage/talks/2014_S4729-Marco-Aldinucci.pdf},
  year = {2014},
  date = {2014-03-01},
  booktitle = {GPU Technology Conference (GTC)},
  address = {San Jose, CA, USA},
  abstract = {Learn how FastFlow's parallel patterns can be used to design parallel applications for execution on both CPUs and GPGPUs while avoiding most of the complex low-level detail needed to make them efficient, portable and rapid to prototype. As use case, we will show the design and effectiveness of a novel universal image filtering template based on the variational approach.},
  keywords = {fastflow, HPC, impact, paraphrase},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Marco Aldinucci, Massimo Torquati, Maurizio Drocco, Guilherme Peretti Pezzi, Concetto Spampinato
An Overview of FastFlow: Combining Pattern-Level Abstraction and Efficiency in GPGPUs Proceedings Article
In: GPU Technology Conference (GTC), San Jose, CA, USA, 2014.
Abstract | Links | BibTeX | Tags: fastflow, HPC, impact, paraphrase
@comment{GTC 2014 overview talk on FastFlow. Compound surname Peretti Pezzi braced; FastFlow and GPGPUs protected against title recasing. Abstract wording left as published.}
@inproceedings{ff:gtc:2014:short,
  title = {An Overview of {FastFlow}: Combining Pattern-Level Abstraction and Efficiency in {GPGPUs}},
  author = {Marco Aldinucci and Massimo Torquati and Maurizio Drocco and Guilherme {Peretti Pezzi} and Concetto Spampinato},
  url = {http://calvados.di.unipi.it/storage/talks/2014_S4585-Marco-Aldinucci.pdf},
  year = {2014},
  date = {2014-03-01},
  booktitle = {GPU Technology Conference (GTC)},
  address = {San Jose, CA, USA},
  abstract = {Get an overview of FastFlow's parallel patterns can be used to design parallel applications for execution on both CPUs and GPGPUs while avoiding most of the complex low-level detail needed to make them efficient, portable and rapid to prototype. For a more detailed and technical review of FastFlow's parallel patterns as well as a use case where we will show the design and effectiveness of a novel universal image filtering template based on the variational approach.},
  keywords = {fastflow, HPC, impact, paraphrase},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Marco Aldinucci, Massimo Torquati, Concetto Spampinato, Maurizio Drocco, Claudia Misale, Cristina Calcagno, Mario Coppo
Parallel stochastic systems biology in the cloud Journal Article
In: Briefings in Bioinformatics, vol. 15, no. 5, pp. 798–813, 2014, ISSN: 1467-5463.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow, impact, paraphrase
@comment{Briefings in Bioinformatics vol. 15 no. 5, 2014, article on the CWC simulator in the cloud. Page range written with the BibTeX double hyphen.}
@article{cwc:cloud:bib:13,
  title = {Parallel stochastic systems biology in the cloud},
  author = {Marco Aldinucci and Massimo Torquati and Concetto Spampinato and Maurizio Drocco and Claudia Misale and Cristina Calcagno and Mario Coppo},
  url = {https://iris.unito.it/retrieve/handle/2318/140080/22528/FF_Cloud_briefings_final_submitted_copy.pdf},
  doi = {10.1093/bib/bbt040},
  issn = {1467-5463},
  year = {2014},
  date = {2014-01-01},
  journal = {Briefings in Bioinformatics},
  volume = {15},
  number = {5},
  pages = {798--813},
  abstract = {The stochastic modelling of biological systems, coupled with Monte Carlo simulation of models, is an increasingly popular technique in bioinformatics. The simulation-analysis workflow may result computationally expensive reducing the interactivity required in the model tuning. In this work, we advocate the high-level software design as a vehicle for building efficient and portable parallel simulators for the cloud. In particular, the Calculus of Wrapped Components (CWC) simulator for systems biology, which is designed according to the FastFlow pattern-based approach, is presented and discussed. Thanks to the FastFlow framework, the CWC simulator is designed as a high-level workflow that can simulate CWC models, merge simulation results and statistically analyse them in a single parallel workflow in the cloud. To improve interactivity, successive phases are pipelined in such a way that the workflow begins to output a stream of analysis results immediately after simulation is started. Performance and effectiveness of the CWC simulator are validated on the Amazon Elastic Compute Cloud.},
  keywords = {bioinformatics, fastflow, impact, paraphrase},
  pubstate = {published},
  tppubtype = {article}
}
Marco Aldinucci, Sonia Campa, Marco Danelutto, Peter Kilpatrick, Massimo Torquati
Design patterns percolating to parallel programming framework implementation Journal Article
In: International Journal of Parallel Programming, vol. 42, no. 6, pp. 1012–1031, 2014, ISSN: 0885-7458.
Abstract | Links | BibTeX | Tags: fastflow, paraphrase
@comment{IJPP vol. 42 no. 6, 2014, article on the RISC-pb2l parallel building blocks. Page range written with the BibTeX double hyphen.}
@article{ijpp:patterns:13,
  title = {Design patterns percolating to parallel programming framework implementation},
  author = {Marco Aldinucci and Sonia Campa and Marco Danelutto and Peter Kilpatrick and Massimo Torquati},
  url = {https://iris.unito.it/retrieve/handle/2318/140069/22527/2013_ijpp_patterns-web_4aperto_1238811.pdf},
  doi = {10.1007/s10766-013-0273-6},
  issn = {0885-7458},
  year = {2014},
  date = {2014-01-01},
  journal = {International Journal of Parallel Programming},
  volume = {42},
  number = {6},
  pages = {1012--1031},
  abstract = {Structured parallel programming is recognised as a viable and effective means of tackling parallel programming problems. Recently, a set of simple and powerful parallel building blocks (RISC-pb2l) has been proposed to support modelling and implementation of parallel frameworks. In this work we demonstrate how that same parallel building block set may be used to model both general purpose parallel programming abstractions, not usually listed in classical skeleton sets, and more specialized domain specific parallel patterns. We show how an implementation of RISC-pb2l can be realised via the FastFlow framework and present experimental evidence of the feasibility and efficiency of the approach.},
  keywords = {fastflow, paraphrase},
  pubstate = {published},
  tppubtype = {article}
}
Marco Aldinucci, Salvatore Ruggieri, Massimo Torquati
Decision Tree Building on Multi-Core using FastFlow Journal Article
In: Concurrency and Computation: Practice and Experience, vol. 26, no. 3, pp. 800–820, 2014.
Abstract | Links | BibTeX | Tags: fastflow, paraphrase
@comment{CCPE vol. 26 no. 3, 2014, article on parallelising C4.5 decision tree building with FastFlow. Page range uses the double hyphen; FastFlow protected against title recasing.}
@article{yadtff:ccpe:13,
  title = {Decision Tree Building on Multi-Core using {FastFlow}},
  author = {Marco Aldinucci and Salvatore Ruggieri and Massimo Torquati},
  url = {https://iris.unito.it/retrieve/handle/2318/139522/118602/yadtff-j.pdf},
  doi = {10.1002/cpe.3063},
  year = {2014},
  date = {2014-01-01},
  journal = {Concurrency and Computation: Practice and Experience},
  volume = {26},
  number = {3},
  pages = {800--820},
  abstract = {The whole computer hardware industry embraced multi-core. The extreme optimisation of sequential algorithms is then no longer sufficient to squeeze the real machine power, which can be only exploited via thread-level parallelism. Decision tree algorithms exhibit natural concurrency that makes them suitable to be parallelised. This paper presents an in-depth study of the parallelisation of an implementation of the C4.5 algorithm for multi-core architectures. We characterise elapsed time lower bounds for the forms of parallelisations adopted, and achieve close to optimal performances. Our implementation is based on the FastFlow parallel programming environment and it requires minimal changes to the original sequential code.},
  keywords = {fastflow, paraphrase},
  pubstate = {published},
  tppubtype = {article}
}
Marco Aldinucci, Cristina Calcagno, Mario Coppo, Ferruccio Damiani, Maurizio Drocco, Eva Sciacca, Salvatore Spinella, Massimo Torquati, Angelo Troina
On designing multicore-aware simulators for systems biology endowed with on-line statistics Journal Article
In: BioMed Research International, 2014.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow, paraphrase
@article{cwcsim:ff:multicore:biomed:14,
title = {On designing multicore-aware simulators for systems biology endowed with on-line statistics},
author = {Marco Aldinucci and Cristina Calcagno and Mario Coppo and Ferruccio Damiani and Maurizio Drocco and Eva Sciacca and Salvatore Spinella and Massimo Torquati and Angelo Troina},
url = {http://downloads.hindawi.com/journals/bmri/2014/207041.pdf},
doi = {10.1155/2014/207041},
year = {2014},
date = {2014-01-01},
journal = {BioMed Research International},
abstract = {The paper arguments are on enabling methodologies for the design of a fully parallel, online, interactive tool aiming to support the bioinformatics scientists. In particular, the features of these methodologies, supported by the FastFlow parallel programming framework, are shown on a simulation tool to perform the modeling, the tuning, and the sensitivity analysis of stochastic biological models. A stochastic simulation needs thousands of independent simulation trajectories turning into big data that should be analysed by statistic and data mining tools. In the considered approach the two stages are pipelined in such a way that the simulation stage streams out the partial results of all simulation trajectories to the analysis stage that immediately produces a partial result. The simulation-analysis workflow is validated for performance and effectiveness of the online analysis in capturing biological systems behavior on a multicore platform and representative proof-of-concept biological systems. The exploited methodologies include pattern-based parallel programming and data streaming that provide key features to the software designers such as performance portability and efficient in-memory (big) data management and movement. Two paradigmatic classes of biological systems exhibiting multistable and oscillatory behavior are used as a testbed.},
keywords = {bioinformatics, fastflow, paraphrase},
pubstate = {published},
tppubtype = {article}
}
Marco Aldinucci, Maurizio Drocco, Guilherme Peretti Pezzi, Claudia Misale, Fabio Tordini, Massimo Torquati
Exercising high-level parallel programming on streams: a systems biology use case Proceedings Article
In: Proc. of 34th IEEE Intl. Conference on Distributed Computing Systems Workshops (ICDCSW), IEEE, Madrid, Spain, 2014.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow, impact, paraphrase
@inproceedings{cwc:gpu:dcperf:14,
title = {Exercising high-level parallel programming on streams: a systems biology use case},
author = {Marco Aldinucci and Maurizio Drocco and Guilherme Peretti Pezzi and Claudia Misale and Fabio Tordini and Massimo Torquati},
url = {https://iris.unito.it/retrieve/handle/2318/154516/26657/2014_dcperf_cwc_gpu.pdf},
doi = {10.1109/ICDCSW.2014.38},
year = {2014},
date = {2014-01-01},
booktitle = {Proc. of 34th IEEE Intl. Conference on Distributed Computing Systems Workshops (ICDCSW)},
publisher = {IEEE},
address = {Madrid, Spain},
abstract = {The stochastic modelling of biological systems, coupled with Monte Carlo simulation of models, is an increasingly popular technique in Bioinformatics. The simulation-analysis workflow may result into a computationally expensive task reducing the interactivity required in the model tuning. In this work, we advocate high-level software design as a vehicle for building efficient and portable parallel simulators for a variety of platforms, ranging from multi-core platforms to GPGPUs to cloud. In particular, the Calculus of Wrapped Compartments (CWC) parallel simulator for systems biology equipped with on-line mining of results, which is designed according to the FastFlow pattern-based approach, is discussed as a running example. In this work, the CWC simulator is used as a paradigmatic example of a complex C++ application where the quality of results is correlated with both computation and I/O bounds, and where high-quality results might turn into big data. The FastFlow parallel programming framework, which advocates C++ pattern-based parallel programming makes it possible to develop portable parallel code without relinquish neither run-time efficiency nor performance tuning opportunities. Performance and effectiveness of the approach are validated on a variety of platforms, inter-alia cache-coherent multi-cores, cluster of multi-core (Ethernet and Infiniband) and the Amazon Elastic Compute Cloud.},
keywords = {bioinformatics, fastflow, impact, paraphrase},
pubstate = {published},
tppubtype = {inproceedings}
}
Marco Aldinucci, Guilherme Peretti Pezzi, Maurizio Drocco, Fabio Tordini, Peter Kilpatrick, Massimo Torquati
Parallel video denoising on heterogeneous platforms Proceedings Article
In: Proc. of Intl. Workshop on High-level Programming for Heterogeneous and Hierarchical Parallel Systems (HLPGPU), 2014.
Abstract | Links | BibTeX | Tags: fastflow, impact, paraphrase
@inproceedings{ff:video:hlpgpu:14,
  author    = {Marco Aldinucci and Guilherme Peretti Pezzi and Maurizio Drocco and Fabio Tordini and Peter Kilpatrick and Massimo Torquati},
  title     = {Parallel video denoising on heterogeneous platforms},
  booktitle = {Proc. of Intl. Workshop on High-level Programming for Heterogeneous and Hierarchical Parallel Systems (HLPGPU)},
  year      = {2014},
  date      = {2014-01-01},
  url       = {http://calvados.di.unipi.it/storage/paper_files/2014_ff_video_denoiser_hlpgpu.pdf},
  abstract  = {In this paper, a highly-effective parallel filter for video denoising is presented. The filter is designed using a skeletal approach, and has been implemented by way of the FastFlow parallel programming library. As a result of its high-level design, it is possible to run the filter seamlessly on a multi-core machine, on GPGPU(s), or on both. The design and the implementation of the filter are discussed, and an experimental evaluation is presented. Various mappings of the filtering stages are comparatively discussed.},
  keywords  = {fastflow, impact, paraphrase},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Claudia Misale
Accelerating Bowtie2 with a lock-less concurrency approach and memory affinity Proceedings Article
In: Aldinucci, Marco, D'Agostino, Daniele, Kilpatrick, Peter (Ed.): Proc. of Intl. Euromicro PDP 2014: Parallel Distributed and network-based Processing, IEEE, Torino, Italy, 2014, (Best paper award).
Abstract | Links | BibTeX | Tags: fastflow, paraphrase
@inproceedings{ff:bowtie2:pdp:14,
title = {Accelerating Bowtie2 with a lock-less concurrency approach and memory affinity},
author = {Claudia Misale},
editor = {Marco Aldinucci and Daniele D'Agostino and Peter Kilpatrick},
url = {http://calvados.di.unipi.it/storage/paper_files/2014_pdp_bowtieff.pdf},
doi = {10.1109/PDP.2014.50},
year = {2014},
date = {2014-01-01},
booktitle = {Proc. of Intl. Euromicro PDP 2014: Parallel Distributed and network-based Processing},
publisher = {IEEE},
address = {Torino, Italy},
abstract = {The implementation of DNA alignment tools for Bioinformatics lead to face different problems that dip into performances. A single alignment takes an amount of time that is not predictable and there are different factors that can affect performances, for instance the length of sequences can determine the computational grain of the task and mismatches or insertion/deletion (indels) increase time needed to complete an alignment. Moreover, an alignment is a strong memory-bound problem because of the irregular memory access patterns and limitations in memory-bandwidth. Over the years, many alignment tools were implemented. A concrete example is Bowtie2, one of the fastest (concurrent, Pthread-based) and state of the art not GPU-based alignment tool. Bowtie2 exploits concurrency by instantiating a pool of threads, which have access to a global input dataset, share the reference genome and have access to different objects for collecting alignment results. In this paper a modified implementation of Bowtie2 is presented, in which the concurrency structure has been changed. The proposed implementation exploits the task-farm skeleton pattern implemented as a Master-Worker. The Master-Worker pattern permits to delegate only to the Master thread dataset reading and to make private to each Worker data structures that are shared in the original version. Only the reference genome is left shared. As a further optimisation, the Master and each Worker were pinned on cores and the reference genome was allocated interleaved among memory nodes. The proposed implementation is able to gain up to 10 speedup points over the original implementation.},
note = {(Best paper award)},
keywords = {fastflow, paraphrase},
pubstate = {published},
tppubtype = {inproceedings}
}
Alessandro Secco, Irfan Uddin, Guilherme Peretti Pezzi, Massimo Torquati
Message passing on InfiniBand RDMA for parallel run-time supports Proceedings Article
In: Aldinucci, Marco, D'Agostino, Daniele, Kilpatrick, Peter (Ed.): Proc. of Intl. Euromicro PDP 2014: Parallel Distributed and network-based Processing, IEEE, Torino, Italy, 2014.
Abstract | Links | BibTeX | Tags: fastflow, impact, paraphrase
@inproceedings{ff:infiniband:pdp:14,
title = {Message passing on InfiniBand RDMA for parallel run-time supports},
author = {Alessandro Secco and Irfan Uddin and Guilherme Peretti Pezzi and Massimo Torquati},
editor = {Marco Aldinucci and Daniele D'Agostino and Peter Kilpatrick},
url = {https://iris.unito.it/retrieve/handle/2318/151178/690885/2014_ff_infiniband_pdp.pdf},
doi = {10.1109/PDP.2014.23},
year = {2014},
date = {2014-01-01},
booktitle = {Proc. of Intl. Euromicro PDP 2014: Parallel Distributed and network-based Processing},
publisher = {IEEE},
address = {Torino, Italy},
abstract = {InfiniBand networks are commonly used in the high performance computing area. They offer RDMA-based operations that help to improve the performance of communication subsystems. In this paper, we propose a minimal message-passing communication layer providing the programmer with a point-to-point communication channel implemented by way of InfiniBand RDMA features. Differently from other libraries exploiting the InfiniBand features, such as the well-known Message Passing Interface (MPI), the proposed library is a communication layer only rather than a programming model, and can be easily used as building block for high-level parallel programming frameworks. Evaluated on micro-benchmarks, the proposed RDMA-based communication channel implementation achieves a comparable performance with highly optimised MPI/InfiniBand implementations. Eventually, the flexibility of the communication layer is evaluated by integrating it within the FastFlow parallel framework, currently supporting TCP/IP networks (via the ZeroMQ communication library).},
keywords = {fastflow, impact, paraphrase},
pubstate = {published},
tppubtype = {inproceedings}
}
Claudia Misale, Giulio Ferrero, Massimo Torquati, Marco Aldinucci
Sequence alignment tools: one parallel pattern to rule them all? Journal Article
In: BioMed Research International, 2014.
Abstract | Links | BibTeX | Tags: bioinformatics, fastflow, paraphrase, repara
@article{bowtie-bwa:ff:multicore:biomed:14,
  author    = {Claudia Misale and Giulio Ferrero and Massimo Torquati and Marco Aldinucci},
  title     = {Sequence alignment tools: one parallel pattern to rule them all?},
  journal   = {BioMed Research International},
  year      = {2014},
  date      = {2014-01-01},
  doi       = {10.1155/2014/539410},
  url       = {http://downloads.hindawi.com/journals/bmri/2014/539410.pdf},
  abstract  = {In this paper we advocate high-level programming methodology for Next Generation Sequencers (NGS) alignment tools for both productivity and absolute performance. We analyse the problem of parallel alignment and review the parallelisation strategies of the most popular alignment tools, which can all be abstracted to a single parallel paradigm. We compare these tools against their porting onto the FastFlow pattern-based programming framework, which provides programmers with high-level parallel patterns. By using a high-level approach, programmers are liberated from all complex aspects of parallel programming, such as synchronisation protocols and task scheduling, gaining more possibility for seamless performance tuning. In this work we show some use case in which, by using a high-level approach for parallelising NGS tools, it is possible to obtain comparable or even better absolute performance for all used datasets.},
  keywords  = {bioinformatics, fastflow, paraphrase, repara},
  pubstate  = {published},
  tppubtype = {article}
}
2013
Marco Aldinucci, Marco Danelutto, Peter Kilpatrick, Carlo Montangero, Laura Semini
Managing Adaptivity in Parallel Systems Book Section
In: Beckert, Bernhard, Damiani, Ferruccio, Boer, Frank S., Bonsangue, Marcello M. (Ed.): Formal Methods for Components and Objects: Intl. Symposium, FMCO 2011, Torino, Italy, October 3-5, 2011, Revised Invited Lectures, vol. 7542, pp. 199–217, Springer, 2013, ISBN: 978-3-642-35886-9.
Abstract | Links | BibTeX | Tags: paraphrase
@incollection{adaptivity:fmco:11,
title = {Managing Adaptivity in Parallel Systems},
author = {Marco Aldinucci and Marco Danelutto and Peter Kilpatrick and Carlo Montangero and Laura Semini},
editor = {Bernhard Beckert and Ferruccio Damiani and Frank S. Boer and Marcello M. Bonsangue},
url = {http://calvados.di.unipi.it/storage/paper_files/2013_fmco11_adaptivity.pdf},
doi = {10.1007/978-3-642-35887-6_11},
isbn = {978-3-642-35886-9},
year = {2013},
date = {2013-01-01},
booktitle = {Formal Methods for Components and Objects: Intl. Symposium, FMCO 2011, Torino, Italy, October 3-5, 2011, Revised Invited Lectures},
volume = {7542},
pages = {199--217},
publisher = {Springer},
series = {LNCS},
abstract = {The management of non-functional features (performance, security, power management, etc.) is traditionally a difficult, error prone task for programmers of parallel applications. To take care of these non-functional features, autonomic managers running policies represented as rules using sensors and actuators to monitor and transform a running parallel application may be used. We discuss an approach aimed at providing formal tool support to the integration of independently developed autonomic managers taking care of different non-functional concerns within the same parallel application. Our approach builds on the Behavioural Skeleton experience (autonomic management of non-functional features in structured parallel applications) and on previous results on conflict detection and resolution in rule-based systems.},
keywords = {paraphrase},
pubstate = {published},
tppubtype = {incollection}
}
Marco Aldinucci, Sonia Campa, Peter Kilpatrick, Massimo Torquati
Structured Data Access Annotations for Massively Parallel Computations Proceedings Article
In: Euro-Par 2012 Workshops, Proc. of the ParaPhrase Workshop on Parallel Processing, pp. 381–390, Springer, 2013.
Abstract | Links | BibTeX | Tags: fastflow, paraphrase
@inproceedings{annotation:para:12,
title = {Structured Data Access Annotations for Massively Parallel Computations},
author = {Marco Aldinucci and Sonia Campa and Peter Kilpatrick and Massimo Torquati},
url = {http://calvados.di.unipi.it/storage/paper_files/2013_annot_europar_workshops.pdf},
doi = {10.1007/978-3-642-36949-0_42},
year = {2013},
date = {2013-01-01},
booktitle = {Euro-Par 2012 Workshops, Proc. of the ParaPhrase Workshop on Parallel Processing},
volume = {7640},
pages = {381--390},
publisher = {Springer},
series = {LNCS},
abstract = {We describe an approach aimed at addressing the issue of joint exploitation of control (stream) and data parallelism in a skeleton based parallel programming environment, based on annotations and refactoring. Annotations drive efficient implementation of a parallel computation. Refactoring is used to transform the associated skeleton tree into a more efficient, functionally equivalent skeleton tree. In most cases, cost models are used to drive the refactoring process. We show how sample use case applications/kernels may be optimized and discuss preliminary experiments with FastFlow assessing the theoretical results.},
keywords = {fastflow, paraphrase},
pubstate = {published},
tppubtype = {inproceedings}
}
Marco Aldinucci, Sonia Campa, Marco Danelutto, Peter Kilpatrick, Massimo Torquati
Targeting Distributed Systems in FastFlow Proceedings Article
In: Euro-Par 2012 Workshops, Proc. of the CoreGrid Workshop on Grids, Clouds and P2P Computing, pp. 47–56, Springer, 2013.
Abstract | Links | BibTeX | Tags: fastflow, paraphrase
@inproceedings{ff:distr:cgs:12,
title = {Targeting Distributed Systems in FastFlow},
author = {Marco Aldinucci and Sonia Campa and Marco Danelutto and Peter Kilpatrick and Massimo Torquati},
url = {http://calvados.di.unipi.it/storage/paper_files/2012_distr_ff_cgsymph.pdf},
doi = {10.1007/978-3-642-36949-0_7},
year = {2013},
date = {2013-01-01},
booktitle = {Euro-Par 2012 Workshops, Proc. of the CoreGrid Workshop on Grids, Clouds and P2P Computing},
volume = {7640},
pages = {47--56},
publisher = {Springer},
series = {LNCS},
abstract = {FastFlow is a structured parallel programming framework targeting shared memory multi-core architectures. In this paper we introduce a FastFlow extension aimed at supporting a network of multi-core workstation as well. The extension supports the execution of FastFlow programs by coordinating – in a structured way – the fine grain parallel activities running on a single workstation. We discuss the design and the implementation of this extension presenting preliminary experimental results validating it on state-of-the-art networked multi-core nodes.},
keywords = {fastflow, paraphrase},
pubstate = {published},
tppubtype = {inproceedings}
}
Kevin Hammond, Marco Aldinucci, Chris Brown, Francesco Cesarini, Marco Danelutto, Horacio González-Vélez, Peter Kilpatrick, Rainer Keller, Michael Rossbory, Gilad Shainer
The ParaPhrase Project: Parallel Patterns for Adaptive Heterogeneous Multicore Systems Book Section
In: Beckert, Bernhard, Damiani, Ferruccio, Boer, Frank S., Bonsangue, Marcello M. (Ed.): Formal Methods for Components and Objects: Intl. Symposium, FMCO 2011, Torino, Italy, October 3-5, 2011, Revised Invited Lectures, vol. 7542, pp. 218–236, Springer, 2013, ISBN: 978-3-642-35886-9.
Abstract | Links | BibTeX | Tags: paraphrase
@incollection{paraphrase:fmco:11,
title = {The ParaPhrase Project: Parallel Patterns for Adaptive Heterogeneous Multicore Systems},
author = {Kevin Hammond and Marco Aldinucci and Chris Brown and Francesco Cesarini and Marco Danelutto and Horacio González-Vélez and Peter Kilpatrick and Rainer Keller and Michael Rossbory and Gilad Shainer},
editor = {Bernhard Beckert and Ferruccio Damiani and Frank S. Boer and Marcello M. Bonsangue},
url = {http://calvados.di.unipi.it/storage/paper_files/2013_fmco11_paraphrase.pdf},
doi = {10.1007/978-3-642-35887-6_12},
isbn = {978-3-642-35886-9},
year = {2013},
date = {2013-01-01},
booktitle = {Formal Methods for Components and Objects: Intl. Symposium, FMCO 2011, Torino, Italy, October 3-5, 2011, Revised Invited Lectures},
volume = {7542},
pages = {218--236},
publisher = {Springer},
series = {LNCS},
abstract = {This paper describes the ParaPhrase project, a new 3-year targeted research project funded under EU Framework 7 Objective 3.4 (Computer Systems), starting in October 2011. ParaPhrase aims to follow a new approach to introducing parallelism using advanced refactoring techniques coupled with high-level parallel design patterns. The refactoring approach will use these design patterns to restructure programs defined as networks of software components into other forms that are more suited to parallel execution. The programmer will be aided by high-level cost information that will be integrated into the refactoring tools. The implementation of these patterns will then use a well-understood algorithmic skeleton approach to achieve good parallelism. A key ParaPhrase design goal is that parallel components are intended to match heterogeneous architectures, defined in terms of CPU/GPU combinations, for example. In order to achieve this, the ParaPhrase approach will map components at link time to the available hardware, and will then re-map them during program execution, taking account of multiple applications, changes in hardware resource availability, the desire to reduce communication costs etc. In this way, we aim to develop a new approach to programming that will be able to produce software that can adapt to dynamic changes in the system environment. Moreover, by using a strong component basis for parallelism, we can achieve potentially significant gains in terms of reducing sharing at a high level of abstraction, and so in reducing or even eliminating the costs that are usually associated with cache management, locking, and synchronisation.},
keywords = {paraphrase},
pubstate = {published},
tppubtype = {incollection}
}
Marco Aldinucci, Sonia Campa, Fabio Tordini, Massimo Torquati, Peter Kilpatrick
An abstract annotation model for skeletons Book Section
In: Beckert, Bernhard, Damiani, Ferruccio, Boer, Frank S., Bonsangue, Marcello M. (Ed.): Formal Methods for Components and Objects: Intl. Symposium, FMCO 2011, Torino, Italy, October 3-5, 2011, Revised Invited Lectures, vol. 7542, pp. 257–276, Springer, 2013, ISBN: 978-3-642-35886-9.
Abstract | Links | BibTeX | Tags: fastflow, paraphrase
@incollection{toolchain:fmco:11,
title = {An abstract annotation model for skeletons},
author = {Marco Aldinucci and Sonia Campa and Fabio Tordini and Massimo Torquati and Peter Kilpatrick},
editor = {Bernhard Beckert and Ferruccio Damiani and Frank S. Boer and Marcello M. Bonsangue},
url = {http://calvados.di.unipi.it/storage/paper_files/2013_fmco11_annotation.pdf},
doi = {10.1007/978-3-642-35887-6_14},
isbn = {978-3-642-35886-9},
year = {2013},
date = {2013-01-01},
booktitle = {Formal Methods for Components and Objects: Intl. Symposium, FMCO 2011, Torino, Italy, October 3-5, 2011, Revised Invited Lectures},
volume = {7542},
pages = {257--276},
publisher = {Springer},
series = {LNCS},
abstract = {Multi-core and many-core platforms are becoming increasingly heterogeneous and asymmetric. This significantly increases the porting and tuning effort required for parallel codes, which in turn often leads to a growing gap between peak machine power and actual application performance. In this work a first step toward the automated optimization of high level skeleton-based parallel code is discussed. The paper presents an abstract annotation model for skeleton programs aimed at formally describing suitable mapping of parallel activities on a high-level platform representation. The derived mapping and scheduling strategies are used to generate optimized run-time code.},
keywords = {fastflow, paraphrase},
pubstate = {published},
tppubtype = {incollection}
}
2012
Marco Aldinucci, Marco Danelutto, Peter Kilpatrick, Massimiliano Meneghin, Massimo Torquati
An Efficient Unbounded Lock-Free Queue for Multi-core Systems Proceedings Article
In: Proc. of 18th Intl. Euro-Par 2012 Parallel Processing, pp. 662–673, Springer, Rhodes Island, Greece, 2012.
Abstract | Links | BibTeX | Tags: fastflow, paraphrase
@inproceedings{ff:spsc:europar:12,
title = {An Efficient Unbounded Lock-Free Queue for Multi-core Systems},
author = {Marco Aldinucci and Marco Danelutto and Peter Kilpatrick and Massimiliano Meneghin and Massimo Torquati},
url = {http://calvados.di.unipi.it/storage/paper_files/2012_spsc_europar.pdf},
doi = {10.1007/978-3-642-32820-6_65},
year = {2012},
date = {2012-08-01},
booktitle = {Proc. of 18th Intl. Euro-Par 2012 Parallel Processing},
volume = {7484},
pages = {662--673},
publisher = {Springer},
address = {Rhodes Island, Greece},
series = {LNCS},
abstract = {The use of efficient synchronization mechanisms is crucial for implementing fine grained parallel programs on modern shared cache multi-core architectures. In this paper we study this problem by considering Single-Producer/Single-Consumer (SPSC) coordination using unbounded queues. A novel unbounded SPSC algorithm capable of reducing the row synchronization latency and speeding up Producer-Consumer coordination is presented. The algorithm has been extensively tested on a shared-cache multi-core platform and a sketch proof of correctness is presented. The queues proposed have been used as basic building blocks to implement the FastFlow parallel framework, which has been demonstrated to offer very good performance for fine-grain parallel applications.},
keywords = {fastflow, paraphrase},
pubstate = {published},
tppubtype = {inproceedings}
}
Marco Aldinucci, Marco Danelutto, Peter Kilpatrick, Massimo Torquati
Targeting heterogeneous architectures via macro data flow Journal Article
In: Parallel Processing Letters, vol. 22, no. 2, 2012, ISSN: 0129-6264.
Abstract | Links | BibTeX | Tags: fastflow, paraphrase
@article{mdf:hplgpu:ppl:12,
  author    = {Marco Aldinucci and Marco Danelutto and Peter Kilpatrick and Massimo Torquati},
  title     = {Targeting heterogeneous architectures via macro data flow},
  journal   = {Parallel Processing Letters},
  volume    = {22},
  number    = {2},
  year      = {2012},
  date      = {2012-06-01},
  issn      = {0129-6264},
  doi       = {10.1142/S0129626412400063},
  url       = {http://calvados.di.unipi.it/storage/paper_files/2012_mdf_PPL-hplgpu.pdf},
  abstract  = {We propose a data flow based run time system as an efficient tool for supporting execution of parallel code on heterogeneous architectures hosting both multicore CPUs and GPUs. We discuss how the proposed run time system may be the target of both structured parallel applications developed using algorithmic skeletons/parallel design patterns and also more ``domain specific'' programming models. Experimental results demonstrating the feasibility of the approach are presented.},
  keywords  = {fastflow, paraphrase},
  pubstate  = {published},
  tppubtype = {article}
}