Projects | Parallel Computing
ICSC This work was supported by the following two projects: the Spoke 1 “FutureHPC & BigData” of the ICSC – Centro Nazionale di Ricerca in “High-Performance Computing, Big Data and Quantum Computing,” funded by European Union – NextGenerationEU
Publications
2024
Gianluca Mittone, Giulio Malenza, Marco Aldinucci, Robert Birke
Distributed Edge Inference: an Experimental Study on Multiview Detection Proceedings Article
In: Proc. of the 16th IEEE/ACM Intl. Conference on Utility and Cloud Computing Companion (UCC), pp. 1-6, ACM, Taormina, Italy, 2024, (eupilot, icsc).
Abstract | Links | BibTeX | Tags: ai, eupilot, icsc
@inproceedings{23:mittone:multiview,
title = {Distributed Edge Inference: an Experimental Study on Multiview Detection},
author = {Gianluca Mittone and Giulio Malenza and Marco Aldinucci and Robert Birke},
url = {https://iris.unito.it/handle/2318/1950083},
doi = {10.1145/3603166.3632561},
year = {2024},
date = {2024-12-01},
booktitle = {Proc. of the 16th IEEE/ACM Intl. Conference on Utility and Cloud Computing Companion (UCC)},
volume = {30},
pages = {1--6},
publisher = {ACM},
address = {Taormina, Italy},
institution = {Computer Science Department, University of Torino},
abstract = {Computing is evolving rapidly to cater to the increasing demand for sophisticated services, and Cloud computing lays a solid foundation for flexible on-demand provisioning. However, as the size of applications grows, the centralised client-server approach used by Cloud computing increasingly limits the applications' scalability. To achieve ultra-scalability, cloud/edge/fog computing converges into the compute continuum, completely decentralising the infrastructure to encompass universal, pervasive resources. The compute continuum makes devising applications benefitting from this complex environment a challenging research problem. We put the opportunities the compute continuum offers to the test through a real-world multi-view detection model (MvDet) implemented with the FastFL C/C++ high-performance edge inference framework. Computational performance is discussed considering many experimental scenarios, encompassing different edge computational capabilities and network bandwidths. We obtain up to 1.92x speedup in inference time over a centralised solution using the same devices.},
note = {eupilot, icsc},
keywords = {ai, eupilot, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
Bruno Casella, Alessio Barbaro Chisari, Marco Aldinucci, Sebastiano Battiato, Mario Valerio Giuffrida
Federated Learning in a Semi-Supervised Environment for Earth Observation Data Proceedings Article
In: Proceedings of the 32nd European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning, ESANN, Bruges, Belgium, 2024.
Abstract | Links | BibTeX | Tags: ai, epi, icsc
@inproceedings{24:casella:fedrec,
  author    = {Bruno Casella and Alessio Barbaro Chisari and Marco Aldinucci and Sebastiano Battiato and Mario Valerio Giuffrida},
  title     = {Federated Learning in a Semi-Supervised Environment for Earth Observation Data},
  booktitle = {Proceedings of the 32nd European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning, ESANN},
  address   = {Bruges, Belgium},
  year      = {2024},
  date      = {2024-10-01},
  doi       = {10.14428/esann/2024.es2024-214},
  url       = {https://iris.unito.it/retrieve/a798d7b8-6b98-48c2-92f4-327d2aaa8788/ES2024-214.pdf},
  abstract  = {We propose FedRec, a federated learning workflow taking advantage of unlabelled data in a semi-supervised environment to assist in the training of a supervised aggregated model. In our proposed method, an encoder architecture extracting features from unlabelled data is aggregated with the feature extractor of a classification model via weight averaging. The fully connected layers of the supervised models are also averaged in a federated fashion. We show the effectiveness of our approach by comparing it with the state-of-the-art federated algorithm, an isolated and a centralised baseline, on novel cloud detection datasets.},
  keywords  = {ai, epi, icsc},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Bruno Casella, Matthias Jakobs, Marco Aldinucci, Sebastian Buschjäger
Federated Time Series Classification with ROCKET features Proceedings Article
In: Proceedings of the 32nd European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning, ESANN, Bruges, Belgium, 2024.
Abstract | Links | BibTeX | Tags: ai, epi, icsc
@inproceedings{24:casella:frocks,
title = {Federated Time Series Classification with {ROCKET} features},
author = {Bruno Casella and Jakobs, Matthias and Marco Aldinucci and Sebastian Buschjäger},
url = {https://iris.unito.it/retrieve/51b63fc1-3e22-4ad4-8926-84af69cde739/ES2024-61.pdf},
doi = {10.14428/esann/2024.es2024-61},
year = {2024},
date = {2024-10-01},
booktitle = {Proceedings of the 32nd European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning, ESANN},
address = {Bruges, Belgium},
abstract = {This paper proposes FROCKS, a federated time series classification method using ROCKET features. Our approach dynamically adapts the models’ features by selecting and exchanging the best-performing ROCKET kernels from a federation of clients. Specifically, the server gathers the best-performing kernels of the clients together with the associated model parameters, and it performs a weighted average if a kernel is best-performing for more than one client. We compare the proposed method with state-of-the-art approaches on the UCR archive binary classification datasets and show superior performance on most datasets.},
keywords = {ai, epi, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
Samuele Fonio, Mirko Polato, Roberto Esposito
FedHP: Federated Learning with Hyperspherical Prototypical Regularization Proceedings Article
In: 32nd European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN), Bruges, Belgium, 2024.
Abstract | Links | BibTeX | Tags: ai, icsc
@inproceedings{24:esann:fonio:fedhp,
title = {{FedHP}: Federated Learning with Hyperspherical Prototypical Regularization},
author = {Samuele Fonio and Mirko Polato and Roberto Esposito},
url = {https://www.esann.org/sites/default/files/proceedings/2024/ES2024-183.pdf},
year = {2024},
date = {2024-10-01},
booktitle = {32nd European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN)},
address = {Bruges, Belgium},
abstract = {This paper presents FedHP, an algorithm that amalgamates federated learning, hyperspherical geometries, and prototype learning. Federated Learning (FL) has garnered attention as a privacy-preserving method for constructing robust models across distributed datasets. Traditionally, FL involves exchanging model parameters to uphold data privacy; however, in scenarios with costly data communication, exchanging large neural network models becomes impractical. In such instances, prototype learning provides a feasible solution by necessitating the exchange of a few class prototypes instead of entire deep learning models. Motivated by these considerations, our approach leverages recent advancements in prototype learning, particularly the benefits offered by non-Euclidean geometries. Alongside introducing FedHP, we provide empirical evidence demonstrating its comparable performance to other state-of-the-art approaches while significantly reducing communication costs.},
keywords = {ai, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
Simone Leo, Michael R. Crusoe, Laura Rodríguez-Navas, Raül Sirvent, Alexander Kanitz, Paul De Geest, Rudolf Wittner, Luca Pireddu, Daniel Garijo, José M. Fernández, Iacopo Colonnelli, Matej Gallo, Tazro Ohta, Hirotaka Suetake, Salvador Capella-Gutierrez, Renske de Wit, Bruno P. Kinoshita, Stian Soiland-Reyes
Recording provenance of workflow runs with RO-Crate Journal Article
In: PLoS ONE, vol. 19, no. 9, pp. 1–35, 2024.
Abstract | Links | BibTeX | Tags: across, eupex, icsc, streamflow
@article{24:pone:wfrunrocrate,
title = {Recording provenance of workflow runs with {RO-Crate}},
author = {Simone Leo and Michael R. Crusoe and Laura Rodríguez-Navas and Raül Sirvent and Alexander Kanitz and De Geest, Paul and Rudolf Wittner and Luca Pireddu and Daniel Garijo and José M. Fernández and Iacopo Colonnelli and Matej Gallo and Tazro Ohta and Hirotaka Suetake and Salvador Capella-Gutierrez and de Wit, Renske and Bruno P. Kinoshita and Stian Soiland-Reyes},
url = {https://iris.unito.it/retrieve/d261a069-1afb-4384-88e8-97d62b183b55/journal.pone.0309210.pdf},
doi = {10.1371/journal.pone.0309210},
year = {2024},
date = {2024-09-01},
journal = {PLoS ONE},
volume = {19},
number = {9},
pages = {1--35},
publisher = {Public Library of Science},
abstract = {Recording the provenance of scientific computation results is key to the support of traceability, reproducibility and quality assessment of data products. Several data models have been explored to address this need, providing representations of workflow plans and their executions as well as means of packaging the resulting information for archiving and sharing. However, existing approaches tend to lack interoperable adoption across workflow management systems. In this work we present Workflow Run RO-Crate, an extension of RO-Crate (Research Object Crate) and Schema.org to capture the provenance of the execution of computational workflows at different levels of granularity and bundle together all their associated objects (inputs, outputs, code, etc.). The model is supported by a diverse, open community that runs regular meetings, discussing development, maintenance and adoption aspects. Workflow Run RO-Crate is already implemented by several workflow management systems, allowing interoperable comparisons between workflow runs from heterogeneous systems. We describe the model, its alignment to standards such as W3C PROV, and its implementation in six workflow systems. Finally, we illustrate the application of Workflow Run RO-Crate in two use cases of machine learning in the digital image analysis domain.},
keywords = {across, eupex, icsc, streamflow},
pubstate = {published},
tppubtype = {article}
}
Iacopo Colonnelli, Doriana Medić, Alberto Mulone, Viviana Bono, Luca Padovani, Marco Aldinucci
Introducing SWIRL: An Intermediate Representation Language for Scientific Workflows Proceedings Article
In: Platzer, André, Rozier, Kristin Yvonne, Pradella, Matteo, Rossi, Matteo (Ed.): Formal Methods. FM 2024, pp. 226–244, Springer Nature Switzerland, Milano, Italy, 2024.
Abstract | Links | BibTeX | Tags: eupex, icsc
@inproceedings{24:fm:swirl,
title = {Introducing {SWIRL}: An Intermediate Representation Language for Scientific Workflows},
author = {Iacopo Colonnelli and Doriana Medić and Alberto Mulone and Viviana Bono and Luca Padovani and Marco Aldinucci},
editor = {André Platzer and Kristin Yvonne Rozier and Matteo Pradella and Matteo Rossi},
url = {https://iris.unito.it/retrieve/b39a6f09-a8d3-4974-abf6-c109916694fa/PDFEditoriale.pdf},
doi = {10.1007/978-3-031-71162-6_12},
year = {2024},
date = {2024-09-01},
booktitle = {Formal Methods. FM 2024},
volume = {14933},
pages = {226--244},
publisher = {Springer Nature Switzerland},
address = {Milano, Italy},
series = {Lecture Notes in Computer Science},
abstract = {In the ever-evolving landscape of scientific computing, properly supporting the modularity and complexity of modern scientific applications requires new approaches to workflow execution, like seamless interoperability between different workflow systems, distributed-by-design workflow models, and automatic optimisation of data movements. In order to address this need, this article introduces SWIRL, an intermediate representation language for scientific workflows. In contrast with other product-agnostic workflow languages, SWIRL is not designed for human interaction but to serve as a low-level compilation target for distributed workflow execution plans. The main advantages of SWIRL semantics are low-level primitives based on the send/receive programming model and a formal framework ensuring the consistency of the semantics and the specification of translating workflow models represented by Directed Acyclic Graphs (DAGs) into SWIRL workflow descriptions. Additionally, SWIRL offers rewriting rules designed to optimise execution traces, accompanied by corresponding equivalence. An open-source SWIRL compiler toolchain has been developed using the ANTLR Python3 bindings.},
keywords = {eupex, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
Alberto Mulone, Doriana Medić, Marco Aldinucci
A Fault Tolerance mechanism for Hybrid Scientific Workflows Proceedings Article
In: 1st workshop about High-Performance e-Science (HiPES), Madrid, Spain, 2024.
Abstract | BibTeX | Tags: eupex, icsc, streamflow
@inproceedings{24:madrid:hipes,
  author    = {Alberto Mulone and Doriana Medić and Marco Aldinucci},
  title     = {A Fault Tolerance mechanism for Hybrid Scientific Workflows},
  booktitle = {1st workshop about High-Performance e-Science (HiPES)},
  address   = {Madrid, Spain},
  year      = {2024},
  date      = {2024-08-01},
  abstract  = {In large distributed systems, failures are a daily event occurring frequently, especially with growing numbers of computation tasks and locations on which they are deployed. The advantage of representing an application as a workflow is possibility to utilize the Workflow Management Systems which are reliable systems guaranteeing the correct execution of the application and providing the features such as portability, scalability, and fault tolerance. Over recent years, the emergence of hybrid workflows has posed new and intriguing challenges by increasing the possibility of distributing computations involving heterogeneous and independent environments. As a consequence, the number of possible points of failure in the execution augmented, creating different important challenges interesting to study.},
  keywords  = {eupex, icsc, streamflow},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Alessia Antelmi, Massimo Torquati, Giacomo Corridori, Daniele Gregori, Francesco Polzella, Gianmarco Spinatelli, Marco Aldinucci
Analyzing FOSS license usage in publicly available software at scale via the SWH-analytics framework Journal Article
In: The Journal of Supercomputing, vol. 80, no. 11, pp. 15799-15833, 2024, ISSN: 1573-0484.
Abstract | Links | BibTeX | Tags: analytics, icsc
@article{Antelmi_JSUPE_2024,
title = {Analyzing {FOSS} license usage in publicly available software at scale via the {SWH-analytics} framework},
author = {Alessia Antelmi and Massimo Torquati and Giacomo Corridori and Daniele Gregori and Francesco Polzella and Gianmarco Spinatelli and Marco Aldinucci},
url = {https://doi.org/10.1007/s11227-024-06069-x},
doi = {10.1007/s11227-024-06069-x},
issn = {1573-0484},
year = {2024},
date = {2024-07-01},
journal = {The Journal of Supercomputing},
volume = {80},
number = {11},
pages = {15799--15833},
abstract = {The Software Heritage (SWH) dataset represents an invaluable source of open-source code as it aims to collect, preserve, and share all publicly available software in source code form ever produced by humankind. Although designed to archive deduplicated small files thanks to the use of a Merkle tree as the underlying data structure, querying the SWH dataset presents challenges due to the nature of these structures, which organize content based on hash values rather than any locality principle. The magnitude of the repository, coupled with the resource-intensive nature of the download process, highlights the need for specialized infrastructure and computational resources to effectively handle and study the extensive dataset housed within SWH. Currently, there is a lack of infrastructures specifically tailored for running analytics on the SWH dataset, leaving users to handle these issues manually. To address these challenges, we implemented the SWH-Analytics (SWHA) framework, a development environment that transparently runs custom analytic applications on publicly available software data preserved over time by SWH. Specifically, this work shows how SWHA can be effectively exploited to study usage patterns of free and open-source software licenses, highlighting the need to improve license literacy among developers.},
keywords = {analytics, icsc},
pubstate = {published},
tppubtype = {article}
}
Miruna Bețianu, Abele Mălan, Marco Aldinucci, Robert Birke, Lydia Chen
DALLMi: Domain Adaption for LLM-based Multi-label Classifier Proceedings Article
In: Yang, De-Nian, Xie, Xing, Tseng, Vincent S., Pei, Jian, Huang, Jen-Wei, Lin, Jerry Chun-Wei (Ed.): Proceedings of the 28th Pacific-Asia Conference on Knowledge Discovery and Data Mining, pp. 277–289, Springer, Taipei, Taiwan, 2024.
Abstract | Links | BibTeX | Tags: ai, eupilot, icsc
@inproceedings{24:betianu:llm,
title = {{DALLMi}: Domain Adaption for {LLM}-based Multi-label Classifier},
author = {Miruna Bețianu and Abele Mălan and Marco Aldinucci and Robert Birke and Lydia Chen},
editor = {De-Nian Yang and Xing Xie and Vincent S. Tseng and Jian Pei and Jen-Wei Huang and Jerry Chun-Wei Lin},
url = {https://hdl.handle.net/2318/1976672},
doi = {10.1007/978-981-97-2259-4_21},
year = {2024},
date = {2024-05-01},
booktitle = {Proceedings of the 28th Pacific-Asia Conference on Knowledge Discovery and Data Mining},
volume = {14647},
pages = {277--289},
publisher = {Springer},
address = {Taipei, Taiwan},
series = {Lecture Notes in Computer Science},
abstract = {Large language models (LLMs) increasingly serve as the backbone for classifying text associated with distinct domains and simultaneously several labels (classes). When encountering domain shifts, e.g., classifier of movie reviews from IMDb to Rotten Tomatoes, adapting such an LLM-based multi-label classifier is challenging due to incomplete label sets at the target domain and daunting training overhead. The existing domain adaptation methods address either image multi-label classifiers or text binary classifiers. In this paper, we design DALLMi, Domain Adaptation Large Language Model interpolator, a first-of-its-kind semi-supervised domain adaptation method for text data models based on LLMs, specifically BERT. The core of DALLMi is the novel variation loss and MixUp regularization, which jointly leverage the limited positively labeled and large quantity of unlabeled text and, importantly, their interpolation from the BERT word embeddings. DALLMi also introduces a label-balanced sampling strategy to overcome the imbalance between labeled and unlabeled data. We evaluate DALLMi against the partial-supervised and unsupervised approach on three datasets under different scenarios of label availability for the target domain. Our results show that DALLMi achieves higher mAP than unsupervised and partially-supervised approaches by 19.9% and 52.2%, respectively.},
keywords = {ai, eupilot, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
Chi Hong, Robert Birke, Pin-Yu Chen, Lydia Chen
On Dark Knowledge for Distilling Generators Proceedings Article
In: Yang, De-Nian, Xie, Xing, Tseng, Vincent S., Pei, Jian, Huang, Jen-Wei, Lin, Jerry Chun-Wei (Ed.): Proceedings of the 28th Pacific-Asia Conference on Knowledge Discovery and Data Mining, pp. 235–247, Springer, Taipei, Taiwan, 2024.
Abstract | Links | BibTeX | Tags: ai, epi, icsc
@inproceedings{24:chen:llm,
title = {On Dark Knowledge for Distilling Generators},
author = {Chi Hong and Robert Birke and Pin-Yu Chen and Lydia Chen},
editor = {De-Nian Yang and Xing Xie and Vincent S. Tseng and Jian Pei and Jen-Wei Huang and Jerry Chun-Wei Lin},
url = {https://hdl.handle.net/2318/1976671},
doi = {10.1007/978-981-97-2253-2_19},
year = {2024},
date = {2024-05-01},
booktitle = {Proceedings of the 28th Pacific-Asia Conference on Knowledge Discovery and Data Mining},
volume = {14646},
pages = {235--247},
publisher = {Springer},
address = {Taipei, Taiwan},
series = {Lecture Notes in Computer Science},
abstract = {Knowledge distillation has been applied on generative models, such as Variational Autoencoder (VAE) and Generative Adversarial Networks (GANs). To distill the knowledge, the synthetic outputs of a teacher generator are used to train a student model. While the dark knowledge, i.e., the probabilistic output, is well explored in distilling classifiers, little is known about the existence of an equivalent dark knowledge for generative models and its extractability. In this paper, we derive the first kind of empirical risk bound for distilling generative models from a Bayesian perspective. Through our analysis, we show the existence of the dark knowledge for generative models, i.e., Bayes probability distribution of a synthetic output from a given input, which achieves lower empirical risk bound than merely using the synthetic output of the generators. Furthermore, we propose a Dark Knowledge based Distillation, DKtill, which trains the student generator based on the (approximate) dark knowledge. Our extensive evaluation on distilling VAE, conditional GANs, and translation GANs on Facades and CelebA datasets show that the FID of student generators trained by DKtill combining dark knowledge are lower than student generators trained only by the synthetic outputs by up to 42.66%, and 78.99%, respectively.},
keywords = {ai, epi, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
Bruno Casella, Iacopo Colonnelli, Gianluca Mittone, Robert Birke, Walter Riviera, Antonio Sciarappa, Carlo Cavazzoni, Marco Aldinucci
A Performance Analysis for Confidential Federated Learning Proceedings Article
In: Proceedings of the 2024 Deep Learning Security and Privacy Workshop, IEEE Symposium on Security and Privacy 2024, San Francisco, CA, 2024.
Abstract | Links | BibTeX | Tags: ai, confidential, epi, icsc
@inproceedings{24:casella:sgx,
title = {A Performance Analysis for Confidential Federated Learning},
author = {Bruno Casella and Iacopo Colonnelli and Gianluca Mittone and Robert Birke and Walter Riviera and Antonio Sciarappa and Carlo Cavazzoni and Marco Aldinucci},
url = {https://iris.unito.it/retrieve/b5877a97-2d8d-4e95-8791-0aa4a1b953b3/DLSP___CONFIDENTIAL_FL.pdf},
doi = {10.1109/SPW63631.2024.00009},
year = {2024},
date = {2024-05-01},
booktitle = {Proceedings of the 2024 Deep Learning Security and Privacy Workshop, IEEE Symposium on Security and Privacy 2024},
address = {San Francisco, CA},
abstract = {Federated Learning (FL) has emerged as a solution to preserve data privacy by keeping the data locally on each participant's device. However, FL alone is still vulnerable to attacks that can cause privacy leaks. Therefore, it becomes necessary to take additional security measures at the cost of increasing runtimes. The Trusted Execution Environment (TEE) approach promises to offer the highest degree of security during execution. However, TEEs suffer from memory limits which prevent safe end-to-end FL training of modern deep models. State-of-the-art approaches limit secure training to selected layers, failing to avert the full spectrum of attacks or adopt layer-wise training affecting model performance. We benchmark the usage of a library OS (LibOS) to run the full, unmodified end-to-end FL training inside the TEE. We extensively evaluate and model the overhead of the different security mechanisms needed to protect the data and model during computation (TEE), communication (TLS), and storage (disk encryption). The obtained results across three datasets and two models demonstrate that LibOSes are a viable way to seamlessly inject security into FL with limited overhead (at most 2x), offering valuable guidance for researchers and developers aiming to apply FL in data-security-focused contexts.},
keywords = {ai, confidential, epi, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
Giulio Malenza, Valentina Cesare, Marco Aldinucci, Ugo Becciani, Alberto Vecchiato
Toward HPC application portability via C++ PSTL: the Gaia AVU-GSR code assessment Journal Article
In: The Journal of Supercomputing, 2024, ISSN: 0920-8542.
Abstract | Links | BibTeX | Tags: eupex, HPC, icsc
@article{24:jsupe:Gaia,
title = {Toward {HPC} application portability via {C++} {PSTL}: the {Gaia} {AVU-GSR} code assessment},
author = {Giulio Malenza and Valentina Cesare and Marco Aldinucci and Ugo Becciani and Alberto Vecchiato},
doi = {10.1007/s11227-024-06011-1},
issn = {0920-8542},
year = {2024},
date = {2024-03-01},
journal = {The Journal of Supercomputing},
publisher = {Springer},
abstract = {The computing capacity needed to process the data generated in modern scientific experiments is approaching ExaFLOPs. Currently, achieving such performances is only feasible through GPU-accelerated supercomputers. Different languages were developed to program GPUs at different levels of abstraction. Typically, the more abstract the languages, the more portable they are across different GPUs. However, the less abstract and co-designed with the hardware, the more room for code optimization and, eventually, the more performance. In the HPC context, portability and performance are a fairly traditional dichotomy. The current C++ Parallel Standard Template Library (PSTL) has the potential to go beyond this dichotomy. In this work, we analyze the main performance benefits and limitations of PSTL using as a use-case the Gaia Astrometric Verification Unit-Global Sphere Reconstruction parallel solver developed by the European Space Agency Gaia mission. The code aims to find the astrometric parameters of $\sim10^8$ stars in the Milky Way by iteratively solving a linear system of equations with the LSQR algorithm, originally GPU-ported with the CUDA language. We show that the performance obtained with the PSTL version, which is intrinsically more portable than CUDA, is comparable to the CUDA one on NVIDIA GPU architecture.},
keywords = {eupex, HPC, icsc},
pubstate = {published},
tppubtype = {article}
}
Marco Edoardo Santimaria, Samuele Fonio, Giulio Malenza, Iacopo Colonnelli, Marco Aldinucci
Benchmarking Parallelization Models through Karmarkar Interior-point method Proceedings Article
In: González-Vélez, Horacio, Chis, Adriana E. (Ed.): Proc. of 32nd Euromicro intl. Conference on Parallel, Distributed and Network-based Processing (PDP), pp. 1-8, IEEE, Dublin, Ireland, 2024, ISSN: 2377-5750.
Abstract | Links | BibTeX | Tags: HPC, icsc
@inproceedings{24:pdp:karmarkar,
title = {Benchmarking Parallelization Models through {Karmarkar} Interior-point method},
author = {Marco Edoardo Santimaria and Samuele Fonio and Giulio Malenza and Iacopo Colonnelli and Marco Aldinucci},
editor = {Horacio González-Vélez and Adriana E. Chis},
url = {https://hdl.handle.net/2318/1964571},
doi = {10.1109/PDP62718.2024.00010},
issn = {2377-5750},
year = {2024},
date = {2024-03-01},
booktitle = {Proc. of 32nd Euromicro intl. Conference on Parallel, Distributed and Network-based Processing (PDP)},
pages = {1--8},
publisher = {IEEE},
address = {Dublin, Ireland},
abstract = {Optimization problems are one of the main focus of scientific research. Their computational-intensive nature makes them prone to be parallelized with consistent improvements in performance. This paper sheds light on different parallel models for accelerating Karmarkar's Interior-point method. To do so, we assess parallelization strategies for individual operations within the aforementioned Karmarkar's algorithm using OpenMP, GPU acceleration with CUDA, and the recent Parallel Standard C++ Linear Algebra library (PSTL) executing both on GPU and CPU. Our different implementations yield interesting benchmark results that show the optimal approach for parallelizing interior point algorithms for general Linear Programming (LP) problems. In addition, we propose a more theoretical perspective of the parallelization of this algorithm, with a detailed study of our OpenMP implementation, showing the limits of optimizing the single operations.},
keywords = {HPC, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
Adriano Marques Garcia, Giulio Malenza, Robert Birke, Marco Aldinucci
Assessing Large Language Models Inference Performance on a 64-core RISC-V CPU with Silicon-Enabled Vectors Proceedings Article
In: Antelmi, Alessia, Carlini, Emanuele, Dazzi, Patrizio (Ed.): Proceedings of BigHPC2024: Special Track on Big Data and High-Performance Computing, co-located with the 3rd Italian Conference on Big Data and Data Science, ITADATA2024, pp. 1-9, CEUR-WS.org, Pisa, Italy, 2024.
Abstract | Links | BibTeX | Tags: eupilot, icsc
@inproceedings{24:garcia:itadata,
title = {Assessing Large Language Models Inference Performance on a 64-core {RISC-V} {CPU} with Silicon-Enabled Vectors},
author = {Adriano Marques Garcia and Giulio Malenza and Robert Birke and Marco Aldinucci},
editor = {Alessia Antelmi and Emanuele Carlini and Patrizio Dazzi},
url = {https://iris.unito.it/retrieve/1540f675-5e88-4f57-95e7-df8e0fe5f1df/paper110.pdf},
year = {2024},
date = {2024-01-01},
booktitle = {Proceedings of BigHPC2024: Special Track on Big Data and High-Performance Computing, co-located with the 3\textsuperscript{rd} Italian Conference on Big Data and Data Science, ITADATA2024},
volume = {3785},
pages = {1--9},
publisher = {CEUR-WS.org},
address = {Pisa, Italy},
series = {CEUR Workshop Proceedings},
abstract = {The rising usage of compute-intensive AI applications with fast response time requirements, such as text generation using large language models, underscores the need for more efficient and versatile hardware solutions. This drives the exploration of emerging architectures like RISC-V, which has the potential to deliver strong performance within tight power constraints. The recent commercial release of processors with RISC-V Vector (RVV) silicon-enabled extensions further amplifies the significance of RISC-V architectures, offering enhanced capabilities for parallel processing and accelerating tasks critical to large language models and other AI applications. This work aims to evaluate the BERT and GPT-2 language models inference performance on the SOPHON SG2042 64-core RISC-V architecture with silicon-enabled RVV v0.7.1. We benchmarked the models with and without RVV, using OpenBLAS and BLIS as BLAS backends for PyTorch to enable vectorization. Enabling RVV in OpenBLAS improved the inference performance by up to 40% in some cases.},
keywords = {eupilot, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
Lorenzo Brescia, Iacopo Colonnelli, Marco Aldinucci
Performance Analysis on DNA Alignment Workload with Intel SGX Multithreading Proceedings Article
In: Antelmi, Alessia, Carlini, Emanuele, Dazzi, Patrizio (Ed.): Proceedings of BigHPC2024: Special Track on Big Data and High-Performance Computing, co-located with the 3rd Italian Conference on Big Data and Data Science, ITADATA2024, CEUR-WS.org, 2024.
Abstract | Links | BibTeX | Tags: confidential, icsc
@inproceedings{24:brescia:itadata,
title = {Performance Analysis on {DNA} Alignment Workload with {Intel} {SGX} Multithreading},
author = {Lorenzo Brescia and Iacopo Colonnelli and Marco Aldinucci},
editor = {Alessia Antelmi and Emanuele Carlini and Patrizio Dazzi},
url = {https://ceur-ws.org/Vol-3785/paper107.pdf},
year = {2024},
date = {2024-01-01},
booktitle = {Proceedings of BigHPC2024: Special Track on Big Data and High-Performance Computing, co-located with the 3\textsuperscript{rd} Italian Conference on Big Data and Data Science, ITADATA2024},
volume = {3785},
publisher = {CEUR-WS.org},
series = {CEUR Workshop Proceedings},
abstract = {Data confidentiality is a critical issue in the digital age, impacting interactions between users and public services and between scientific computing organizations and Cloud and HPC providers. Performance in parallel computing is essential, yet techniques for establishing Trusted Execution Environments (TEEs) to ensure privacy in remote environments often negatively impact execution time. This paper aims to analyze the performance of a parallel bioinformatics workload for DNA alignment (Bowtie2) executed within the confidential enclaves of Intel SGX processors. The results provide encouraging insights regarding the feasibility of using SGX-based TEEs for parallel computing on large datasets. The findings indicate that, under conditions of high parallelization and with twice as many threads, workloads executed within SGX enclaves perform, on average, 15% faster than non-confidential execution. This empirical demonstration supports the potential of SGX-based TEEs to effectively balance the need for privacy with the demands of high-performance computing.},
keywords = {confidential, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
Sunwoo Kim, Soo Yong Lee, Yue Gao, Alessia Antelmi, Mirko Polato, Kijung Shin
A Survey on Hypergraph Neural Networks: An In-Depth and Step-By-Step Guide Proceedings Article
In: Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, pp. 6534–6544, Association for Computing Machinery, Barcelona, Spain, 2024, ISBN: 9798400704901.
Abstract | Links | BibTeX | Tags: ai, analytics, icsc
@inproceedings{Antelmi_KDD_2024,
title = {A Survey on Hypergraph Neural Networks: An In-Depth and Step-By-Step Guide},
author = {Sunwoo Kim and Soo Yong Lee and Yue Gao and Alessia Antelmi and Mirko Polato and Kijung Shin},
url = {https://doi.org/10.1145/3637528.3671457},
doi = {10.1145/3637528.3671457},
isbn = {9798400704901},
year = {2024},
date = {2024-01-01},
booktitle = {Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining},
pages = {6534--6544},
publisher = {Association for Computing Machinery},
address = {Barcelona, Spain},
series = {KDD '24},
abstract = {Higher-order interactions (HOIs) are ubiquitous in real-world complex systems and applications. Investigation of deep learning for HOIs, thus, has become a valuable agenda for the data mining and machine learning communities. As networks of HOIs are expressed mathematically as hypergraphs, hypergraph neural networks (HNNs) have emerged as a powerful tool for representation learning on hypergraphs. Given the emerging trend, we present the first survey dedicated to HNNs, with an in-depth and step-by-step guide. Broadly, the present survey overviews HNN architectures, training strategies, and applications. First, we break existing HNNs down into four design components: (i) input features, (ii) input structures, (iii) message-passing schemes, and (iv) training strategies. Second, we examine how HNNs address and learn HOIs with each of their components. Third, we overview the recent applications of HNNs in recommendation, bioinformatics and medical science, time series analysis, and computer vision. Lastly, we conclude with a discussion on limitations and future directions.},
keywords = {ai, analytics, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
Iacopo Colonnelli, Robert Birke, Giulio Malenza, Gianluca Mittone, Alberto Mulone, Jeroen Galjaard, Lydia Y. Chen, Sanzio Bassini, Gabriella Scipione, Jan Martinovič, Vit Vondrák, Marco Aldinucci
Cross-Facility Federated Learning Journal Article
In: Procedia Computer Science, vol. 240, pp. 3–12, 2024, ISSN: 1877-0509.
Abstract | Links | BibTeX | Tags: icsc, space, streamflow
@article{24:eurohpc:xffl,
title = {Cross-Facility Federated Learning},
author = {Iacopo Colonnelli and Robert Birke and Giulio Malenza and Gianluca Mittone and Alberto Mulone and Jeroen Galjaard and Lydia Y. Chen and Sanzio Bassini and Gabriella Scipione and Jan Martinovič and Vit Vondrák and Marco Aldinucci},
url = {https://www.sciencedirect.com/science/article/pii/S1877050924016909},
doi = {10.1016/j.procs.2024.07.003},
issn = {1877-0509},
year = {2024},
date = {2024-01-01},
booktitle = {Proceedings of the First EuroHPC user day},
journal = {Procedia Computer Science},
volume = {240},
pages = {3--12},
publisher = {Elsevier},
address = {Bruxelles, Belgium},
abstract = {In a decade, AI frontier research transitioned from the researcher's workstation to thousands of high-end hardware-accelerated compute nodes. This rapid evolution shows no signs of slowing down in the foreseeable future. While top cloud providers may be able to keep pace with this growth rate, obtaining and efficiently exploiting computing resources at that scale is a daunting challenge for universities and SMEs. This work introduces the Cross-Facility Federated Learning (XFFL) framework to bridge this compute divide, extending the opportunity to efficiently exploit multiple independent data centres for extreme-scale deep learning tasks to data scientists and domain experts. XFFL relies on hybrid workflow abstractions to decouple tasks from environment-specific technicalities, reducing complexity and enhancing reusability. In addition, Federated Learning (FL) algorithms eliminate the need to move large amounts of data between different facilities, reducing time-to-solution and preserving data privacy. The XFFL approach is empirically evaluated by training a full LLaMAv2 7B instance on two facilities of the EuroHPC JU, showing how the increased computing power completely compensates for the additional overhead introduced by two data centres.},
keywords = {icsc, space, streamflow},
pubstate = {published},
tppubtype = {article}
}
Lorenzo Brescia, Marco Aldinucci
Secure Generic Remote Workflow Execution with TEEs Proceedings Article
In: Proc. of the 2nd Workshop on Workflows in Distributed Environments (WiDE), pp. 8-13, ACM, Athens, Greece, 2024.
Abstract | Links | BibTeX | Tags: confidential, icsc
@inproceedings{23:brescia:wide,
title = {Secure Generic Remote Workflow Execution with {TEEs}},
author = {Lorenzo Brescia and Marco Aldinucci},
doi = {10.1145/3642978.3652834},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. of the 2nd Workshop on Workflows in Distributed Environments (WiDE)},
pages = {8--13},
publisher = {ACM},
address = {Athens, Greece},
abstract = {In scientific environments, the frequent need to process substantial volumes of data poses a common challenge. Individuals tasked with executing these computations frequently encounter a deficit in local computational resources, leading them to opt for the facilities of a Cloud Service Provider (CSP) for data processing. However, the data subjected to these calculations may be subject to confidentiality constraints. This paper introduces a proof-of-concept framework that leverages Gramine LibOS and Intel SGX, enabling the protection of generic remote workflow computations through SGX enclaves as Trusted Execution Environments (TEEs). The framework entails the delineation of user and CSP behavior and has been implemented using Bash scripts. Furthermore, an infrastructure has been designed for the Data Center Attestation Primitives (DCAP) remote attestation mechanism, wherein the user gains trust in the proper instantiation of the enclave within the CSP. To assess the framework efficacy, it has been tested on two distinct workflows, one trivial and the other involving real-world bioinformatics applications for processing DNA data. The performance study revealed that the framework incurred an acceptable overhead, ranging from a factor of x1.4 to x1.8 compared to unsafe execution practice.},
howpublished = {Proceedings of the 2nd Workshop on Workflows in Distributed Environments},
keywords = {confidential, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
Bruno Casella, Roberto Esposito, Antonio Sciarappa, Carlo Cavazzoni, Marco Aldinucci
Experimenting With Normalization Layers in Federated Learning on Non-IID Scenarios Journal Article
In: IEEE Access, vol. 12, pp. 47961-47971, 2024.
Links | BibTeX | Tags: epi, icsc
@article{24:casella:normalization,
title = {Experimenting With Normalization Layers in Federated Learning on Non-{IID} Scenarios},
author = {Bruno Casella and Roberto Esposito and Antonio Sciarappa and Carlo Cavazzoni and Marco Aldinucci},
doi = {10.1109/ACCESS.2024.3383783},
year = {2024},
date = {2024-01-01},
journal = {IEEE Access},
volume = {12},
pages = {47961--47971},
keywords = {epi, icsc},
pubstate = {published},
tppubtype = {article}
}
Daniele De Vinco, Alessia Antelmi, Carmine Spagnuolo, Luca Maria Aiello
Deciphering Conversational Networks: Stance Detection via Hypergraphs and LLMs Proceedings Article
In: Companion Publication of the 16th ACM Web Science Conference, pp. 3–4, Association for Computing Machinery, Stuttgart, Germany, 2024, ISBN: 9798400704536.
Abstract | Links | BibTeX | Tags: analytics, icsc
@inproceedings{Antelmi_WebSci_2024,
title = {Deciphering Conversational Networks: Stance Detection via Hypergraphs and {LLMs}},
author = {Daniele De Vinco and Alessia Antelmi and Carmine Spagnuolo and Luca Maria Aiello},
url = {https://doi.org/10.1145/3630744.3658418},
doi = {10.1145/3630744.3658418},
isbn = {9798400704536},
year = {2024},
date = {2024-01-01},
booktitle = {Companion Publication of the 16th ACM Web Science Conference},
pages = {3--4},
publisher = {Association for Computing Machinery},
address = {Stuttgart, Germany},
series = {Websci Companion '24},
abstract = {Understanding the structural and linguistic properties of conversational data in social media is crucial for extracting meaningful insights to understand opinion dynamics, (mis-)information spreading, and the evolution of harmful behavior. Current state-of-the-art mathematical frameworks, such as hypergraphs and linguistic tools, such as large language models (LLMs), offer robust methodologies for modeling high-order group interactions and unprecedented capabilities for dealing with natural language-related tasks. In this study, we propose an innovative approach that blends these worlds by abstracting conversational networks via hypergraphs and analyzing their dynamics through LLMs. Our aim is to enhance the stance detection task by incorporating the high-order interactions naturally embedded within a conversation, thereby enriching the contextual understanding of LLMs regarding the intricate human dynamics underlying social media data.},
keywords = {analytics, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
Alessia Antelmi, Daniele De Vinco, Carmine Spagnuolo
HypergraphRepository: A Community-Driven and Interactive Hypernetwork Data Collection Proceedings Article
In: Dewar, Megan, Kamiński, Bogumił, Kaszyński, Daniel, Kraiński, Łukasz, Prałat, Paweł, Théberge, François, Wrzosek, Małgorzata (Ed.): Modelling and Mining Networks, pp. 159–173, Springer Nature Switzerland, Cham, 2024, ISBN: 978-3-031-59205-8.
Abstract | Links | BibTeX | Tags: analytics, icsc
@inproceedings{Antelmi_WAW_2024,
title = {{HypergraphRepository}: A Community-Driven and Interactive Hypernetwork Data Collection},
author = {Alessia Antelmi and Daniele De Vinco and Carmine Spagnuolo},
editor = {Megan Dewar and Bogumił Kamiński and Daniel Kaszyński and Łukasz Kraiński and Paweł Prałat and François Théberge and Małgorzata Wrzosek},
doi = {10.1007/978-3-031-59205-8_11},
isbn = {978-3-031-59205-8},
year = {2024},
date = {2024-01-01},
booktitle = {Modelling and Mining Networks},
pages = {159--173},
publisher = {Springer Nature Switzerland},
address = {Cham},
abstract = {Hypergraph research has been thriving over the past few years, with a growing interest in a plethora of domains. Despite this remarkable surge, the lack of a comprehensive platform for searching and downloading diverse and well-curated datasets poses a significant obstacle to the continued advancement of the field. This absence hinders the ability of researchers and practitioners to validate and benchmark their hypergraph algorithms and models effectively.},
keywords = {analytics, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
Alessia Antelmi, Pasquale Caramante, Gennaro Cordasco, Giuseppe D'Ambrosio, Daniele De Vinco, Francesco Foglia, Luca Postiglione, Carmine Spagnuolo
Reliable and Efficient Agent-Based Modeling and Simulation Journal Article
In: Journal of Artificial Societies and Social Simulation, vol. 27, no. 2, pp. 4, 2024, ISSN: 1460-7425.
Abstract | Links | BibTeX | Tags: analytics, icsc
@article{Antelmi_JASSS_2024,
  author    = {Alessia Antelmi and Pasquale Caramante and Gennaro Cordasco and Giuseppe D'Ambrosio and Daniele De Vinco and Francesco Foglia and Luca Postiglione and Carmine Spagnuolo},
  title     = {Reliable and Efficient Agent-Based Modeling and Simulation},
  journal   = {Journal of Artificial Societies and Social Simulation},
  volume    = {27},
  number    = {2},
  pages     = {4},
  year      = {2024},
  date      = {2024-01-01},
  issn      = {1460-7425},
  doi       = {10.18564/jasss.5300},
  url       = {http://jasss.soc.surrey.ac.uk/27/2/4.html},
  abstract  = {Agent-based models represent a primary methodology to untangle and study complex systems. Over the last decade, the need for more elaborate computing-demanding models gave rise to many frameworks and tools to run ABM simulations. Current state-of-the-art ABM tools either focus on ease of use, performance, or a trade-off between these two elements. Still, efficiency-oriented solutions (required for both large and small-scale simulations) are vulnerable to memory flaws which could invalidate the experiment results. This work aims to merge efficiency, reliability, and safeness under an innovative ABM software framework based on the Rust programming language. Our framework, krABMaga, is an open-source library that offers a high-level environment by exploiting metaprogramming and expandable visualization features. We equipped our library with a dynamic simulation monitoring system and model exploration and optimization capabilities over parallel, distributed, and cloud architectures. After having presented the overall architecture and functionalities of krABMaga, we discuss a performance comparison of our framework against the mostly adopted ABM software and the scalability potential of our simulation engine on a model calibration experiment running over an AWS EC2 virtual cluster machine. All code and examples models are available on GitHub.},
  keywords  = {analytics, icsc},
  pubstate  = {published},
  tppubtype = {article}
}
Bruno Casella, Walter Riviera, Marco Aldinucci, Gloria Menegaz
Protocol for training MERGE: A federated multi-input neural network for COVID-19 prognosis Journal Article
In: STAR Protocols, 2024, (https://prod-shared-star-protocols.s3.amazonaws.com/protocols/3225.pdf).
Abstract | Links | BibTeX | Tags: epi, icsc
@article{24:casella:starprotocol,
title = {Protocol for training {MERGE}: A federated multi-input neural network for {COVID-19} prognosis},
author = {Bruno Casella and Walter Riviera and Marco Aldinucci and Gloria Menegaz},
url = {https://prod-shared-star-protocols.s3.amazonaws.com/protocols/3225.pdf},
doi = {10.1016/j.xpro.2023.102812},
year = {2024},
date = {2024-01-01},
journal = {STAR Protocols},
institution = {Computer Science Department, University of Torino},
abstract = {Federated learning is a cooperative learning approach that has emerged as an effective way to address privacy concerns. Here, we present a protocol for training MERGE: a federated multi-input neural network (NN) for COVID-19 prognosis. We describe steps for collecting and preprocessing datasets. We then detail the process of training a multi-input NN. This protocol can be adapted for use with datasets containing both image- and table-based input sources.},
keywords = {epi, icsc},
pubstate = {published},
tppubtype = {article}
}
2023
Alberto Riccardo Martinelli, Massimo Torquati, Marco Aldinucci, Iacopo Colonnelli, Barbara Cantalupo
CAPIO: a Middleware for Transparent I/O Streaming in Data-Intensive Workflows Proceedings Article
In: 2023 IEEE 30th International Conference on High Performance Computing, Data, and Analytics (HiPC), IEEE, Goa, India, 2023.
Abstract | Links | BibTeX | Tags: admire, capio, eupex, icsc
@inproceedings{23:hipc:capio,
title = {{CAPIO}: a Middleware for Transparent {I/O} Streaming in Data-Intensive Workflows},
author = {Alberto Riccardo Martinelli and Massimo Torquati and Marco Aldinucci and Iacopo Colonnelli and Barbara Cantalupo},
url = {https://iris.unito.it/retrieve/27380f37-0978-409e-a9d8-2b5e95a4bb85/CAPIO-HiPC23-preprint.pdf},
doi = {10.1109/HiPC58850.2023.00031},
year = {2023},
date = {2023-12-01},
booktitle = {2023 IEEE 30th International Conference on High Performance Computing, Data, and Analytics (HiPC)},
publisher = {IEEE},
address = {Goa, India},
abstract = {With the increasing amount of digital data available for analysis and simulation, the class of I/O-intensive HPC workflows is fated to quickly expand, further exacerbating the performance gap between computing, memory, and storage technologies. This paper introduces CAPIO (Cross-Application Programmable I/O), a middleware capable of injecting I/O streaming capabilities into file-based workflows, improving the computation-I/O overlap without the need to change the application code. The contribution is twofold: 1) at design time, a new I/O coordination language allows users to annotate workflow data dependencies with synchronization semantics; 2) at run time, a user-space middleware automatically and transparently to the user turns a workflow batch execution into a streaming execution according to the semantics expressed in the configuration file. CAPIO has been tested on synthetic benchmarks simulating typical workflow I/O patterns and two real-world workflows. Experiments show that CAPIO reduces the execution time by 10% to 66% for data-intensive workflows that use the file system as a communication medium.},
keywords = {admire, capio, eupex, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
Marco Aldinucci, Elena Maria Baralis, Valeria Cardellini, Iacopo Colonnelli, Marco Danelutto, Sergio Decherchi, Giuseppe Di Modica, Luca Ferrucci, Marco Gribaudo, Francesco Iannone, Marco Lapegna, Doriana Medic, Giuseppa Muscianisi, Francesca Righetti, Eva Sciacca, Nicola Tonellotto, Mauro Tortonesi, Paolo Trunfio, Tullio Vardanega
A Systematic Mapping Study of Italian Research on Workflows Proceedings Article
In: Proceedings of the SC '23 Workshops of The International Conference on High Performance Computing, Network, Storage, and Analysis, SC-W 2023, pp. 2065–2076, ACM, Denver, CO, USA, 2023.
Abstract | Links | BibTeX | Tags: icsc, jupyter-workflow, streamflow
@inproceedings{WORKS2023,
title = {A Systematic Mapping Study of {Italian} Research on Workflows},
author = {Marco Aldinucci and Elena Maria Baralis and Valeria Cardellini and Iacopo Colonnelli and Marco Danelutto and Sergio Decherchi and Giuseppe Di Modica and Luca Ferrucci and Marco Gribaudo and Francesco Iannone and Marco Lapegna and Doriana Medic and Giuseppa Muscianisi and Francesca Righetti and Eva Sciacca and Nicola Tonellotto and Mauro Tortonesi and Paolo Trunfio and Tullio Vardanega},
url = {https://doi.org/10.1145/3624062.3624285},
doi = {10.1145/3624062.3624285},
year = {2023},
date = {2023-11-01},
booktitle = {Proceedings of the SC '23 Workshops of The International Conference on High Performance Computing, Network, Storage, and Analysis, SC-W 2023},
pages = {2065--2076},
publisher = {ACM},
address = {Denver, CO, USA},
abstract = {An entire ecosystem of methodologies and tools revolves around scientific workflow management. They cover crucial non-functional requirements that standard workflow models fail to target, such as interactive execution, energy efficiency, performance portability, Big Data management, and intelligent orchestration in the Computing Continuum. Characterizing and monitoring this ecosystem is crucial to develop an informed view of current and future research directions. This work conducts a systematic mapping study of the Italian workflow research community, collecting and analyzing 25 tools and 10 applications from several scientific domains in the context of the ``National Research Centre for HPC, Big Data, and Quantum Computing'' (ICSC). The study aims to outline the main current research directions and determine how they address the critical needs of modern scientific applications. The findings highlight a variegated research ecosystem of tools, with a prominent interest in advanced workflow orchestration and still immature but promising efforts toward energy efficiency.},
keywords = {icsc, jupyter-workflow, streamflow},
pubstate = {published},
tppubtype = {inproceedings}
}
Zilong Zhao, Robert Birke, Lydia Y. Chen
FCT-GAN: Enhancing Global Correlation of Table Synthesis via Fourier Transform Proceedings Article
In: 32nd ACM International Conference on Information and Knowledge Management (CIKM '23), ACM, Birmingham, United Kingdom, 2023.
Abstract | Links | BibTeX | Tags: icsc
@inproceedings{23:zhao:fctgan,
title = {{FCT-GAN}: Enhancing Global Correlation of Table Synthesis via {Fourier} Transform},
author = {Zilong Zhao and Robert Birke and Lydia Y. Chen},
url = {https://iris.unito.it/retrieve/966ba767-dbbd-41e1-b4e3-7ab7ba09303f/FCT-GAN.pdf},
doi = {10.1145/3583780.3615202},
year = {2023},
date = {2023-10-01},
booktitle = {32nd ACM International Conference on Information and Knowledge Management (CIKM '23)},
publisher = {ACM},
address = {Birmingham, United Kingdom},
abstract = {An alternative method for sharing knowledge while complying with strict data access regulations, such as the European General Data Protection Regulation (GDPR), is the emergence of synthetic tabular data. Mainstream table synthesizers utilize methodologies derived from Generative Adversarial Networks (GAN). Although several state-of-the-art (SOTA) tabular GAN algorithms inherit Convolutional Neural Network (CNN)-based architectures, which have proven effective for images, they tend to overlook two critical properties of tabular data: (i) the global correlation across columns, and (ii) the semantic invariance to the column order. Permuting columns in a table does not alter the semantic meaning of the data, but features extracted by CNNs can change significantly due to their limited convolution filter kernel size. To address the above problems, we propose FCT-GAN– the first conditional tabular GAN to adopt Fourier networks into table synthesis. FCT-GAN enhances permutation invariant GAN training by strengthening the learning of global correlations via Fourier layers. Extensive evaluation on benchmarks and real-world datasets show that FCT-GAN can synthesize tabular data with better (up to 27.8%) machine learning utility (i.e. a proxy of global correlations) and higher (up to 26.5%) statistical similarity to real data. FCT-GAN also has the least variation on synthetic data quality among 7 SOTA baselines on 3 different training-data column orders.},
keywords = {icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
Samuele Fonio, Lorenzo Paletto, Mattia Cerrato, Dino Ienco, Roberto Esposito
Hierarchical priors for Hyperspherical Prototypical Networks Proceedings Article
In: 31st European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning, ESANN, Bruges, Belgium, 2023, (In print).
Abstract | Links | BibTeX | Tags: ai, icsc
@inproceedings{23:esann:fonio,
title = {Hierarchical priors for Hyperspherical Prototypical Networks},
author = {Samuele Fonio and Lorenzo Paletto and Mattia Cerrato and Dino Ienco and Roberto Esposito},
url = {https://www.esann.org/sites/default/files/proceedings/2023/ES2023-65.pdf},
year = {2023},
date = {2023-10-01},
booktitle = {31st European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning, ESANN},
address = {Bruges, Belgium},
abstract = {In this paper, we explore the usage of hierarchical priors to improve learning in contexts where the number of available examples is extremely low. Specifically, we consider a Prototype Learning setting where deep neural networks are used to embed data in hyperspherical geometries. In this scenario, we propose an innovative way to learn the prototypes by combining class separation and hierarchical information. In addition, we introduce a contrastive loss function capable of balancing the exploitation of prototypes through a prototype pruning mechanism. We compare the proposed method with state-of-the-art approaches on two public datasets.},
note = {In print},
keywords = {ai, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
Samuele Fonio
Benchmarking Federated Learning Frameworks for Medical Imaging Tasks Proceedings Article
In: Foresti, G. L., Fusiello, A., Hancock, E. (Ed.): Image Analysis and Processing - ICIAP 2023 Workshops. ICIAP 2023, Springer, Cham, Udine, Italy, 2023, (In print).
Abstract | Links | BibTeX | Tags: ai, eupilot, icsc
@inproceedings{23:iciap:fedmed:ws:fonio,
  author    = {Samuele Fonio},
  title     = {Benchmarking Federated Learning Frameworks for Medical Imaging Tasks},
  editor    = {G. L. Foresti and A. Fusiello and E. Hancock},
  booktitle = {Image Analysis and Processing - ICIAP 2023 Workshops. ICIAP 2023},
  volume    = {14366},
  publisher = {Springer, Cham},
  address   = {Udine, Italy},
  year      = {2023},
  date      = {2023-09-01},
  doi       = {10.1007/978-3-031-51026-7_20},
  url       = {https://link.springer.com/chapter/10.1007/978-3-031-51026-7_20},
  abstract  = {This paper presents a comprehensive benchmarking study of various Federated Learning (FL) frameworks applied to the task of Medical Image Classification. The research specifically addresses the often neglected and complex aspects of scalability and usability in off-the-shelf FL frameworks. Through experimental validation using real case deployments, we provide empirical evidence of the performance and practical relevance of open source FL frameworks. Our findings contribute valuable insights for anyone interested in deploying a FL system, with a particular focus on the healthcare domain—an increasingly attractive field for FL applications.},
  note      = {In print},
  keywords  = {ai, eupilot, icsc},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Gianluca Mittone, Samuele Fonio
Benchmarking Federated Learning Scalability Proceedings Article
In: Proceedings of the 2nd Italian Conference on Big Data and Data Science, ITADATA 2023, September 11-13, 2023, CEUR, Naples, Italy, 2023.
Abstract | Links | BibTeX | Tags: eupilot, HPC, icsc
@inproceedings{23:itadata:extabstract:mittone:fonio,
  author    = {Gianluca Mittone and Samuele Fonio},
  title     = {Benchmarking Federated Learning Scalability},
  booktitle = {Proceedings of the 2nd Italian Conference on Big Data and Data Science, ITADATA 2023, September 11-13, 2023},
  publisher = {CEUR},
  address   = {Naples, Italy},
  year      = {2023},
  date      = {2023-09-01},
  url       = {https://hdl.handle.net/2318/1933852},
  abstract  = {Federated Learning (FL) is a widespread Machine Learning paradigm handling distributed Big Data. In this work, we demonstrate that different FL frameworks expose different scaling performances despite adopting the same technologies, highlighting the need for a more comprehensive study on the topic.},
  keywords  = {eupilot, HPC, icsc},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Chi Hong, Jiyue Huang, Robert Birke, Lydia Y. Chen
Exploring and Exploiting Data-Free Model Stealing Proceedings Article
In: European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases (ECML PKDD), Turin, Italy, 2023.
Abstract | Links | BibTeX | Tags: eupilot, icsc
@inproceedings{23:hong:datafree,
title = {Exploring and Exploiting Data-Free Model Stealing},
author = {Chi Hong and Jiyue Huang and Robert Birke and Lydia Y. Chen},
url = {https://iris.unito.it/retrieve/ce44dec6-12c9-443d-99e7-f1141e50aa3a/Data-free%20Model%20Stealing.pdf},
doi = {10.1007/978-3-031-43424-2_2},
year = {2023},
date = {2023-09-01},
booktitle = {European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases (ECML PKDD)},
address = {Turin, Italy},
abstract = {Deep machine learning models, e.g., image classifier, are increasingly deployed in the wild to provide services to users. Adversaries are shown capable of stealing the knowledge of these models by sending inference queries and then training substitute models based on query results. The availability and quality of adversarial query inputs are undoubtedly crucial in the stealing process. The recent prior art demonstrates the feasibility of replacing real data by exploring the synthetic adversarial queries, so called data-free attacks, under strong adversarial assumptions, i.e., the deployed classifier returns not only class labels but also class probabilities. In this paper, we consider a general adversarial model and propose an effective data-free stealing algorithm, TandemGAN, which not only explores synthetic queries but also explicitly exploits the high quality ones. The core of TandemGAN is composed of (i) substitute model which imitates the target model through synthetic queries and their inferred labels; and (ii) a tandem generator consisting of two networks, Gx and Ge, which first explores the synthetic data space via Gx and then exploits high-quality examples via Ge to maximize the knowledge transfer from the target to the substitute model. Our results on four datasets show that the accuracy of our trained substitute model ranges between 96-67% of the target model and outperforms the existing state-of-the-art data-free model stealing approach by up to 2.5X.},
keywords = {eupilot, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
Gianluca Mittone, Walter Riviera, Iacopo Colonnelli, Robert Birke, Marco Aldinucci
Model-Agnostic Federated Learning Proceedings Article
In: Euro-Par 2023: Parallel Processing, pp. 383–396, Springer, Limassol, Cyprus, 2023.
Abstract | Links | BibTeX | Tags: ai, confidential, eupilot, icsc, riscv
@inproceedings{23:mittone:mafl,
title = {Model-Agnostic Federated Learning},
author = {Gianluca Mittone and Walter Riviera and Iacopo Colonnelli and Robert Birke and Marco Aldinucci},
url = {https://doi.org/10.1007/978-3-031-39698-4_26},
doi = {10.1007/978-3-031-39698-4_26},
year = {2023},
date = {2023-08-01},
booktitle = {Euro-Par 2023: Parallel Processing},
volume = {14100},
pages = {383--396},
publisher = {Springer},
address = {Limassol, Cyprus},
institution = {Computer Science Department, University of Torino},
abstract = {Since its debut in 2016, Federated Learning (FL) has been tied to the inner workings of Deep Neural Networks (DNNs). On the one hand, this allowed its development and widespread use as DNNs proliferated. On the other hand, it neglected all those scenarios in which using DNNs is not possible or advantageous. The fact that most current FL frameworks only allow training DNNs reinforces this problem. To address the lack of FL solutions for non-DNN-based use cases, we propose MAFL (Model-Agnostic Federated Learning). MAFL marries a model-agnostic FL algorithm, AdaBoost.F, with an open industry-grade FL framework: Intel OpenFL. MAFL is the first FL system not tied to any specific type of machine learning model, allowing exploration of FL scenarios beyond DNNs and trees. We test MAFL from multiple points of view, assessing its correctness, flexibility and scaling properties up to 64 nodes. We optimised the base software achieving a 5.5x speedup on a standard FL scenario. MAFL is compatible with x86-64, ARM-v8, Power and RISC-V.},
keywords = {ai, confidential, eupilot, icsc, riscv},
pubstate = {published},
tppubtype = {inproceedings}
}
Iacopo Colonnelli, Robert Birke, Marco Aldinucci
Experimenting with PyTorch on RISC-V Proceedings Article
In: RISC-V Summit Europe 2023, Barcelona, Spain, 2023, (Poster).
Abstract | Links | BibTeX | Tags: eupilot, icsc, riscv
@inproceedings{23:risc-v-summit,
title = {Experimenting with {PyTorch} on {RISC-V}},
author = {Iacopo Colonnelli and Robert Birke and Marco Aldinucci},
url = {https://iris.unito.it/retrieve/429bf344-9090-42c3-809c-1b8ac320a930/2023-06-08-Iacopo-COLONNELLI-abstract.pdf},
year = {2023},
date = {2023-06-01},
booktitle = {RISC-V Summit Europe 2023},
address = {Barcelona, Spain},
abstract = {RISC-V is an emerging instruction set architecture. Its modular and extensible open-source royalty-free design is increasingly attracting interest from both research and industry. Nowadays, different RISC-V-based boards can be bought off the shelf. However, software availability is equivalently vital in guaranteeing the RISC-V ecosystem's success. Here we contribute with the first publicly available port of PyTorch. PyTorch is one of the most popular Deep Learning libraries available today. As such, it is a crucial enabler in running state-of-the-art AI applications on RISC-V-based systems and a first step towards a fully democratic end-to-end codesign process.},
note = {Poster},
keywords = {eupilot, icsc, riscv},
pubstate = {published},
tppubtype = {inproceedings}
}
Marco Aldinucci, Robert Birke, Antonio Brogi, Emanuele Carlini, Massimo Coppola, Marco Danelutto, Patrizio Dazzi, Luca Ferrucci, Stefano Forti, Hanna Kavalionak, Gabriele Mencagli, Matteo Mordacchin, Marcelo Pasin, Federica Paganelli, Massimo Torquati
A Proposal for a Continuum-aware Programming Model: From Workflows to Services Autonomously Interacting in the Compute Continuum Proceedings Article
In: 2023 IEEE 47th Annual Computers, Software, and Applications Conference (COMPSAC), IEEE, Turin, Italy, 2023.
Abstract | Links | BibTeX | Tags: icsc
@inproceedings{23:aldinucci:continuum,
  author = {Marco Aldinucci and Robert Birke and Antonio Brogi and Emanuele Carlini and Massimo Coppola and Marco Danelutto and Patrizio Dazzi and Luca Ferrucci and Stefano Forti and Hanna Kavalionak and Gabriele Mencagli and Matteo Mordacchini and Marcelo Pasin and Federica Paganelli and Massimo Torquati},
  title = {A Proposal for a Continuum-aware Programming Model: From Workflows to Services Autonomously Interacting in the Compute Continuum},
  booktitle = {2023 IEEE 47th Annual Computers, Software, and Applications Conference (COMPSAC)},
  publisher = {IEEE},
  address = {Turin, Italy},
  year = {2023},
  date = {2023-06-01},
  doi = {10.1109/COMPSAC57700.2023.00287},
  url = {https://iris.unito.it/retrieve/2ae13a33-5814-43da-8ea6-2d3e8b122384/Continuum-aware-PM.pdf},
  abstract = {This paper proposes a continuum-aware programming model enabling the execution of application workflows across the compute continuum: cloud, fog and edge resources. It simplifies the management of heterogeneous nodes while alleviating the burden of programmers and unleashing innovation. This model optimizes the continuum through advanced development experiences by transforming workflows into autonomous service collaborations. It reduces complexity in positioning/interconnecting services across the continuum. A meta-model introduces high-level workflow descriptions as service networks with defined contracts and quality of service, thus enabling the deployment/management of workflows as first-class entities. It also provides automation based on policies, monitoring and heuristics. Tailored mechanisms orchestrate/manage services across the continuum, optimizing performance, cost, data protection and sustainability while managing risks. This model facilitates incremental development with visibility of design impacts and seamless evolution of applications and infrastructures. In this work, we explore this new computing paradigm showing how it can trigger the development of a new generation of tools to support the compute continuum progress.},
  keywords = {icsc},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Gianluca Mittone, Nicolò Tonci, Robert Birke, Iacopo Colonnelli, Doriana Medić, Andrea Bartolini, Roberto Esposito, Emanuele Parisi, Francesco Beneventi, Mirko Polato, Massimo Torquati, Luca Benini, Marco Aldinucci
Experimenting with Emerging RISC-V Systems for Decentralised Machine Learning Proceedings Article
In: 20th ACM International Conference on Computing Frontiers (CF '23), ACM, Bologna, Italy, 2023, ISBN: 979-8-4007-0140-5, (https://arxiv.org/abs/2302.07946).
Abstract | Links | BibTeX | Tags: ai, confidential, eupilot, HPC, icsc, riscv
@inproceedings{23:mittone:fl-riscv,
  author = {Gianluca Mittone and Nicolò Tonci and Robert Birke and Iacopo Colonnelli and Doriana Medić and Andrea Bartolini and Roberto Esposito and Emanuele Parisi and Francesco Beneventi and Mirko Polato and Massimo Torquati and Luca Benini and Marco Aldinucci},
  title = {Experimenting with Emerging RISC-V Systems for Decentralised Machine Learning},
  booktitle = {20th ACM International Conference on Computing Frontiers (CF '23)},
  publisher = {ACM},
  address = {Bologna, Italy},
  institution = {Computer Science Department, University of Torino},
  year = {2023},
  date = {2023-05-01},
  doi = {10.1145/3587135.3592211},
  isbn = {979-8-4007-0140-5},
  url = {https://dl.acm.org/doi/pdf/10.1145/3587135.3592211},
  note = {https://arxiv.org/abs/2302.07946},
  abstract = {Decentralised Machine Learning (DML) enables collaborative machine learning without centralised input data. Federated Learning (FL) and Edge Inference are examples of DML. While tools for DML (especially FL) are starting to flourish, many are not flexible and portable enough to experiment with novel systems (e.g., RISC-V), non-fully connected topologies, and asynchronous collaboration schemes. We overcome these limitations via a domain-specific language allowing to map DML schemes to an underlying middleware, i.e. the FastFlow parallel programming library. We experiment with it by generating different working DML schemes on two emerging architectures (ARM-v8, RISC-V) and the x86-64 platform. We characterise the performance and energy efficiency of the presented schemes and systems. As a byproduct, we introduce a RISC-V porting of the PyTorch framework, the first publicly available to our knowledge.},
  keywords = {ai, confidential, eupilot, HPC, icsc, riscv},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Gianluca Mittone, Filip Svoboda, Marco Aldinucci, Nicholas D. Lane, Pietro Lio
A Federated Learning Benchmark for Drug-Target Interaction Proceedings Article
In: Companion Proceedings of the ACM Web Conference 2023 (WWW '23 Companion), ACM, Austin, Texas, 2023, ISBN: 978-1-4503-9419-2, (https://arxiv.org/abs/2302.07684).
Abstract | Links | BibTeX | Tags: ai, confidential, eupilot, icsc
@inproceedings{23:mittone:dti,
  author = {Gianluca Mittone and Filip Svoboda and Marco Aldinucci and Nicholas D. Lane and Pietro Lio},
  title = {A Federated Learning Benchmark for Drug-Target Interaction},
  booktitle = {Companion Proceedings of the ACM Web Conference 2023 (WWW '23 Companion)},
  publisher = {ACM},
  address = {Austin, Texas},
  institution = {Computer Science Department, University of Torino},
  year = {2023},
  date = {2023-04-01},
  doi = {10.1145/3543873.3587687},
  isbn = {978-1-4503-9419-2},
  url = {https://hdl.handle.net/2318/1898472},
  note = {https://arxiv.org/abs/2302.07684},
  abstract = {Aggregating pharmaceutical data in the drug-target interaction (DTI) domain has the potential to deliver life-saving breakthroughs. It is, however, notoriously difficult due to regulatory constraints and commercial interests. This work proposes the application of federated learning, which we argue to be reconcilable with the industry's constraints, as it does not require sharing of any information that would reveal the entities' data or any other high-level summary of it. When used on a representative GraphDTA model and the KIBA dataset it achieves up to 15 percent improved performance relative to the best available non-privacy preserving alternative. Our extensive battery of experiments shows that, unlike in other domains, the non-IID data distribution in the DTI datasets does not deteriorate FL performance. Additionally, we identify a material trade-off between the benefits of adding new data, and the cost of adding more clients.},
  keywords = {ai, confidential, eupilot, icsc},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Bruno Casella, Roberto Esposito, Antonio Sciarappa, Carlo Cavazzoni, Marco Aldinucci
Experimenting with Normalization Layers in Federated Learning on non-IID scenarios Technical Report
Computer Science Department, University of Torino 2023.
Abstract | Links | BibTeX | Tags: confidential, epi, icsc
@techreport{23:casella:normalization,
  author = {Bruno Casella and Roberto Esposito and Antonio Sciarappa and Carlo Cavazzoni and Marco Aldinucci},
  title = {Experimenting with Normalization Layers in Federated Learning on non-IID scenarios},
  institution = {Computer Science Department, University of Torino},
  year = {2023},
  date = {2023-01-01},
  url = {https://arxiv.org/pdf/2303.10630.pdf},
  abstract = {Training Deep Learning (DL) models require large, high-quality datasets, often assembled with data from different institutions. Federated Learning (FL) has been emerging as a method for privacy-preserving pooling of datasets employing collaborative training from different institutions by iteratively globally aggregating locally trained models. One critical performance challenge of FL is operating on datasets not independently and identically distributed (non-IID) among the federation participants. Even though this fragility cannot be eliminated, it can be debunked by a suitable optimization of two hyperparameters: layer normalization methods and collaboration frequency selection. In this work, we benchmark five different normalization layers for training Neural Networks (NNs), two families of non-IID data skew, and two datasets. Results show that Batch Normalization, widely employed for centralized DL, is not the best choice for FL, whereas Group and Layer Normalization consistently outperform Batch Normalization. Similarly, frequent model aggregation decreases convergence speed and mode quality.},
  keywords = {confidential, epi, icsc},
  pubstate = {published},
  tppubtype = {techreport}
}
Alessia Antelmi, Luca La Cava, Arianna Pera
Tell Me Who You Are and I Will Predict Your Vulnerability to Political Persuasion Techniques Proceedings Article
In: The 12th International Conference on Complex Networks and their Applications-Book of Abstracts, 2023.
Abstract | Links | BibTeX | Tags: analytics, icsc
@inproceedings{Antelmi_CNA1_2023,
  author = {Alessia Antelmi and Luca La Cava and Arianna Pera},
  title = {Tell Me Who You Are and I Will Predict Your Vulnerability to Political Persuasion Techniques},
  booktitle = {The 12th International Conference on Complex Networks and their Applications-Book of Abstracts},
  year = {2023},
  date = {2023-01-01},
  url = {https://iris.unito.it/bitstream/2318/1949370/1/_CNA__23__Personality_vs_propaganda.pdf},
  abstract = {Given the evolving role of social media in political communication and the strategic use of these platforms by politicians to shape public opinion, research has commonly focused on investigating computational propaganda as a means for automated information diffusion. Focusing on a less explored yet promising line, we aim to assess political persuasion in digital contexts by introducing a computational framework that combines Natural Language Processing and Network Science methods to investigate the linkage between persuasion techniques on social media and personality traits of online political audiences. Our final goal is to enhance public awareness of political tactics and encourage critical thinking in response to the online spread of political information.},
  keywords = {analytics, icsc},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Alessia Antelmi, Luca La Cava, Arianna Pera
Finding Hidden Swingers in the 2022 Italian Elections Twitter Discourse Proceedings Article
In: The 12th International Conference on Complex Networks and their Applications-Book of Abstracts, 2023.
Abstract | Links | BibTeX | Tags: analytics, icsc
@inproceedings{Antelmi_CNA_2023,
  author = {Alessia Antelmi and Luca La Cava and Arianna Pera},
  title = {Finding Hidden Swingers in the 2022 Italian Elections Twitter Discourse},
  booktitle = {The 12th International Conference on Complex Networks and their Applications-Book of Abstracts},
  year = {2023},
  date = {2023-01-01},
  url = {https://iris.unito.it/bitstream/2318/1949354/1/_CNA__23__TweetYourMind.pdf},
  abstract = {The volume of the Italian online political discourse on social media has recently increased, but the coverage level does not compare with other Countries such as the US. Nonetheless, researchers focused on studying polarization and homophily with respect to political debates or investigating the role of populism in online engagement. In this research landscape, the analysis of political preference shifts through social media remains to be explored. We aim to bridge this gap by examining the Twitter discourse during the 2022 Italian general elections, with a specific emphasis on political "swingers". In particular, our findings indicate a stable political discourse in Italy, yet they also uncover a growing presence of political swingers willing to shift their support to significantly different factions.},
  keywords = {analytics, icsc},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Alessia Antelmi, Massimo Torquati, Daniele Gregori, Francesco Polzella, Gianmarco Spinatelli, Marco Aldinucci
The SWH-Analytics Framework Proceedings Article
In: Bena, Nicola, Martino, Beniamino Di, Maratea, Antonio, Sperduti, Alessandro, Nardo, Emanuel Di, Ciaramella, Angelo, Montella, Raffaele, Ardagna, Claudio A. (Ed.): Proceedings of the 2nd Italian Conference on Big Data and Data Science (ITADATA 2023), Naples, Italy, September 11-13, 2023, CEUR-WS.org, 2023.
Abstract | Links | BibTeX | Tags: admire, analytics, icsc
@inproceedings{Antelmi_ITADATA_2023,
  author = {Alessia Antelmi and Massimo Torquati and Daniele Gregori and Francesco Polzella and Gianmarco Spinatelli and Marco Aldinucci},
  editor = {Nicola Bena and Beniamino Di Martino and Antonio Maratea and Alessandro Sperduti and Emanuel Di Nardo and Angelo Ciaramella and Raffaele Montella and Claudio A. Ardagna},
  title = {The SWH-Analytics Framework},
  booktitle = {Proceedings of the 2nd Italian Conference on Big Data and Data Science (ITADATA 2023), Naples, Italy, September 11-13, 2023},
  series = {CEUR Workshop Proceedings},
  volume = {3606},
  publisher = {CEUR-WS.org},
  year = {2023},
  date = {2023-01-01},
  url = {https://ceur-ws.org/Vol-3606/paper76.pdf},
  abstract = {The Software Heritage (SWH) dataset serves as a vast repository for open-source code, with the ambitious goal of preserving all publicly available open-source projects. Despite being designed to effectively archive project files, its size of nearly 1 petabyte presents challenges in efficiently supporting Big Data MapReduce or AI systems. To address this disparity and enable seamless custom analytics on the SWH dataset, we present the SWH-Analytics (SWHA) architecture. This development environment quickly and transparently runs custom analytic applications on open-source software data preserved over time by SWH.},
  keywords = {admire, analytics, icsc},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Iacopo Colonnelli
Workflow Models for Heterogeneous Distributed Systems Proceedings Article
In: Bena, Nicola, Martino, Beniamino Di, Maratea, Antonio, Sperduti, Alessandro, Nardo, Emanuel Di, Ciaramella, Angelo, Montella, Raffaele, Ardagna, Claudio A. (Ed.): Proceedings of the 2nd Italian Conference on Big Data and Data Science (ITADATA 2023), Naples, Italy, September 11-13, 2023, CEUR-WS.org, 2023.
Abstract | Links | BibTeX | Tags: across, eupex, icsc, jupyter-workflow, streamflow
@inproceedings{23:colonnelli:itadata,
  author = {Iacopo Colonnelli},
  editor = {Nicola Bena and Beniamino Di Martino and Antonio Maratea and Alessandro Sperduti and Emanuel Di Nardo and Angelo Ciaramella and Raffaele Montella and Claudio A. Ardagna},
  title = {Workflow Models for Heterogeneous Distributed Systems},
  booktitle = {Proceedings of the 2nd Italian Conference on Big Data and Data Science (ITADATA 2023), Naples, Italy, September 11-13, 2023},
  series = {CEUR Workshop Proceedings},
  volume = {3606},
  publisher = {CEUR-WS.org},
  year = {2023},
  date = {2023-01-01},
  url = {https://ceur-ws.org/Vol-3606/invited77.pdf},
  abstract = {This article introduces a novel hybrid workflow abstraction that injects topology awareness directly into the definition of a distributed workflow model. In particular, the article briefly discusses the advantages brought by this approach to the design and orchestration of large-scale data-oriented workflows, the current level of support from state-of-the-art workflow systems, and some future research directions.},
  keywords = {across, eupex, icsc, jupyter-workflow, streamflow},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Bruno Casella, Lorenzo Paletto
Predicting Cryptocurrencies Market Phases through On-Chain Data Long-Term Forecasting Proceedings Article
In: Proceedings of the 2023 IEEE International Conference on Blockchain and Cryptocurrency (ICBC), 1-5 May 2023, Dubai, 2023, (https://ieeexplore.ieee.org/document/10174989).
Abstract | Links | BibTeX | Tags: epi, icsc
@inproceedings{23:casella:onchain,
  author = {Bruno Casella and Lorenzo Paletto},
  title = {Predicting Cryptocurrencies Market Phases through On-Chain Data Long-Term Forecasting},
  booktitle = {Proceedings of the 2023 IEEE International Conference on Blockchain and Cryptocurrency (ICBC), 1-5 May 2023, Dubai},
  year = {2023},
  date = {2023-01-01},
  doi = {10.1109/ICBC56567.2023.10174989},
  url = {https://iris.unito.it/bitstream/2318/1902652/1/6.%20ICBC23%20-%20PREDICTING%20BTC.pdf},
  note = {https://ieeexplore.ieee.org/document/10174989},
  abstract = {Blockchain, the underlying technology of Bitcoin and several other cryptocurrencies, like Ethereum, produces a massive amount of open-access data that can be analyzed, providing important information about the network's activity and its respective token. The on-chain data have extensively been used as input to Machine Learning algorithms for predicting cryptocurrencies' future prices; however, there is a lack of study in predicting the future behaviour of on-chain data. This study aims to show how on-chain data can be used to detect cryptocurrency market regimes, like minimum and maximum, bear and bull market phases, and how forecasting these data can provide an optimal asset allocation for long-term investors.},
  keywords = {epi, icsc},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Bruno Casella, Samuele Fonio
Architecture-Based FedAvg for Vertical Federated Learning Proceedings Article
In: Proceedings of the 3rd Workshop on Distributed Machine Learning for the Intelligent Computing Continuum (DML-ICC), IEEE/ACM UCC 2023, Taormina, Italy, 4 December 2023, 2023, (https://iris.unito.it/bitstream/2318/1949730/1/HALF_HVL_for_DML_ICC23___Taormina-2.pdf).
Abstract | Links | BibTeX | Tags: ai, epi, icsc
@inproceedings{23:casella:architecturalfedavg,
  author = {Bruno Casella and Samuele Fonio},
  title = {Architecture-Based FedAvg for Vertical Federated Learning},
  booktitle = {Proceedings of the 3rd Workshop on Distributed Machine Learning for the Intelligent Computing Continuum (DML-ICC), IEEE/ACM UCC 2023, Taormina, Italy, 4 December 2023},
  year = {2023},
  date = {2023-01-01},
  doi = {10.1145/3603166.3632559},
  url = {https://iris.unito.it/retrieve/173d9960-8531-419d-9bd5-5acce6694c4e/Aggregation%20Based%20VFL.pdf},
  note = {https://iris.unito.it/bitstream/2318/1949730/1/HALF_HVL_for_DML_ICC23___Taormina-2.pdf},
  abstract = {Federated Learning (FL) has emerged as a promising solution to address privacy concerns by collaboratively training Deep Learning (DL) models across distributed parties. This work proposes an architecture-based aggregation strategy in Vertical FL, where parties hold data with different attributes but shared instances. Our approach leverages the identical architectural parts, i.e. neural network layers, of different models to selectively aggregate weights, which is particularly relevant when collaborating with institutions holding different types of datasets, i.e., image, text, or tabular datasets. In a scenario where two entities train DL models, such as a Convolutional Neural Network (CNN) and a Multi-Layer Perceptron (MLP), our strategy computes the average only for architecturally identical segments. This preserves data-specific features learned from demographic and clinical data. We tested our approach on two clinical datasets, i.e., the COVID-CXR dataset and the ADNI study. Results show that our method achieves comparable results with the centralized scenario, in which all the data are collected in a single data lake, and benefits from FL generalizability. In particular, compared to the non-federated models, our proposed proof-of-concept model exhibits a slight performance loss on the COVID-CXR dataset (less than 8%), but outperforms ADNI models by up to 12%. Moreover, communication costs between training rounds are minimized by exchanging only the dense layer parameters.},
  keywords = {ai, epi, icsc},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Bruno Casella, Walter Riviera, Marco Aldinucci, Gloria Menegaz
MERGE: A model for multi-input biomedical federated learning Journal Article
In: Patterns, pp. 100856, 2023, ISSN: 2666-3899.
Abstract | Links | BibTeX | Tags: ai, epi, icsc
@article{23:fl:patterns,
  author = {Bruno Casella and Walter Riviera and Marco Aldinucci and Gloria Menegaz},
  title = {MERGE: A model for multi-input biomedical federated learning},
  journal = {Patterns},
  pages = {100856},
  year = {2023},
  date = {2023-01-01},
  doi = {10.1016/j.patter.2023.100856},
  issn = {2666-3899},
  url = {https://www.sciencedirect.com/science/article/pii/S2666389923002404},
  abstract = {Driven by the deep learning (DL) revolution, artificial intelligence (AI) has become a fundamental tool for many biomedical tasks, including analyzing and classifying diagnostic images. Imaging, however, is not the only source of information. Tabular data, such as personal and genomic data and blood test results, are routinely collected but rarely considered in DL pipelines. Nevertheless, DL requires large datasets that often must be pooled from different institutions, raising non-trivial privacy concerns. Federated learning (FL) is a cooperative learning paradigm that aims to address these issues by moving models instead of data across different institutions. Here, we present a federated multi-input architecture using images and tabular data as a methodology to enhance model performance while preserving data privacy. We evaluated it on two showcases: the prognosis of COVID-19 and patients' stratification in Alzheimer's disease, providing evidence of enhanced accuracy and F1 scores against single-input models and improved generalizability against non-federated models.},
  keywords = {ai, epi, icsc},
  pubstate = {published},
  tppubtype = {article}
}
Pedro Ângelo, Viviana Bono, Mariangiola Dezani-Ciancaglini, Mário Florido
Gradual Guarantee for FJ with lambda-Expressions Proceedings Article
In: Tomb, Aaron (Ed.): Proceedings of the 25th ACM International Workshop on Formal Techniques for Java-like Programs, FTfJP 2023, Seattle, WA, USA, 18 July 2023, pp. 32–38, ACM, 2023.
Links | BibTeX | Tags: admire, icsc
@inproceedings{DBLP:conf/ftfjp/AngeloBDF23,
  author = {Pedro Ângelo and Viviana Bono and Mariangiola Dezani-Ciancaglini and Mário Florido},
  editor = {Aaron Tomb},
  title = {Gradual Guarantee for FJ with lambda-Expressions},
  booktitle = {Proceedings of the 25th ACM International Workshop on Formal Techniques for Java-like Programs, FTfJP 2023, Seattle, WA, USA, 18 July 2023},
  pages = {32--38},
  publisher = {ACM},
  year = {2023},
  date = {2023-01-01},
  doi = {10.1145/3605156.3606453},
  url = {https://doi.org/10.1145/3605156.3606453},
  keywords = {admire, icsc},
  pubstate = {published},
  tppubtype = {inproceedings}
}
William Fornaciari, Federico Reghenzani, Federico Terraneo, Davide Baroffio, Cecilia Metra, Martin Omana, Josie E. Rodriguez Condia, Matteo Sonza Reorda, Robert Birke, Iacopo Colonnelli, Gianluca Mittone, Marco Aldinucci, Gabriele Mencagli, Francesco Iannone, Filippo Palombi, Giuseppe Zummo, Daniele Cesarini, Federico Tesser
RISC-V-based Platforms for HPC: Analyzing Non-functional Properties for Future HPC and Big-Data Clusters Proceedings Article
In: Embedded Computer Systems: Architectures, Modeling, and Simulation - 23rd International Conference, SAMOS 2023, Samos, Greece, 2023, (icsc).
Abstract | Links | BibTeX | Tags: icsc, riscv
@inproceedings{23:SAMOS,
  author = {William Fornaciari and Federico Reghenzani and Federico Terraneo and Davide Baroffio and Cecilia Metra and Martin Omana and Josie E. Rodriguez Condia and Matteo Sonza Reorda and Robert Birke and Iacopo Colonnelli and Gianluca Mittone and Marco Aldinucci and Gabriele Mencagli and Francesco Iannone and Filippo Palombi and Giuseppe Zummo and Daniele Cesarini and Federico Tesser},
  title = {RISC-V-based Platforms for HPC: Analyzing Non-functional Properties for Future HPC and Big-Data Clusters},
  booktitle = {Embedded Computer Systems: Architectures, Modeling, and Simulation - 23rd International Conference, SAMOS 2023},
  address = {Samos, Greece},
  year = {2023},
  date = {2023-01-01},
  doi = {10.1007/978-3-031-46077-7_26},
  url = {https://iris.unito.it/retrieve/b627eab0-3aa1-4fd7-8685-f47c62c792b3/SAMOS_2023_CN_HPC_FL1.pdf},
  note = {icsc},
  abstract = {High-Performance Computing (HPC) have evolved to be used to perform simulations of systems where physical experimentation is prohibitively impractical, expensive, or dangerous. This paper provides a general overview and showcases the analysis of non-functional properties in RISC-V-based platforms for HPCs. In particular, our analyses target the evaluation of power and energy control, thermal management, and reliability assessment of promising systems, structures, and technologies devised for current and future generation of HPC machines. The main set of design methodologies and technologies developed within the activities of the Future and HPC & Big Data spoke of the National Centre of HPC, Big Data and Quantum Computing project are described along with the description of the testbed for experimenting two-phase cooling approaches.},
  keywords = {icsc, riscv},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Alessia Antelmi, Daniele De Vinco, Gennaro Cordasco, Carmine Spagnuolo
Towards Unraveling Developers Communities in Stack Overflow and Reddit Proceedings Article
In: International Conference on Computational Social Science 2023, 2023.
Abstract | Links | BibTeX | Tags: analytics, icsc
@inproceedings{Antelmi_IC2S2_2023,
  author = {Alessia Antelmi and Daniele De Vinco and Gennaro Cordasco and Carmine Spagnuolo},
  title = {Towards Unraveling Developers Communities in Stack Overflow and Reddit},
  booktitle = {International Conference on Computational Social Science 2023},
  year = {2023},
  date = {2023-01-01},
  url = {https://openreview.net/forum?id=WP5ZaAFP19},
  abstract = {This work investigates the developers' behavior and community formation around the twenty most popular programming languages. We examined two consecutive years of programming-related questions from Stack Overflow and Reddit, performing a longitudinal study on users' posting activity and their high-order interaction patterns abstracted via hypergraphs. Our analysis highlighted crucial differences in how these QA platforms are utilized by their users. In line with previous literature, it emphasized the constant decline of Stack Overflow in favor of more community-friendly platforms, such as Reddit, which has been growing rapidly lately.},
  keywords = {analytics, icsc},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Alessia Antelmi
Engagement in Open Data Workshops: The dark side of remote settings Proceedings Article
In: Methodologies and Intelligent Systems for Technology Enhanced Learning, 12th International Conference, Springer International Publishing, Cham, 2023.
Abstract | Links | BibTeX | Tags: analytics, icsc
@inproceedings{Antelmi_TEL4FC_2023,
  author = {Alessia Antelmi},
  title = {Engagement in Open Data Workshops: The dark side of remote settings},
  booktitle = {Methodologies and Intelligent Systems for Technology Enhanced Learning, 12th International Conference},
  publisher = {Springer International Publishing},
  address = {Cham},
  year = {2023},
  date = {2023-01-01},
  doi = {10.1007/978-3-031-42134-1_33},
  url = {https://link.springer.com/chapter/10.1007/978-3-031-42134-1_33},
  abstract = {The increasing availability of Open Data gives birth to a fertile field for interested stakeholders to create value out of them; however, limited technical expertise and poor awareness are crucial barriers to their exploitation. Because of these reasons, there is an urge for learners to acquire data and information literacy competencies, which are essential for 21st-century skills, and become familiar with available Open Data sources and their potential uses. To promote the dialogue around activities to boost recognition of Open Data and improve users' skills to work with them, we proposed a series of workshops to introduce Italian high school learners to searching for, authoring, and building effective communication based on Open Data. This article describes an ongoing activity and details its organization, reports preliminary results on learners' engagement, and discusses both challenges of the remote setting as well as promising learning outcomes.},
  keywords = {analytics, icsc},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Doriana Medić, Marco Aldinucci
Towards formal model for location aware workflows Proceedings Article
In: Shahriar, Hossain, Teranishi, Yuuichi, Cuzzocrea, Alfredo, Sharmin, Moushumi, Towey, Dave, Majumder, A. K. M. Jahangir Alam, Kashiwazaki, Hiroki, Yang, Ji-Jiang, Takemoto, Michiharu, Sakib, Nazmus, Banno, Ryohei, Ahamed, Sheikh Iqbal (Ed.): 47th IEEE Annual Computers, Software, and Applications Conference, COMPSAC 2023, pp. 1864–1869, IEEE, Torino, Italy, 2023.
Abstract | Links | BibTeX | Tags: eupex, icsc, semantics
@inproceedings{23:medic:formal-model,
  author = {Doriana Medić and Marco Aldinucci},
  editor = {Hossain Shahriar and Yuuichi Teranishi and Alfredo Cuzzocrea and Moushumi Sharmin and Dave Towey and A. K. M. Jahangir Alam Majumder and Hiroki Kashiwazaki and Ji-Jiang Yang and Michiharu Takemoto and Nazmus Sakib and Ryohei Banno and Sheikh Iqbal Ahamed},
  title = {Towards formal model for location aware workflows},
  booktitle = {47th IEEE Annual Computers, Software, and Applications Conference, COMPSAC 2023},
  pages = {1864--1869},
  publisher = {IEEE},
  address = {Torino, Italy},
  year = {2023},
  date = {2023-01-01},
  doi = {10.1109/COMPSAC57700.2023.00289},
  url = {https://iris.unito.it/retrieve/1f9f959c-cd88-4d9c-90ea-54f1c86a15bc/6210-medic.pdf},
  abstract = {Designing complex applications and executing them on large-scale topologies of heterogeneous architectures is becoming increasingly crucial in many scientific domains. As a result, diverse workflow modelling paradigms are developed, most of them with no formalisation provided. In these circumstances, comparing two different models or switching from one system to the other becomes a hard nut to crack. This paper investigates the capability of process algebra to model a location aware workflow system. Distributed π-calculus is considered as the base of the formal model due to its ability to describe the communicating components that change their structure as an outcome of the communication. Later, it is discussed how the base model could be extended or modified to capture different features of location aware workflow system. The intention of this paper is to highlight the fact that due to its flexibility, π-calculus, could be a good candidate to represent the behavioural perspective of the workflow system.},
  keywords = {eupex, icsc, semantics},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Alberto Mulone, Sherine Awad, Davide Chiarugi, Marco Aldinucci
Porting the Variant Calling Pipeline for NGS data in cloud-HPC environment Proceedings Article
In: Shahriar, Hossain, Teranishi, Yuuichi, Cuzzocrea, Alfredo, Sharmin, Moushumi, Towey, Dave, Majumder, A. K. M. Jahangir Alam, Kashiwazaki, Hiroki, Yang, Ji-Jiang, Takemoto, Michiharu, Sakib, Nazmus, Banno, Ryohei, Ahamed, Sheikh Iqbal (Ed.): 47th IEEE Annual Computers, Software, and Applications Conference, COMPSAC 2023, pp. 1858–1863, IEEE, Torino, Italy, 2023.
Abstract | Links | BibTeX | Tags: across, icsc, streamflow
@inproceedings{23:mulone:wide:vcp,
  author = {Alberto Mulone and Sherine Awad and Davide Chiarugi and Marco Aldinucci},
  editor = {Hossain Shahriar and Yuuichi Teranishi and Alfredo Cuzzocrea and Moushumi Sharmin and Dave Towey and A. K. M. Jahangir Alam Majumder and Hiroki Kashiwazaki and Ji-Jiang Yang and Michiharu Takemoto and Nazmus Sakib and Ryohei Banno and Sheikh Iqbal Ahamed},
  title = {Porting the Variant Calling Pipeline for NGS data in cloud-HPC environment},
  booktitle = {47th IEEE Annual Computers, Software, and Applications Conference, COMPSAC 2023},
  pages = {1858--1863},
  publisher = {IEEE},
  address = {Torino, Italy},
  year = {2023},
  date = {2023-01-01},
  doi = {10.1109/COMPSAC57700.2023.00288},
  url = {https://iris.unito.it/bitstream/2318/1919364/1/paper.pdf},
  abstract = {In recent years we have understood the importance of analyzing and sequencing human genetic variation. A relevant aspect that emerged from the Covid-19 pandemic was the need to obtain results very quickly; this involved using High-Performance Computing (HPC) environments to execute the Next Generation Sequencing (NGS) pipeline. However, HPC is not always the most suitable environment for the entire execution of a pipeline, especially when it involves many heterogeneous tools. The ability to execute parts of the pipeline on different environments can lead to higher performance but also cheaper executions. This work shows the design and optimization process that led us to a state-of-the-art Variant Calling hybrid workflow based on the StreamFlow Workflow Management System (WfMS). We also compare StreamFlow with Snakemake, an established WfMS targeting HPC facilities, observing comparable performance on single environments and satisfactory improvements with a hybrid cloud-HPC configuration.},
  keywords = {across, icsc, streamflow},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Talks
2024
Lorenzo Brescia
Secure workflow computation Miscellaneous
Presentation of previous works and future directions on securing workflows, 2024.
Links | BibTeX | Tags: confidential, icsc
@misc{24:brescia:swiss:talk,
  author       = {Lorenzo Brescia},
  title        = {Secure workflow computation},
  howpublished = {Presentation of previous works and future directions on securing workflows},
  address      = {Neuchatel, Switzerland},
  year         = {2024},
  date         = {2024-11-01},
  url          = {https://datacloud.di.unito.it/index.php/s/5nwZ2bi7by3twQB},
  keywords     = {confidential, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Roberto Esposito, Mirko Polato, Samuele Fonio
FedHP: Federated Learning with Hyperspherical Prototypical Regularization Miscellaneous
32nd European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning, (ESANN), 2024.
Abstract | Links | BibTeX | Tags: ai, fl, icsc
@misc{24:esann:fedhp,
  author       = {Roberto Esposito and Mirko Polato and Samuele Fonio},
  title        = {FedHP: Federated Learning with Hyperspherical Prototypical Regularization},
  howpublished = {32nd European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning, (ESANN)},
  address      = {Bruges, Belgium},
  year         = {2024},
  date         = {2024-10-01},
  url          = {https://datacloud.di.unito.it/index.php/s/fKyKSSFQKT3LTxW},
  abstract     = {This paper introduces FedHP, an innovative algorithm that integrates federated learning, hyperspherical geometries, and prototype learning. Federated Learning (FL) has gained prominence as a privacy-preserving method for building robust models across distributed datasets. Traditionally, FL exchanges model parameters to maintain data privacy; however, in scenarios with expensive data communication, exchanging large neural network models becomes impractical. In such cases, prototype learning offers a viable solution by facilitating the exchange of only a few prototypes. Motivated by these considerations, our approach capitalizes on recent advancements in prototype learning, particularly the advantages offered by non-Euclidean geometries. In addition to presenting FedHP, we offer empirical evidence demonstrating its comparability to other state-of-the-art approaches while significantly reducing communication costs.},
  keywords     = {ai, fl, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Adriano Marques Garcia
Assessing Large Language Models Inference Performance on a 64-core RISC-V CPU with Silicon-Enabled Vectors Miscellaneous
Proceedings of BigHPC2024: Special Track on Big Data and High-Performance Computing, co-located with the 3rd Italian Conference on Big Data and Data Science, ITADATA2024, 2024.
Links | BibTeX | Tags: confidential, icsc
@misc{24:garcia:bigHPC:talk,
  author       = {Adriano Marques Garcia},
  title        = {Assessing Large Language Models Inference Performance on a 64-core RISC-V CPU with Silicon-Enabled Vectors},
  howpublished = {Proceedings of BigHPC2024: Special Track on Big Data and High-Performance Computing, co-located with the 3\textsuperscript{rd} Italian Conference on Big Data and Data Science, ITADATA2024},
  address      = {Pisa, Italy},
  year         = {2024},
  date         = {2024-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/eN6Z62RQr2QsRYa},
  keywords     = {confidential, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Lorenzo Brescia
Performance Analysis on DNA Alignment Workload with Intel SGX Multithreading Miscellaneous
Proceedings of BigHPC2024: Special Track on Big Data and High-Performance Computing, co-located with the 3rd Italian Conference on Big Data and Data Science, ITADATA2024, 2024.
Links | BibTeX | Tags: confidential, icsc
@misc{24:brescia:bigHPC:talk,
  author       = {Lorenzo Brescia},
  title        = {Performance Analysis on DNA Alignment Workload with Intel SGX Multithreading},
  howpublished = {Proceedings of BigHPC2024: Special Track on Big Data and High-Performance Computing, co-located with the 3\textsuperscript{rd} Italian Conference on Big Data and Data Science, ITADATA2024},
  address      = {Pisa, Italy},
  year         = {2024},
  date         = {2024-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/wqgYiKpHBw5zbSa},
  keywords     = {confidential, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli
Scientific Workflows in the Heterogeneous Computing Era Miscellaneous
2024.
@misc{24:icolonne:ICSC,
  author       = {Iacopo Colonnelli},
  title        = {Scientific Workflows in the Heterogeneous Computing Era},
  address      = {Roma, Italy},
  year         = {2024},
  date         = {2024-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/CyxiWsDbdg6rbpQ},
  keywords     = {icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Giulio Malenza
Exploiting C++ Parallel Algorithms through FastFlow Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: icsc
@misc{24:gmalenza:BigHPC2024,
  author       = {Giulio Malenza},
  title        = {Exploiting C++ Parallel Algorithms through FastFlow},
  address      = {Pisa, Italy},
  year         = {2024},
  date         = {2024-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/GcpQ8cz9BRyM85B},
  abstract     = {High-performance computing and artificial intelligent simulations necessitate the rapid processing of large quantities of data. To handle such data volumes efficiently, leveraging the parallelism inherent in algorithms is crucial. Consequently, parallel programming frameworks have been developed to fully exploit modern parallel architectures. Among these, C++ PSTL stands out for its user-friendliness, portability, and high performance.
In this study, we introduce a back-end for the PSTL implemented using the FastFlow parallel programming framework. We will evaluate correctness and performance of the back-end comparing results with other coming from traditional vendor-dependent back-ends like TBB and nvc++. Performance metrics are derived from running the LULESH application on both RISC-V and ARM architectures. Our results indicate that all three back-ends deliver comparable performance.},
  keywords     = {icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
In this study, we introduce a back-end for the PSTL implemented using the FastFlow parallel programming framework. We will evaluate correctness and performance of the back-end comparing results with other coming from traditional vendor-dependent back-ends like TBB and nvc++. Performance metrics are derived from running the LULESH application on both RISC-V and ARM architectures. Our results indicate that all three back-ends deliver comparable performance.
Giulio Malenza
Exploring energy consumption of AI frameworks on a 64-core RV64 Server CPU Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: ai, DYMAN, icsc
@misc{24:gmalenza:scihpcexa,
  author       = {Giulio Malenza},
  title        = {Exploring energy consumption of AI frameworks on a 64-core RV64 Server CPU},
  address      = {Pisa, Italy},
  year         = {2024},
  date         = {2024-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/5aTdyzNB6n9CREq},
  abstract     = {In today's era of rapid technological advancement, artificial intelligence (AI) applications require large-scale, high-performance, and data-intensive computations, leading to significant energy demands. Addressing this challenge necessitates a combined approach involving both hardware and software innovations. Hardware manufacturers are developing new, efficient, and specialized solutions, with the RISC-V architecture emerging as a prominent player due to its open, extensible, and energy-efficient instruction set architecture (ISA). Simultaneously, software developers are creating new algorithms and frameworks,
yet their energy efficiency often remains unclear.
In this study, we conduct a comprehensive benchmark analysis of machine learning (ML) applications on the 64-core SOPHON SG2042 RISC-V architecture. Specifically, we examine the energy consumption of deep learning inference models across various AI frameworks. By comparing the performance of different frameworks, we aim to provide a detailed understanding of how these frameworks can optimize energy consumption on this architecture.},
  keywords     = {ai, DYMAN, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
yet their energy efficiency often remains unclear.
In this study, we conduct a comprehensive benchmark analysis of machine learning (ML) applications on the 64-core SOPHON SG2042 RISC-V architecture. Specifically, we examine the energy consumption of deep learning inference models across various AI frameworks. By comparing the performance of different frameworks, we aim to provide a detailed understanding of how these frameworks can optimize energy consumption on this architecture.
Marco Edoardo Santimaria, Iacopo Colonnelli, Marco Aldinucci
Releasing the CAPIO middleware from MPI derived constraints Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: across, admire, capio, capiocl, eupex, icsc
@misc{24:santimaria:bighpc,
  author       = {Marco Edoardo Santimaria and Iacopo Colonnelli and Marco Aldinucci},
  title        = {Releasing the CAPIO middleware from MPI derived constraints},
  address      = {Pisa, Italy},
  year         = {2024},
  date         = {2024-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/zrJGD4i36fWdp5g},
  abstract     = {CAPIO is a middleware that transparently injects streaming capabilities into file-based workflows. However, its implementation is limited to HPC environments based on the MPI framework, significantly limiting its applications. This paper will illustrate a proposed architecture and some preliminary results aimed at investigating the usage of a distributed files system as a communication media for the CAPIO middleware, with the ultimate goal of supporting both CLOUD-based and HPC-based workflows.},
  keywords     = {across, admire, capio, capiocl, eupex, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Edoardo Santimaria, Iacopo Colonnelli, Massimo Torquati, Marco Aldinucci
CAPIO: Cross Application Programmable IO Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: across, admire, capio, capiocl, eupex, icsc
@misc{24:santimaria:itadata:shpcpee,
  author       = {Marco Edoardo Santimaria and Iacopo Colonnelli and Massimo Torquati and Marco Aldinucci},
  title        = {CAPIO: Cross Application Programmable IO},
  address      = {Pisa, Italy},
  year         = {2024},
  date         = {2024-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/rg6LWwrZXi6tTXm},
  abstract     = {With the increasing amount of digital data available for analysis and simulation, the class of I/O-intensive HPC workflows is fated to expand, further exacerbating quickly the performance gap between computing, memory, and storage technologies. CAPIO (Cross-Application Programmable I/O), is a middleware capable of injecting I/O streaming capabilities into file-based workflows, improving the computation-I/O overlap without the need to change the application code. In this presentation, we will introduce the CAPIO-CL language with its semantics, as well as the implementation of the CAPIO-CL language through the CAPIO middleware. We will also provide some case studies of how CAPIO has been employed to improve workflow execution time as well as some future directions.},
  keywords     = {across, admire, capio, capiocl, eupex, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Gianluca Mittone
Benchmarking HPC Performance for State-of-the-Art AI Workloads Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: ai, eupilot, icsc
@misc{24:mittone:itadata:shpcpee,
  author       = {Gianluca Mittone},
  title        = {Benchmarking HPC Performance for State-of-the-Art AI Workloads},
  address      = {Pisa, Italy},
  year         = {2024},
  date         = {2024-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/5Ep3W7cPW5baZfr},
  abstract     = {Benchmarking the performance of modern High-Performance Computing (HPC) infrastructure on Artificial Intelligence (AI) workloads is a hot topic in the supercomputing community. While research communities and big-tech companies actively invest in larger, more powerful data centres to support AI research, the standard computational performance benchmarking tools (e.g., LINPACK) are increasingly becoming outdated since they are not specifically tailored for AI workloads. Some tools, such as MLPerf, are trying to bridge this gap, but the HPC community still has not adopted them as standards. Since this trend became particularly evident with the advent of Large Language Models (LLMs), this work will delve into LLM training at scale as a way to benchmark Top500 HPC infrastructures on current AI workloads. The scalability performances of a major LLM model (i.e., Meta's LLaMA) on different HPCs (Leonardo, LUMI, MeluXina, Karolina) are exposed and discussed along with their Top500 positioning.
However, it should be noted that state-of-the-art LLM models are not trained on thousands of computing nodes but on hundreds. This choice is due to multiple factors, such as the influence of the training scaling on the model's convergence and the instability of large-scale deployments due to hardware/software failure. A benchmarking approach based on the next-generation LLM training approach is proposed to bypass all these issues. State-of-the-art LLMs are not monolithic structures but Mixture-of-Experts (MoE) models; this design implies innovative frontiers for the distributed training of such models due to the experts' training being potentially more parallelisable than a single monolithic model. We thus propose to create an AI-oriented HPC benchmark suite based on the parallel training of MoE models to measure the throughput performance of HPC systems on state-of-the-art AI workloads.},
  keywords     = {ai, eupilot, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
However, it should be noted that state-of-the-art LLM models are not trained on thousands of computing nodes but on hundreds. This choice is due to multiple factors, such as the influence of the training scaling on the model's convergence and the instability of large-scale deployments due to hardware/software failure. A benchmarking approach based on the next-generation LLM training approach is proposed to bypass all these issues. State-of-the-art LLMs are not monolithic structures but Mixture-of-Experts (MoE) models; this design implies innovative frontiers for the distributed training of such models due to the experts' training being potentially more parallelisable than a single monolithic model. We thus propose to create an AI-oriented HPC benchmark suite based on the parallel training of MoE models to measure the throughput performance of HPC systems on state-of-the-art AI workloads.
Gianluca Mittone
Pushing Federated Learning Boundaries: Three Innovative Distributed Intelligence Approaches Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: eupilot, fl, icsc
@misc{24:mittone:bighpc,
  author       = {Gianluca Mittone},
  title        = {Pushing Federated Learning Boundaries: Three Innovative Distributed Intelligence Approaches},
  address      = {Pisa, Italy},
  year         = {2024},
  date         = {2024-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/eKbRtSAEdmSFJYW},
  abstract     = {Federated learning is a distributed, privacy-preserving machine learning technique used on private, decentralised data. It allows multiple parties to cooperatively solve a common machine learning problem without sharing the local data. Three assumptions of state-of-the-art federated learning software constitute the starting points for this research work: 1) their inner workings being strictly tied to deep learning models, 2) the centralised structure currently implemented by many commercial frameworks, and 3) their assumption of being deployed on private, specialised computing infrastructures. The proposed research expands the federated learning paradigm to handle scenarios in which these three conditions do not hold. Such research problems are addressed methodologically and practically, and three open-source, proof-of-concept software are made freely available as tangible research results: OpenFL-x, FastFL, and xFFL.},
  keywords     = {eupilot, fl, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Samuele Fonio, Bruno Casella, Oussama Harrak
Federated Adaboost for Survival Analysis Miscellaneous
European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases, 2nd Workshop on Advancements in Federated Learning (WAFL), 2024.
Abstract | Links | BibTeX | Tags: ai, epi, fl, icsc
@misc{23:ecmlpkdd:fedsurvboost,
  author       = {Samuele Fonio and Bruno Casella and Oussama Harrak},
  title        = {Federated Adaboost for Survival Analysis},
  howpublished = {European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases, 2nd Workshop on Advancements in Federated Learning (WAFL)},
  address      = {Vilnius, Lithuania},
  year         = {2024},
  date         = {2024-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/DtXiQfne6BEC235},
  abstract     = {This work proposes FedSurvBoost, a federated learning pipeline for survival analysis based on the AdaBoost.F algorithm, which iteratively aggregates the best local weak hypotheses. Our method extends AdaBoost.F by removing the dependence on the number of classes coefficient from the computation of the weights of the best model. This makes it suitable for regression tasks, such as survival analysis. We show the effectiveness of our approach by comparing it with state-of-the-art methods, specifically developed for survival analysis problems, on two common survival datasets.},
  keywords     = {ai, epi, fl, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Alberto Mulone, Doriana Medić, Marco Aldinucci
A Fault Tolerance mechanism for Hybrid Scientific Workflows Miscellaneous
1st workshop about High-Performance e-Science (HiPES), 2024.
Abstract | Links | BibTeX | Tags: eupex, icsc, streamflow
@misc{24:madrid:hipes:talk,
  author       = {Alberto Mulone and Doriana Medić and Marco Aldinucci},
  title        = {A Fault Tolerance mechanism for Hybrid Scientific Workflows},
  howpublished = {1st workshop about High-Performance e-Science (HiPES)},
  address      = {Madrid, Spain},
  year         = {2024},
  date         = {2024-08-01},
  url          = {https://datacloud.di.unito.it/index.php/s/9Ddj6fGgmDbLDXj},
  abstract     = {In large distributed systems, failures are a daily event occurring frequently, especially with growing numbers of computation tasks and locations on which they are deployed. The advantage of representing an application as a workflow is possibility to utilize the Workflow Management Systems which are reliable systems guaranteeing the correct execution of the application and providing the features such as portability, scalability, and fault tolerance. Over recent years, the emergence of hybrid workflows has posed new and intriguing challenges by increasing the possibility of distributing computations involving heterogeneous and independent environments. As a consequence, the number of possible points of failure in the execution augmented, creating different important challenges interesting to study.},
  keywords     = {eupex, icsc, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli
Scientific Workflows in the Continuum Era Miscellaneous
2024, (Keynote Talk).
Abstract | Links | BibTeX | Tags: icsc
@misc{24:icolonne:wscc,
  author       = {Iacopo Colonnelli},
  title        = {Scientific Workflows in the Continuum Era},
  address      = {Madrid, Spain},
  year         = {2024},
  date         = {2024-08-01},
  url          = {https://datacloud.di.unito.it/index.php/s/PkqYA3p38XLKgrt},
  abstract     = {Thanks to their generality, workflow models represent a powerful abstraction for designing complex applications and executing them on large-scale distributed architectures. However, several additional challenges appear when transitioning from cloud/HPC environments to the entire compute continuum. Continuum execution environments are fully distributed and modular, and modules can be heterogeneous and independent of each other. In addition, continuum workflows often rely on multiple intercommunicating agents that form complex micro-services architectures. Different agents deal with different communication and parallelization paradigms: network-based stream processing at the edge and file-based batch processing on HPC facilities. Finally, support for efficient interactive workflows in the continuum remains an open research problem. This talk explores these challenges and provides insights on how to deal with them. A ready-to-use software library accompanies each proposed solution to facilitate the reproducibility and reusability of the presented concepts.},
  note         = {Keynote Talk},
  keywords     = {icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Giulio Malenza
Preliminary analysis of model parallelism applications on a 64-core RV64 Server CPU Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: eupilot, icsc
@misc{24:gmalenza:hlpp:MPRISC-v,
  author       = {Giulio Malenza},
  title        = {Preliminary analysis of model parallelism applications on a 64-core RV64 Server CPU},
  address      = {Pisa, Italy},
  year         = {2024},
  date         = {2024-07-01},
  url          = {https://datacloud.di.unito.it/index.php/s/JrWwKALeaFEJSQo},
  abstract     = {Massive Data Parallel workloads, driven by inference on large ML models, are pushing hardware vendors to develop efficient and cost-effective multi-core server CPUs. The RISC-V architecture plays a prominent role due to its open, extensible and energy-friendly ISA. Despite significant progress in recent years, finding efficient methods to run parallel applications on new architectures to harness their maximum performance fully remains a challenge. In this study, we benchmark the inference of machine learning models on the SOPHON SG2042 SoC, the first server-grade CPU based on the RV64 ISA, composed of 64 cores arranged in a grid of 16 groups of 4 cores. Specifically, we aim to enhance performance via better cache hit ratios stemming from model parallelism to split and assign parts of the model to specific (groups of) cores using a pipeline execution. We orchestrate execution using FastFlow, a low-level programming framework designed for multithreaded streaming applications. By comparing the results against the standard multi-core inference and analyzing the effects of different submodel-to-core mapping strategies, we aim to provide a comprehensive understanding of how the model parallel approach can maximize efficiency and utilization of hardware resources.},
  keywords     = {eupilot, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Edoardo Santimaria
CAPIO-CL: Cross Application Programmable IO - Coordination Language Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: across, admire, capio, eupex, icsc
@misc{24:santimaria:hlpp:capiocl,
  author       = {Marco Edoardo Santimaria},
  title        = {CAPIO-CL: Cross Application Programmable IO - Coordination Language},
  address      = {Pisa, Italy},
  year         = {2024},
  date         = {2024-07-01},
  url          = {https://datacloud.di.unito.it/index.php/s/zsKY3PWzX5NFCiX},
  abstract     = {The performance bottleneck in file-based workflows remains a pressing issue in the realm of I/O-based workflows. To address this challenge, a novel annotation language has been developed. CAPIO-CL is positioned as an innovative I/O coordination language, enabling users to annotate data dependencies within file-based workflows with synchronization semantics pertinent to the involved files and directories. Through the information provided by the language, optimization opportunities arise in streaming and preemptive data movement. This paper serves to illustrate the semantics and syntax enabling CAPIO-CL to enhance the performance of in situ workflows without necessitating the rewriting or modification of the original workflow application steps. Finally, an analysis of CAPIO-CL is provided, taking into consideration both language expressiveness and application performance enhancement.},
  keywords     = {across, admire, capio, eupex, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Lorenzo Brescia
Towards Secure WMS with TEEs Miscellaneous
Mentoring session: HPC Summer School, University of Trento, 2024.
Links | BibTeX | Tags: confidential, icsc
@misc{24:brescia:hpc:school:talk,
  author       = {Lorenzo Brescia},
  title        = {Towards Secure WMS with TEEs},
  howpublished = {Mentoring session: HPC Summer School, University of Trento},
  address      = {Trento, Italy},
  year         = {2024},
  date         = {2024-06-01},
  url          = {https://datacloud.di.unito.it/index.php/s/eAxEgqiTsGSRQz4},
  keywords     = {confidential, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli, Robert Birke, Giulio Malenza, Gianluca Mittone, Alberto Mulone, Marco Aldinucci
Cross-Facility Federated Learning - Part II Miscellaneous
2024, (Invited talk).
Links | BibTeX | Tags: eupex, icsc, space
@misc{24:ic:elise:xffl,
  author       = {Iacopo Colonnelli and Robert Birke and Giulio Malenza and Gianluca Mittone and Alberto Mulone and Marco Aldinucci},
  title        = {Cross-Facility Federated Learning - Part II},
  address      = {Helsinki, Finland},
  year         = {2024},
  date         = {2024-06-01},
  url          = {https://datacloud.di.unito.it/index.php/s/7HonBpcWPxotXLX},
  note         = {Invited talk},
  keywords     = {eupex, icsc, space},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
From HPC4AI to Software & Integration living lab to innovation Miscellaneous
HPC as an enabling platform for AI event, 2024.
Abstract | Links | BibTeX | Tags: ai, HPC, icsc
@misc{24:ma:hpcai:talk,
  author       = {Marco Aldinucci},
  title        = {From HPC4AI to Software \& Integration living lab to innovation},
  howpublished = {HPC as an enabling platform for AI event},
  address      = {Torino, Italy},
  year         = {2024},
  date         = {2024-06-01},
  url          = {https://datacloud.di.unito.it/index.php/s/3SS3Xa9XorN6D9o},
  abstract     = {The talk presents the motivation and the activity of the "Software and Integration" lab at UNITO.},
  keywords     = {ai, HPC, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
Software & Integration lab of FutureHPC spoke Miscellaneous
HPC as an enabling platform for AI event, 2024.
Abstract | Links | BibTeX | Tags: ai, HPC, icsc
@misc{24:ma:swi:lab,
  author       = {Marco Aldinucci},
  title        = {Software \& Integration lab of FutureHPC spoke},
  howpublished = {HPC as an enabling platform for AI event},
  address      = {Torino, Italy},
  year         = {2024},
  date         = {2024-06-01},
  url          = {https://datacloud.di.unito.it/index.php/s/WDjyXCGyYFJDQSd},
  abstract     = {The presentation describes the main activity of the "Software and Integration" lab at UNITO across its main flagship codes.},
  keywords     = {ai, HPC, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli
Dynamic hybrid workflows for Deep Learning on HPC infrastructure Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: icsc, jupyter-workflow, streamflow
@misc{24:icolonne:ictp,
  author       = {Iacopo Colonnelli},
  title        = {Dynamic hybrid workflows for Deep Learning on HPC infrastructure},
  address      = {Trieste, Italy},
  year         = {2024},
  date         = {2024-05-01},
  url          = {https://datacloud.di.unito.it/index.php/s/EaFHJEKNbW5oXeq},
  abstract     = {Hybrid workflow abstractions allow users to quickly design and orchestrate cross-facility workloads, decoupling tasks from environment-specific technical details to reduce complexity and increase reusability. Plus, workflow descriptions help ensure the reproducibility of scientific experiments through prospective and retrospective provenance collection. This module has been designed to provide a hands-on exploration of scientific workflows from various angles, from the initial design phase to their orchestration at extreme scales. We will use the practical example of the Common Workflow Language (CWL) open standard to demonstrate how workflows can be written, and the StreamFlow workflow system to execute them seamlessly on the CINECA HPC facility. We will also delve into the integration between scientific workflows and Jupyter Notebooks, which aims to give data scientists a familiar interface to scientific workflows. In this module, students will gain a comprehensive understanding of scientific workflows. They will learn how to use these workflows to model and orchestrate Machine Learning and Deep Learning pipelines. Additionally, they will explore how modern workflow management systems can efficiently scale data-oriented workloads from a researcher’s laptop to an entire HPC facility.},
  keywords     = {icsc, jupyter-workflow, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
Gianluca Mittone
Intro to Federated Learning Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: fl, icsc
@misc{24:mittone:ictp,
  author       = {Gianluca Mittone},
  title        = {Intro to Federated Learning},
  address      = {Trieste, Italy},
  year         = {2024},
  date         = {2024-05-01},
  url          = {https://datacloud.di.unito.it/index.php/s/nSwwmedjqe2jbWJ},
  abstract     = {Machine Learning (ML) is the branch of Artificial Intelligence focused on developing algorithms capable of adapting and improving their predictive or generative performance by feeding on data. Adapting or improving the system’s behaviour based on the provided data is called learning since it is similar to the human learning process in many aspects. The same ML algorithm, usually referred to as a model, trained on different data will thus expose different capabilities and can, therefore, solve different tasks. FL is a relatively recent distributed ML methodology aiming to bridge the gap between the need to train ever bigger ML models on ever larger datasets and the individual and companies’ will to protect and not share their private data. From another point of view, FL is also a way to distribute the training of an ML model even more than before. However, it should be considered that the learning performance of FL is usually lower than that of traditional centralised learning. This course will start from Kairouz and McMahan’s definition of FL: ”Federated learning is a machine learning setting where multiple entities (clients) collaborate in solving a machine learning problem, under the coordination of a central server or service provider. Each client’s raw data is stored locally and not exchanged or transferred; instead, focused updates intended for immediate aggregation are used to achieve the learning objective.” From this starting point, the most significant aspects of FL will be exposed and discussed. This tutorial will particularly explore FL from both the learning and computational [5] performance perspectives, investigating its pros and cons in a distributed ML setting. Since FL natively targets data privacy, some insights on how the FL process can be attacked and protected will also be discussed from a high-level perspective.
Finally, a hands-on session will guide the participants in building a basic FL system, providing a better understanding of the major implementational difficulties of such a technique.},
  keywords     = {fl, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli
CWL Working Groups Miscellaneous
2024 CWL Conference, 2024.
Abstract | Links | BibTeX | Tags: icsc
@misc{24:icolonne:cwlcon2024,
  author       = {Iacopo Colonnelli},
  title        = {CWL Working Groups},
  howpublished = {2024 CWL Conference},
  address      = {Amsterdam, Netherlands},
  year         = {2024},
  date         = {2024-05-01},
  url          = {https://datacloud.di.unito.it/index.php/s/zZDKdL8deLd4jSi},
  abstract     = {This presentation introduces the new CWL Working Groups initiative, describing what a Working Group actually is, which Working Groups already exist in the CWL community, and how anybody can create a new officially recognized Working Group. Then, the presentation will explore the CWL4HPC Working Group, using it as an example of how a CWL Working Group can actually work.},
  keywords     = {icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Lorenzo Brescia
Secure Generic Remote Workflow Execution with TEEs Miscellaneous
Proceedings of the 2nd Workshop on Workflows in Distributed Environments, 2024.
Abstract | Links | BibTeX | Tags: confidential, icsc
@misc{23:brescia:wide:talk,
title = {Secure Generic Remote Workflow Execution with TEEs},
author = {Lorenzo Brescia},
url = {https://datacloud.di.unito.it/index.php/s/Prxq6EWGbcN8sWx},
year = {2024},
date = {2024-04-01},
address = {Athens, Greece},
abstract = {In scientific environments, the frequent need to process substantial volumes of data poses a common challenge. Individuals tasked with executing these computations frequently encounter a deficit in local computational resources, leading them to opt for the facilities of a Cloud Service Provider (CSP) for data processing. However, the data subjected to these calculations may be subject to confidentiality constraints. This paper introduces a proof-of-concept framework that leverages Gramine LibOS and Intel SGX, enabling the protection of generic remote workflow computations through SGX enclaves as Trusted Execution Environments (TEEs). The framework entails the delineation of user and CSP behavior and has been implemented using Bash scripts. Furthermore, an infrastructure has been designed for the Data Center Attestation Primitives (DCAP) remote attestation mechanism, wherein the user gains trust in the proper instantiation of the enclave within the CSP. To assess the framework efficacy, it has been tested on two distinct workflows, one trivial and the other involving real-world bioinformatics applications for processing DNA data. The performance study revealed that the framework incurred an acceptable overhead, ranging from a factor of x1.4 to x1.8 compared to unsafe execution practice.},
howpublished = {Proceedings of the 2nd Workshop on Workflows in Distributed Environments},
keywords = {confidential, icsc},
pubstate = {published},
tppubtype = {misc}
}
Alberto Mulone
Workflows for future High-Performance Computing Miscellaneous
COMETE PhD Workshop, 2024.
@misc{24:amulone:comete,
title = {Workflows for future High-Performance Computing},
author = {Alberto Mulone},
url = {https://datacloud.di.unito.it/index.php/s/ZGG8fLMp5B7qRHS},
year = {2024},
date = {2024-04-01},
address = {Torino, Italy},
howpublished = {COMETE PhD Workshop},
keywords = {icsc},
pubstate = {published},
tppubtype = {misc}
}
Iacopo Colonnelli
CWL in the HPC Ecosystem Miscellaneous
Workshop on workflow languages for HEP analysis, 2024.
Links | BibTeX | Tags: across, eupex, icsc, space, streamflow
@misc{24:icolonne:cwl4hpccern,
title = {CWL in the HPC Ecosystem},
author = {Iacopo Colonnelli},
url = {https://datacloud.di.unito.it/index.php/s/PRmqdwWHt6P2PH7},
year = {2024},
date = {2024-04-01},
address = {CERN, Meyrin, Switzerland},
howpublished = {Workshop on workflow languages for HEP analysis},
keywords = {across, eupex, icsc, space, streamflow},
pubstate = {published},
tppubtype = {misc}
}
Giulio Malenza, Marco Edoardo Santimaria
Benchmarking Parallelization Models through Karmarkar's algorithm Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: HPC, icsc
@misc{24:pdp:karmarkartalk,
title = {Benchmarking Parallelization Models through Karmarkar's algorithm},
author = {Giulio Malenza and Marco Edoardo Santimaria},
url = {https://datacloud.di.unito.it/index.php/s/JjKcAJpYS7ctX9r},
year = {2024},
date = {2024-03-01},
address = {Dublin, Ireland},
abstract = {Optimization problems are one of the main focuses of scientific research. Their computational-intensive nature makes them prone to be parallelized with consistent improvements in performance. This paper sheds light on different parallel models for accelerating Karmarkar’s Interior-point method. To do so, we assess parallelization strategies for individual operations within the aforementioned Karmarkar’s algorithm using OpenMP, GPU acceleration with CUDA, and the recent Parallel Standard C++ Linear Algebra library (PSTL) executing both on GPU and CPU. Our different implementations yield interesting benchmark results that show the optimal approach for parallelizing interior point algorithms for general Linear Programming (LP) problems. In addition, we propose a more theoretical perspective of the parallelization of this algorithm, with a detailed study of our OpenMP implementation, showing the limits of optimizing the single operations.},
keywords = {HPC, icsc},
pubstate = {published},
tppubtype = {misc}
}
Robert Birke
FLaaS: Federated Learning as a Service Miscellaneous
ICSC - Spoke 1 meeting, 2024.
Abstract | Links | BibTeX | Tags: ai, icsc
@misc{24:icsc:spoke1:ifab,
title = {FLaaS: Federated Learning as a Service},
author = {Robert Birke},
url = {https://datacloud.di.unito.it/index.php/s/yHXdTnC8xEqoJ6Y},
year = {2024},
date = {2024-02-01},
address = {Torino, Italy},
abstract = {Presentation about the Innovation Grant in collaboration with IFAB},
howpublished = {ICSC - Spoke 1 meeting},
keywords = {ai, icsc},
pubstate = {published},
tppubtype = {misc}
}
Alberto Mulone
Cross-Platform Full Waveform Inversion Miscellaneous
ICSC - Spoke 1 meeting, 2024.
Abstract | Links | BibTeX | Tags: icsc, streamflow
@misc{24:icsc:spoke1:eni,
title = {Cross-Platform Full Waveform Inversion},
author = {Alberto Mulone},
url = {https://datacloud.di.unito.it/index.php/s/M3HkxA5wsBPS5ro},
year = {2024},
date = {2024-02-01},
address = {Torino, Italy},
abstract = {Presentation about the Innovation Grant in collaboration with ENI},
howpublished = {ICSC - Spoke 1 meeting},
keywords = {icsc, streamflow},
pubstate = {published},
tppubtype = {misc}
}
Gianluca Mittone
RISC-V for AI Miscellaneous
High Performance, Edge And Cloud computing Conference 2024 (HiPEAC 2024), 2024.
Abstract | Links | BibTeX | Tags: eupilot, icsc
@misc{24:HiPEAC:riscv,
title = {RISC-V for AI},
author = {Gianluca Mittone},
url = {https://datacloud.di.unito.it/index.php/s/rFtxT7zryoKNGbP},
year = {2024},
date = {2024-01-01},
address = {Garching bei München, München, Germany},
abstract = {AI-focused RISC-V-based hardware accelerators},
howpublished = {High Performance, Edge And Cloud computing Conference 2024 (HiPEAC 2024)},
keywords = {eupilot, icsc},
pubstate = {published},
tppubtype = {misc}
}
2023
Lorenzo Brescia, Iacopo Colonnelli
Trusted Computing at Scale Miscellaneous
CN HPC Flagship 4 Working Day, 2023.
Links | BibTeX | Tags: confidential, icsc
@misc{23:brescia:trusted:workflow:fl4:talk,
title = {Trusted Computing at Scale},
author = {Lorenzo Brescia and Iacopo Colonnelli},
url = {https://datacloud.di.unito.it/index.php/s/5ij6tLd5SAX4Nn4},
year = {2023},
date = {2023-12-01},
address = {Turin, Italy},
howpublished = {CN HPC Flagship 4 Working Day},
keywords = {confidential, icsc},
pubstate = {published},
tppubtype = {misc}
}
Marco Aldinucci
Federated Learning: A Distributed System Viewpoint Miscellaneous
Bicocca University seminars, Milan, Italy, 2023, (Invited talk).
Abstract | Links | BibTeX | Tags: eupilot, icsc, textarossa
@misc{23:FL:bicocca,
title = {Federated Learning: A Distributed System Viewpoint},
author = {Marco Aldinucci},
url = {https://datacloud.di.unito.it/index.php/s/FfEzADQtC73GgLs},
year = {2023},
date = {2023-12-01},
abstract = {Decentralized machine learning (DML) enables collaborative machine learning without centralized input data. Federated learning (FL) and edge inference (EI) are examples of DML. Collaboration naturally happens at the edge of a distributed system with inherently distributed data. While tools for DML are starting to flourish, much needs to be done to get more flexible and portable tools to experiment with novel techniques, non-fully connected topologies, multiple data domains, and asynchronous collaboration schemes. We'll present recent advances in DML, aiming to improve usability in data centers and, at the edge, to widen the class of models extending FL to non-DNN paradigms, to improve the accuracy of models controlling normalization and frequency of communications, and to boost data privacy through generative adversarial networks.},
howpublished = {Bicocca University seminars, Milan, Italy},
note = {Invited talk},
keywords = {eupilot, icsc, textarossa},
pubstate = {published},
tppubtype = {misc}
}
Gianluca Mittone, Giulio Malenza, Marco Aldinucci, Robert Birke
Distributed Edge Inference: an Experimental Study on Multiview Detection Miscellaneous
The 16th IEEE/ACM International Conference on Utility and Cloud Computing (UCC 2023), 2023.
Abstract | Links | BibTeX | Tags: ai, eupilot, icsc
@misc{23:ucc:multiview,
title = {Distributed Edge Inference: an Experimental Study on Multiview Detection},
author = {Gianluca Mittone and Giulio Malenza and Marco Aldinucci and Robert Birke},
url = {https://datacloud.di.unito.it/index.php/s/XfjNZEPSNfSKPFr},
year = {2023},
date = {2023-12-01},
address = {Taormina, Italy},
abstract = {Computing is evolving rapidly to cater to the increasing demand for sophisticated services, and Cloud computing lays a solid foundation for flexible on-demand provisioning. However, as the size of applications grows, the centralised client-server approach used by Cloud computing increasingly limits the applications' scalability. To achieve ultra-scalability, cloud/edge/fog computing converges into the compute continuum, completely decentralising the infrastructure to encompass universal, pervasive resources. The compute continuum makes devising applications benefitting from this complex environment a challenging research problem. We put the opportunities the compute continuum offers to the test through a real-world multi-view detection model (MvDet) implemented with the FastFL C/C++ high-performance edge inference framework. Computational performance is discussed considering many experimental scenarios, encompassing different edge computational capabilities and network bandwidths. We obtain up to 1.92x speedup in inference time over a centralised solution using the same devices.},
howpublished = {The 16th IEEE/ACM International Conference on Utility and Cloud Computing (UCC 2023)},
keywords = {ai, eupilot, icsc},
pubstate = {published},
tppubtype = {misc}
}
Marco Aldinucci, Elena Baralis, Valeria Cardellini, Iacopo Colonnelli, Marco Danelutto, Sergio Decherchi, Giuseppe Di Modica, Luca Ferrucci, Marco Gribaudo, Francesco Iannone, Marco Lapegna, Doriana Medić, Giuseppa Muscianisi, Francesca Righetti, Eva Sciacca, Nicola Tonellotto, Mauro Tortonesi, Paolo Trunfio, Tullio Vardanega
A Systematic Mapping Study of Italian Research on Workflows Miscellaneous
18th Workshop on Workflows in Support of Large-Scale Science (WORKS 2023), 2023.
Abstract | Links | BibTeX | Tags: icsc
@misc{23:sc:works,
title = {A Systematic Mapping Study of Italian Research on Workflows},
author = {Marco Aldinucci and Elena Baralis and Valeria Cardellini and Iacopo Colonnelli and Marco Danelutto and Sergio Decherchi and Giuseppe Di Modica and Luca Ferrucci and Marco Gribaudo and Francesco Iannone and Marco Lapegna and Doriana Medić and Giuseppa Muscianisi and Francesca Righetti and Eva Sciacca and Nicola Tonellotto and Mauro Tortonesi and Paolo Trunfio and Tullio Vardanega},
url = {https://datacloud.di.unito.it/index.php/s/2kgooG43pGCykji},
year = {2023},
date = {2023-11-01},
address = {Denver, CO, USA},
abstract = {An entire ecosystem of methodologies and tools revolves around scientific workflow management. They cover crucial non-functional requirements that standard workflow models fail to target, such as interactive execution, energy efficiency, performance portability, Big Data management, and intelligent orchestration in the Computing Continuum. Characterizing and monitoring this ecosystem is crucial to developing an informed view of current and future research directions. This work conducts a systematic mapping study of the Italian workflow research community, analyzing 25 tools and 10 applications from several scientific domains in the context of the ``National Research Centre for HPC, Big Data, and Quantum Computing'' (ICSC). The study aims to outline the main current research directions and determine how they address the critical needs of modern scientific applications. The findings highlight a variegated research ecosystem of tools, with a prominent interest in advanced workflow orchestration and still immature but promising efforts toward energy efficiency.},
howpublished = {18th Workshop on Workflows in Support of Large-Scale Science (WORKS 2023)},
keywords = {icsc},
pubstate = {published},
tppubtype = {misc}
}
Iacopo Colonnelli, Doriana Medić, Barbara Cantalupo, Marco Aldinucci
Università degli Studi di Torino: Alpha parallel research group Miscellaneous
HaMMon Kick-Off meeting, 2023.
Links | BibTeX | Tags: icsc, streamflow
@misc{23:HaMMonProject,
title = {Università degli Studi di Torino: Alpha parallel research group},
author = {Iacopo Colonnelli and Doriana Medić and Barbara Cantalupo and Marco Aldinucci},
url = {https://datacloud.di.unito.it/index.php/s/cmgy9BZ3nwCR2QJ},
year = {2023},
date = {2023-10-01},
address = {Bologna, Italy},
howpublished = {HaMMon Kick-Off meeting},
keywords = {icsc, streamflow},
pubstate = {published},
tppubtype = {misc}
}
Giulio Malenza, Valentina Cesare, Marco Aldinucci
Performance portability in HPC: the Gaia use-case. Miscellaneous
2nd Italian Conference on Big Data and Data Science (ITADATA 2023), 2023.
@misc{23:GAIA:bigHPC,
title = {Performance portability in HPC: the Gaia use-case.},
author = {Giulio Malenza and Valentina Cesare and Marco Aldinucci},
url = {https://datacloud.di.unito.it/index.php/s/RqcZpizFtC9toFq},
year = {2023},
date = {2023-09-01},
address = {Naples, Italy},
howpublished = {2nd Italian Conference on Big Data and Data Science (ITADATA 2023)},
keywords = {icsc},
pubstate = {published},
tppubtype = {misc}
}
Samuele Fonio
Benchmarking Federated Learning Frameworks for Medical Imaging Tasks Miscellaneous
Image Analysis and Processing - ICIAP 2023 - 22nd International Conference - FedMed, 2023.
Abstract | Links | BibTeX | Tags: ai, eupilot, fl, icsc
@misc{23:iciap:benchmed,
title = {Benchmarking Federated Learning Frameworks for Medical Imaging Tasks},
author = {Samuele Fonio},
url = {https://datacloud.di.unito.it/index.php/s/sR7YeTGgfH4DtCR},
year = {2023},
date = {2023-09-01},
address = {Udine, Italy},
abstract = {This paper presents a comprehensive benchmarking study of various Federated Learning (FL) frameworks applied to the task of Medical Image Classification. The research specifically addresses the often neglected and complex aspects of scalability and usability in off-the-shelf FL frameworks. Through experimental validation using real case deployments, we provide empirical evidence of the performance and practical relevance of open source FL frameworks. Our findings contribute valuable insights for anyone interested in deploying a FL system, with a particular focus on the healthcare domain—an increasingly attractive field for FL applications.},
howpublished = {Image Analysis and Processing - ICIAP 2023 - 22nd International Conference - FedMed},
keywords = {ai, eupilot, fl, icsc},
pubstate = {published},
tppubtype = {misc}
}
Gianluca Mittone, Samuele Fonio
Benchmarking Federated Learning Scalability Miscellaneous
2nd Italian Conference on Big Data and Data Science (ITADATA 2023), 2023.
Abstract | Links | BibTeX | Tags: ai, eupilot, fl, icsc
@misc{23:itadata:fl_scaling,
title = {Benchmarking Federated Learning Scalability},
author = {Gianluca Mittone and Samuele Fonio},
url = {https://datacloud.di.unito.it/index.php/s/QZGxC4X3s5LG5oT},
year = {2023},
date = {2023-09-01},
address = {Naples, Italy},
abstract = {Federated Learning (FL) is a widespread Machine Learning paradigm handling distributed Big Data. In this work, we demonstrate that different FL frameworks expose different scaling performances despite adopting the same technologies, highlighting the need for a more comprehensive study on the topic.},
howpublished = {2nd Italian Conference on Big Data and Data Science (ITADATA 2023)},
keywords = {ai, eupilot, fl, icsc},
pubstate = {published},
tppubtype = {misc}
}
Gianluca Mittone, Walter Riviera, Iacopo Colonnelli, Robert Birke, Marco Aldinucci
Model-Agnostic Federated Learning Miscellaneous
29th International European Conference on Parallel and Distributed Computing (Euro-Par '23), 2023.
Abstract | Links | BibTeX | Tags: ai, eupilot, icsc
@misc{23:europar:mafl,
title = {Model-Agnostic Federated Learning},
author = {Gianluca Mittone and Walter Riviera and Iacopo Colonnelli and Robert Birke and Marco Aldinucci},
url = {https://datacloud.di.unito.it/index.php/s/9T6G2tRreRomBAE},
year = {2023},
date = {2023-09-01},
address = {Limassol, Cyprus},
abstract = {Since its debut in 2016, Federated Learning (FL) has been tied to the inner workings of Deep Neural Networks (DNNs); this allowed its development as DNNs proliferated but neglected those scenarios in which using DNNs is not possible or advantageous. The fact that most current FL frameworks only support DNNs reinforces this problem. To address the lack of non-DNN-based FL solutions, we propose MAFL (Model-Agnostic Federated Learning). MAFL merges a model-agnostic FL algorithm, AdaBoost.F, with an open industry-grade FL framework: Intel® OpenFL. MAFL is the first FL system not tied to any machine learning model, allowing exploration of FL beyond DNNs. We test MAFL from multiple points of view, assessing its correctness, flexibility, and scaling properties up to 64 nodes of an HPC cluster. We also show how we optimised OpenFL achieving a 5.5x speedup over a standard FL scenario. MAFL is compatible with x86-64, ARM-v8, Power and RISC-V.},
howpublished = {29th International European Conference on Parallel and Distributed Computing (Euro-Par '23)},
keywords = {ai, eupilot, icsc},
pubstate = {published},
tppubtype = {misc}
}
Gianluca Mittone, Robert Birke, Marco Aldinucci
Model-Agnostic Federated Learning Miscellaneous
29th International European Conference on Parallel and Distributed Computing (Euro-Par '23), 2023.
Abstract | Links | BibTeX | Tags: eupilot, icsc
@misc{23:europar:phdtalk,
title = {Model-Agnostic Federated Learning},
author = {Gianluca Mittone and Robert Birke and Marco Aldinucci},
url = {https://datacloud.di.unito.it/index.php/s/pT3qxkwzzsHR3nS},
year = {2023},
date = {2023-08-01},
address = {Limassol, Cyprus},
abstract = {Since its debut in 2016, Federated Learning (FL) has been tied to the inner workings of Deep Neural Networks (DNNs); this allowed its development as DNNs proliferated but neglected those scenarios in which using DNNs is not possible or advantageous. The fact that most current FL frameworks only support DNNs reinforces this problem. To address the lack of non-DNN-based FL solutions, we propose MAFL (Model-Agnostic Federated Learning). Decentralised Machine Learning (DML) enables collaborative machine learning without centralised input data. Federated Learning (FL) and Edge Inference are examples of DML. While tools for DML (especially FL) are starting to flourish, many are not flexible and portable enough to experiment with novel processors (e.g., RISC-V), non-fully connected network topologies, and asynchronous collaboration schemes. We overcome these limitations via a domain-specific language allowing us to map DML schemes to an underlying middleware, i.e. the FastFlow parallel programming library.},
howpublished = {29th International European Conference on Parallel and Distributed Computing (Euro-Par '23)},
keywords = {eupilot, icsc},
pubstate = {published},
tppubtype = {misc}
}
Giulio Malenza
Building an accelerated OpenFOAM Proof-of-Concept application using Modern C++. Miscellaneous
18th OpenFOAM Workshop 2023, Genova, 2023.
@misc{23:OF:genova,
title = {Building an accelerated OpenFOAM Proof-of-Concept application using Modern C++.},
author = {Giulio Malenza},
url = {https://datacloud.di.unito.it/index.php/s/mB6omsDB8ERBkGW},
year = {2023},
date = {2023-07-01},
address = {Genova, Italy},
howpublished = {18th OpenFOAM Workshop 2023, Genova},
keywords = {icsc},
pubstate = {published},
tppubtype = {misc}
}
Alberto Mulone, Sherine Awad, Davide Chiarugi, Marco Aldinucci
Porting the Variant Calling Pipeline for NGS data in cloud-HPC environment Miscellaneous
47th IEEE Annual Computers, Software, and Applications Conference, COMPSAC 2023, 2023.
Abstract | Links | BibTeX | Tags: across, icsc, streamflow
@misc{23:mulone:wide:talk,
title = {Porting the Variant Calling Pipeline for NGS data in cloud-HPC environment},
author = {Alberto Mulone and Sherine Awad and Davide Chiarugi and Marco Aldinucci},
url = {https://datacloud.di.unito.it/index.php/s/zNLj3LCZNsNxHwy},
year = {2023},
date = {2023-06-01},
address = {Torino, Italy},
abstract = {In recent years we have understood the importance of analyzing and sequencing human genetic variation. A relevant aspect that emerged from the Covid-19 pandemic was the need to obtain results very quickly; this involved using High-Performance Computing (HPC) environments to execute the Next Generation Sequencing (NGS) pipeline. However, HPC is not always the most suitable environment for the entire execution of a pipeline, especially when it involves many heterogeneous tools. The ability to execute parts of the pipeline on different environments can lead to higher performance but also cheaper executions. This work shows the design and optimization process that led us to a state-of-the-art Variant Calling hybrid workflow based on the StreamFlow Workflow Management System (WfMS). We also compare StreamFlow with Snakemake, an established WfMS targeting HPC facilities, observing comparable performance on single environments and satisfactory improvements with a hybrid cloud-HPC configuration.},
howpublished = {47th IEEE Annual Computers, Software, and Applications Conference, COMPSAC 2023},
keywords = {across, icsc, streamflow},
pubstate = {published},
tppubtype = {misc}
}
Gianluca Mittone, Nicolò Tonci, Robert Birke, Iacopo Colonnelli, Doriana Medić, Andrea Bartolini, Roberto Esposito, Emanuele Parisi, Francesco Beneventi, Mirko Polato, Massimo Torquati, Luca Benini, Marco Aldinucci
Experimenting with Emerging RISC-V Systems for Decentralised Machine Learning Miscellaneous
20th ACM international conference on computing frontiers (CF '23), 2023, (Invited talk).
Abstract | Links | BibTeX | Tags: ai, eupilot, icsc
@misc{23:ACMCF,
title = {Experimenting with Emerging RISC-V Systems for Decentralised Machine Learning},
author = {Gianluca Mittone and Nicolò Tonci and Robert Birke and Iacopo Colonnelli and Doriana Medić and Andrea Bartolini and Roberto Esposito and Emanuele Parisi and Francesco Beneventi and Mirko Polato and Massimo Torquati and Luca Benini and Marco Aldinucci},
url = {https://datacloud.di.unito.it/index.php/s/BYyqZbHzzN4DL8Z},
year = {2023},
date = {2023-05-01},
abstract = {Decentralised Machine Learning (DML) enables collaborative machine learning without centralised input data. Federated Learning (FL) and Edge Inference are examples of DML. While tools for DML (especially FL) are starting to flourish, many are not flexible and portable enough to experiment with novel processors (e.g., RISC-V), non-fully connected network topologies, and asynchronous collaboration schemes. We overcome these limitations via a domain-specific language allowing us to map DML schemes to an underlying middleware, i.e. the FastFlow parallel programming library. We experiment with it by generating different working DML schemes on x86-64 and ARM platforms and an emerging RISC-V one. We characterise the performance and energy efficiency of the presented schemes and systems. As a byproduct, we introduce a RISC-V porting of the PyTorch framework, the first publicly available to our knowledge.},
howpublished = {20th ACM international conference on computing frontiers (CF '23)},
note = {Invited talk},
keywords = {ai, eupilot, icsc},
pubstate = {published},
tppubtype = {misc}
}
Gianluca Mittone, Filip Svoboda, Marco Aldinucci, Nicholas D. Lane, Pietro Liò
A Federated Learning Benchmark for Drug-Target Interaction Miscellaneous
2023 ACM international Web Conference (WWW '23), 2023, (Invited talk).
Abstract | Links | BibTeX | Tags: eupilot, icsc
@misc{23:WWW,
title = {A Federated Learning Benchmark for Drug-Target Interaction},
author = {Gianluca Mittone and Filip Svoboda and Marco Aldinucci and Nicholas D. Lane and Pietro Liò},
url = {https://datacloud.di.unito.it/index.php/s/js7go3EorZxSLn9},
year = {2023},
date = {2023-05-01},
abstract = {Aggregating pharmaceutical data in the drug-target interaction (DTI) domain can potentially deliver life-saving breakthroughs. It is, however, notoriously difficult due to regulatory constraints and commercial interests. This work proposes the application of federated learning, which is reconcilable with the industry's constraints. It does not require sharing any information that would reveal the entities' data or any other high-level summary. When used on a representative GraphDTA model and the KIBA dataset, it achieves up to 15% improved performance relative to the best available non-privacy preserving alternative. Our extensive battery of experiments shows that, unlike in other domains, the non-IID data distribution in the DTI datasets does not deteriorate FL performance. Additionally, we identify a material trade-off between the benefits of adding new data and the cost of adding more clients.},
howpublished = {2023 ACM international Web Conference (WWW '23)},
note = {Invited talk},
keywords = {eupilot, icsc},
pubstate = {published},
tppubtype = {misc}
}
Bruno Casella, Samuele Fonio
Architecture-Based FedAvg for Vertical Federated Learning Miscellaneous
2023.
Abstract | Links | BibTeX | Tags: ai, epi, fl, icsc
@misc{23:casella:architecturalfedavgtalk,
title = {Architecture-Based FedAvg for Vertical Federated Learning},
author = {Bruno Casella and Samuele Fonio},
url = {https://datacloud.di.unito.it/index.php/s/kJQxnqG4d2ZSicK},
year = {2023},
date = {2023-01-01},
howpublished = {Proceedings of the 3rd Workshop on Distributed Machine Learning for the Intelligent Computing Continuum (DML-ICC), IEEE/ACM UCC 2023, Taormina, Italy, 4 December 2023},
abstract = {Federated Learning (FL) has emerged as a promising solution to address privacy concerns by collaboratively training Deep Learning (DL) models across distributed parties. This work proposes an architecture-based aggregation strategy in Vertical FL, where parties hold data with different attributes but shared instances. Our approach leverages the identical architectural parts, i.e. neural network layers, of different models to selectively aggregate weights, which is particularly relevant when collaborating with institutions holding different types of datasets, i.e., image, text, or tabular datasets. In a scenario where two entities train DL models, such as a Convolutional Neural Network (CNN) and a Multi-Layer Perceptron (MLP), our strategy computes the average only for architecturally identical segments. This preserves data-specific features learned from demographic and clinical data. We tested our approach on two clinical datasets, i.e., the COVID-CXR dataset and the ADNI study. Results show that our method achieves comparable results with the centralized scenario, in which all the data are collected in a single data lake, and benefits from FL generalizability. In particular, compared to the non-federated models, our proposed proof-of-concept model exhibits a slight performance loss on the COVID-CXR dataset (less than 8%), but outperforms ADNI models by up to 12%. Moreover, communication costs between training rounds are minimized by exchanging only the dense layer parameters.},
keywords = {ai, epi, fl, icsc},
pubstate = {published},
tppubtype = {misc}
}
Doriana Medić, Marco Aldinucci
Towards formal model for location aware workflows Miscellaneous
2023.
Abstract | Links | BibTeX | Tags: eupex, icsc
@misc{23:wide:medic,
title = {Towards formal model for location aware workflows},
author = {Doriana Medić and Marco Aldinucci},
url = {https://datacloud.di.unito.it/index.php/s/wpDd9HETzioixTW},
year = {2023},
date = {2023-01-01},
address = {Torino, Italy},
abstract = {Designing complex applications and executing them on large-scale topologies of heterogeneous architectures is becoming increasingly crucial in many scientific domains. As a result, diverse workflow modelling paradigms are developed, most of them with no formalisation provided. In these circumstances, comparing two different models or switching from one system to the other becomes a hard nut to crack. This paper investigates the capability of process algebra to model a location aware workflow system. Distributed π-calculus is considered as the base of the formal model due to its ability to describe the communicating components that change their structure as an outcome of the communication. Later, it is discussed how the base model could be extended or modified to capture different features of location aware workflow system. The intention of this paper is to highlight the fact that due to its flexibility, π-calculus, could be a good candidate to represent the behavioural perspective of the workflow system.},
keywords = {eupex, icsc},
pubstate = {published},
tppubtype = {misc}
}
2022
Marco Aldinucci
EuroHPC and the Italian HPC ecosystem Miscellaneous
Critical Infrastructure Protection Forum - EuroCC Romania, 2022, (Invited talk).
Abstract | Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, eupilot, icsc, textarossa
@misc{22:cip:romania,
title = {EuroHPC and the Italian HPC ecosystem},
author = {Marco Aldinucci},
url = {https://datacloud.di.unito.it/index.php/s/5dFFoNsZzwTzQkn},
year = {2022},
date = {2022-06-01},
address = {Bucharest, Romania},
abstract = {The talk presents the main investments currently ongoing in Italy in the HPC area as well as the activity of Italian stakeholders within EuroHPC. The novel Italian National Centre on HPC (ICSC) is introduced.},
howpublished = {Critical Infrastructure Protection Forum - EuroCC Romania},
note = {Invited talk},
keywords = {across, admire, eumaster4hpc, eupex, eupilot, icsc, textarossa},
pubstate = {published},
tppubtype = {misc}
}
Marco Aldinucci
The Italian HPC ecosystem and the next generation of EuroHPC CoE Miscellaneous
EuroHPC EoCoE final summit, 2022, (Invited talk).
Abstract | Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, eupilot, icsc, textarossa
@misc{22:eocoe:summit,
title = {The Italian HPC ecosystem and the next generation of EuroHPC CoE},
author = {Marco Aldinucci},
url = {https://datacloud.di.unito.it/index.php/s/AH5Ms3NekeoEooB},
year = {2022},
date = {2022-06-01},
address = {Napoli, Italy},
abstract = {The talk presents the main investments currently ongoing in Italy in the HPC area as well as the activity of Italian stakeholders within EuroHPC. The novel Italian National Centre on HPC (ICSC) is introduced.},
howpublished = {EuroHPC EoCoE final summit},
note = {Invited talk},
keywords = {across, admire, eumaster4hpc, eupex, eupilot, icsc, textarossa},
pubstate = {published},
tppubtype = {misc}
}
Marco Aldinucci
Da HPC4AI al living lab dello spoke FutureHPC del Centro Nazionale HPC Miscellaneous
Condivisioni, Conferenza GARR 2022, 2022, (Keynote talk).
Abstract | Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, hpc4ai, icsc, textarossa
% Keynote talk record. Title acronyms are braced against sentence-casing;
% abstract typos fixed (PaaS, living lab).
@misc{22:garr,
  author       = {Marco Aldinucci},
  title        = {Da {HPC4AI} al living lab dello spoke {FutureHPC} del {Centro Nazionale HPC}},
  howpublished = {Condivisioni, Conferenza GARR 2022},
  address      = {Palermo, Italy},
  year         = {2022},
  date         = {2022-05-01},
  url          = {https://datacloud.di.unito.it/index.php/s/P3KSroSSmrRxZMc},
  note         = {Keynote talk},
  abstract     = {HPC4AI is an open-access laboratory of the University of Turin open to researchers, students and companies that manages a double pair of systems: a production cloud-HPC system and its twin dedicated to development. The cloud-HPC system is implemented thanks to an extended version of the GARR cloud (OpenStack) and the SLURM workload manager. HPC4AI is specifically designed to support system software development and cloud-HPC convergence tools. Among these streamflow (WMS), jupyter-as-a-service (SaaS), portable-secure-tenant (PaaS). The experience gained in the design and management of HPC4AI forms the heart of the design of the living lab of the Turin "FutureHPC" spoke of the National Center "HPC, BigData and Quantum Computing" funded by the PNRR which should be operational from September 2022.},
  keywords     = {across, admire, eumaster4hpc, eupex, hpc4ai, icsc, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Space Center of Excellence (EC HE, HORIZON-EUROHPC-JU-2021-COE-01): Scalable Parallel and distributed Astrophysical Codes for Exascale (2023, 48 months, total cost 8M€, G.A. 101093441)
Publications
2024
Iacopo Colonnelli, Robert Birke, Giulio Malenza, Gianluca Mittone, Alberto Mulone, Jeroen Galjaard, Lydia Y. Chen, Sanzio Bassini, Gabriella Scipione, Jan Martinovič, Vit Vondrák, Marco Aldinucci
Cross-Facility Federated Learning Journal Article
In: Procedia Computer Science, vol. 240, pp. 3–12, 2024, ISSN: 1877-0509.
Abstract | Links | BibTeX | Tags: icsc, space, streamflow
% Journal article record. The page range uses the BibTeX double hyphen
% instead of a literal Unicode en dash, so styles can typeset the range.
@article{24:eurohpc:xffl,
  author       = {Iacopo Colonnelli and Robert Birke and Giulio Malenza and Gianluca Mittone and Alberto Mulone and Jeroen Galjaard and Lydia Y. Chen and Sanzio Bassini and Gabriella Scipione and Jan Martinovič and Vit Vondrák and Marco Aldinucci},
  title        = {Cross-Facility Federated Learning},
  journal      = {Procedia Computer Science},
  booktitle    = {Proceedings of the First EuroHPC user day},
  volume       = {240},
  pages        = {3--12},
  publisher    = {Elsevier},
  address      = {Bruxelles, Belgium},
  year         = {2024},
  date         = {2024-01-01},
  issn         = {1877-0509},
  doi          = {10.1016/j.procs.2024.07.003},
  url          = {https://www.sciencedirect.com/science/article/pii/S1877050924016909},
  abstract     = {In a decade, AI frontier research transitioned from the researcher's workstation to thousands of high-end hardware-accelerated compute nodes. This rapid evolution shows no signs of slowing down in the foreseeable future. While top cloud providers may be able to keep pace with this growth rate, obtaining and efficiently exploiting computing resources at that scale is a daunting challenge for universities and SMEs. This work introduces the Cross-Facility Federated Learning (XFFL) framework to bridge this compute divide, extending the opportunity to efficiently exploit multiple independent data centres for extreme-scale deep learning tasks to data scientists and domain experts. XFFL relies on hybrid workflow abstractions to decouple tasks from environment-specific technicalities, reducing complexity and enhancing reusability. In addition, Federated Learning (FL) algorithms eliminate the need to move large amounts of data between different facilities, reducing time-to-solution and preserving data privacy. The XFFL approach is empirically evaluated by training a full LLaMAv2 7B instance on two facilities of the EuroHPC JU, showing how the increased computing power completely compensates for the additional overhead introduced by two data centres.},
  keywords     = {icsc, space, streamflow},
  pubstate     = {published},
  tppubtype    = {article}
}
Talks
2024
Iacopo Colonnelli, Robert Birke, Giulio Malenza, Gianluca Mittone, Alberto Mulone, Marco Aldinucci
Cross-Facility Federated Learning - Part II Miscellaneous
2024, (Invited talk).
Links | BibTeX | Tags: eupex, icsc, space
% Invited talk record (Helsinki, June 2024); slides are linked in the url field.
@misc{24:ic:elise:xffl,
  author       = {Iacopo Colonnelli and Robert Birke and Giulio Malenza and Gianluca Mittone and Alberto Mulone and Marco Aldinucci},
  title        = {Cross-Facility Federated Learning - Part II},
  address      = {Helsinki, Finland},
  year         = {2024},
  date         = {2024-06-01},
  url          = {https://datacloud.di.unito.it/index.php/s/7HonBpcWPxotXLX},
  note         = {Invited talk},
  keywords     = {eupex, icsc, space},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli
CWL in the HPC Ecosystem Miscellaneous
Workshop on workflow languages for HEP analysis, 2024.
Links | BibTeX | Tags: across, eupex, icsc, space, streamflow
% Workshop talk record. Title acronyms are braced so that sentence-casing
% bibliography styles keep their capitalisation.
@misc{24:icolonne:cwl4hpccern,
  author       = {Iacopo Colonnelli},
  title        = {{CWL} in the {HPC} Ecosystem},
  howpublished = {Workshop on workflow languages for HEP analysis},
  address      = {CERN, Meyrin, Switzerland},
  year         = {2024},
  date         = {2024-04-01},
  url          = {https://datacloud.di.unito.it/index.php/s/PRmqdwWHt6P2PH7},
  keywords     = {across, eupex, icsc, space, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
2023
Sofia Karvounari, Eleni Mathioulaki, Michael R. Crusoe, Iacopo Colonnelli
Standardised Workflows at EBRAINS Miscellaneous
Human Brain Project Summit 2023, 2023, (Invited talk).
Abstract | Links | BibTeX | Tags: across, eupex, space, streamflow
% Invited talk record. The acronym in the title is braced so that
% sentence-casing bibliography styles keep its capitalisation.
@misc{23:HBPSummit,
  author       = {Sofia Karvounari and Eleni Mathioulaki and Michael R. Crusoe and Iacopo Colonnelli},
  title        = {Standardised Workflows at {EBRAINS}},
  howpublished = {Human Brain Project Summit 2023},
  address      = {Marseille, France},
  year         = {2023},
  date         = {2023-03-01},
  url          = {https://datacloud.di.unito.it/index.php/s/K5YQKTsX9N7NLT8},
  note         = {Invited talk},
  abstract     = {A hands-on training offer for Standardised Workflows in EBRAINS. A short presentation will be used as an introduction, while the main hands-on session will provide information about Writing and Executing Standardised Workflows. TC will give some guidelines, so attendees can experiment with writing CWL tools and workflows and then they will be given access to VM to execute these workflows. The Workflows Dashboard will be also presented during the same session, offering to the attendees the opportunity to understand the different functionalities, use it with TC support and provide useful comments.},
  keywords     = {across, eupex, space, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
Experimenting with Systems for Decentralized Machine Learning Miscellaneous
NVidia GTC 2023, 2023.
Abstract | Links | BibTeX | Tags: across, admire, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa
% Talk record. Abstract fixes: "though" corrected to "through", and the
% ampersand is escaped so the field is safe for styles that typeset abstracts.
@misc{23:gtc:fl,
  author       = {Marco Aldinucci},
  title        = {Experimenting with Systems for Decentralized Machine Learning},
  howpublished = {NVidia GTC 2023},
  year         = {2023},
  date         = {2023-03-01},
  url          = {https://datacloud.di.unito.it/index.php/s/oyLt7xwkbKxz65c},
  abstract     = {Decentralized machine learning (DML) enables collaborative machine learning without centralized input data. Federated learning (FL) and edge inference (EI) are examples of DML. Collaboration naturally happens at the edge of a distributed system with inherently distributed data. While tools for DML are starting to flourish, much needs to be done to get more flexible and portable tools to experiment with novel techniques, non-fully connected topologies, multiple data domains, and asynchronous collaboration schemes. We'll present recent advances in DML, aiming to improve usability in data centers and, at the edge, to widen the class of models extending FL to non-DNN paradigms, to improve the accuracy of models controlling normalization and frequency of communications, and to boost data privacy through generative adversarial networks. Prerequisites: Intermediate understanding of machine learning methods and distributed \& parallel computing.},
  keywords     = {across, admire, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
HPC4AI: The Research on AI beyond the public cloud Miscellaneous
CENTAI kick-off meeting, 2023.
Links | BibTeX | Tags: across, admire, brainteaser, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa
% Talk record. Title acronyms are braced so that sentence-casing
% bibliography styles keep their capitalisation.
@misc{23:CENTAI:hpc4ai,
  author       = {Marco Aldinucci},
  title        = {{HPC4AI}: The Research on {AI} beyond the public cloud},
  howpublished = {CENTAI kick-off meeting},
  address      = {Torino, Italy},
  year         = {2023},
  date         = {2023-03-01},
  url          = {https://datacloud.di.unito.it/index.php/s/PZXjPm8sfKTmTGb},
  keywords     = {across, admire, brainteaser, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
DYMAN (EC HE, HORIZON-EIC-2023-PATHFINDERCHALLENGES-01): Dynamically Managed Self-Cooling HPC Data Centers (2024, 36 months, total cost 4M€, G.A. 101161930)
Publications
2024
Iacopo Colonnelli, Robert Birke, Giulio Malenza, Gianluca Mittone, Alberto Mulone, Jeroen Galjaard, Lydia Y. Chen, Sanzio Bassini, Gabriella Scipione, Jan Martinovič, Vit Vondrák, Marco Aldinucci
Cross-Facility Federated Learning Journal Article
In: Procedia Computer Science, vol. 240, pp. 3–12, 2024, ISSN: 1877-0509.
Abstract | Links | BibTeX | Tags: icsc, space, streamflow
% Journal article record. The page range uses the BibTeX double hyphen
% instead of a literal Unicode en dash, so styles can typeset the range.
@article{24:eurohpc:xffl,
  author       = {Iacopo Colonnelli and Robert Birke and Giulio Malenza and Gianluca Mittone and Alberto Mulone and Jeroen Galjaard and Lydia Y. Chen and Sanzio Bassini and Gabriella Scipione and Jan Martinovič and Vit Vondrák and Marco Aldinucci},
  title        = {Cross-Facility Federated Learning},
  journal      = {Procedia Computer Science},
  booktitle    = {Proceedings of the First EuroHPC user day},
  volume       = {240},
  pages        = {3--12},
  publisher    = {Elsevier},
  address      = {Bruxelles, Belgium},
  year         = {2024},
  date         = {2024-01-01},
  issn         = {1877-0509},
  doi          = {10.1016/j.procs.2024.07.003},
  url          = {https://www.sciencedirect.com/science/article/pii/S1877050924016909},
  abstract     = {In a decade, AI frontier research transitioned from the researcher's workstation to thousands of high-end hardware-accelerated compute nodes. This rapid evolution shows no signs of slowing down in the foreseeable future. While top cloud providers may be able to keep pace with this growth rate, obtaining and efficiently exploiting computing resources at that scale is a daunting challenge for universities and SMEs. This work introduces the Cross-Facility Federated Learning (XFFL) framework to bridge this compute divide, extending the opportunity to efficiently exploit multiple independent data centres for extreme-scale deep learning tasks to data scientists and domain experts. XFFL relies on hybrid workflow abstractions to decouple tasks from environment-specific technicalities, reducing complexity and enhancing reusability. In addition, Federated Learning (FL) algorithms eliminate the need to move large amounts of data between different facilities, reducing time-to-solution and preserving data privacy. The XFFL approach is empirically evaluated by training a full LLaMAv2 7B instance on two facilities of the EuroHPC JU, showing how the increased computing power completely compensates for the additional overhead introduced by two data centres.},
  keywords     = {icsc, space, streamflow},
  pubstate     = {published},
  tppubtype    = {article}
}
Talks
2024
Iacopo Colonnelli, Robert Birke, Giulio Malenza, Gianluca Mittone, Alberto Mulone, Marco Aldinucci
Cross-Facility Federated Learning - Part II Miscellaneous
2024, (Invited talk).
Links | BibTeX | Tags: eupex, icsc, space
% Invited talk record (Helsinki, June 2024); slides are linked in the url field.
@misc{24:ic:elise:xffl,
  author       = {Iacopo Colonnelli and Robert Birke and Giulio Malenza and Gianluca Mittone and Alberto Mulone and Marco Aldinucci},
  title        = {Cross-Facility Federated Learning - Part II},
  address      = {Helsinki, Finland},
  year         = {2024},
  date         = {2024-06-01},
  url          = {https://datacloud.di.unito.it/index.php/s/7HonBpcWPxotXLX},
  note         = {Invited talk},
  keywords     = {eupex, icsc, space},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli
CWL in the HPC Ecosystem Miscellaneous
Workshop on workflow languages for HEP analysis, 2024.
Links | BibTeX | Tags: across, eupex, icsc, space, streamflow
% Workshop talk record. Title acronyms are braced so that sentence-casing
% bibliography styles keep their capitalisation.
@misc{24:icolonne:cwl4hpccern,
  author       = {Iacopo Colonnelli},
  title        = {{CWL} in the {HPC} Ecosystem},
  howpublished = {Workshop on workflow languages for HEP analysis},
  address      = {CERN, Meyrin, Switzerland},
  year         = {2024},
  date         = {2024-04-01},
  url          = {https://datacloud.di.unito.it/index.php/s/PRmqdwWHt6P2PH7},
  keywords     = {across, eupex, icsc, space, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
2023
Sofia Karvounari, Eleni Mathioulaki, Michael R. Crusoe, Iacopo Colonnelli
Standardised Workflows at EBRAINS Miscellaneous
Human Brain Project Summit 2023, 2023, (Invited talk).
Abstract | Links | BibTeX | Tags: across, eupex, space, streamflow
% Invited talk record. The acronym in the title is braced so that
% sentence-casing bibliography styles keep its capitalisation.
@misc{23:HBPSummit,
  author       = {Sofia Karvounari and Eleni Mathioulaki and Michael R. Crusoe and Iacopo Colonnelli},
  title        = {Standardised Workflows at {EBRAINS}},
  howpublished = {Human Brain Project Summit 2023},
  address      = {Marseille, France},
  year         = {2023},
  date         = {2023-03-01},
  url          = {https://datacloud.di.unito.it/index.php/s/K5YQKTsX9N7NLT8},
  note         = {Invited talk},
  abstract     = {A hands-on training offer for Standardised Workflows in EBRAINS. A short presentation will be used as an introduction, while the main hands-on session will provide information about Writing and Executing Standardised Workflows. TC will give some guidelines, so attendees can experiment with writing CWL tools and workflows and then they will be given access to VM to execute these workflows. The Workflows Dashboard will be also presented during the same session, offering to the attendees the opportunity to understand the different functionalities, use it with TC support and provide useful comments.},
  keywords     = {across, eupex, space, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
Experimenting with Systems for Decentralized Machine Learning Miscellaneous
NVidia GTC 2023, 2023.
Abstract | Links | BibTeX | Tags: across, admire, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa
% Talk record. Abstract fixes: "though" corrected to "through", and the
% ampersand is escaped so the field is safe for styles that typeset abstracts.
@misc{23:gtc:fl,
  author       = {Marco Aldinucci},
  title        = {Experimenting with Systems for Decentralized Machine Learning},
  howpublished = {NVidia GTC 2023},
  year         = {2023},
  date         = {2023-03-01},
  url          = {https://datacloud.di.unito.it/index.php/s/oyLt7xwkbKxz65c},
  abstract     = {Decentralized machine learning (DML) enables collaborative machine learning without centralized input data. Federated learning (FL) and edge inference (EI) are examples of DML. Collaboration naturally happens at the edge of a distributed system with inherently distributed data. While tools for DML are starting to flourish, much needs to be done to get more flexible and portable tools to experiment with novel techniques, non-fully connected topologies, multiple data domains, and asynchronous collaboration schemes. We'll present recent advances in DML, aiming to improve usability in data centers and, at the edge, to widen the class of models extending FL to non-DNN paradigms, to improve the accuracy of models controlling normalization and frequency of communications, and to boost data privacy through generative adversarial networks. Prerequisites: Intermediate understanding of machine learning methods and distributed \& parallel computing.},
  keywords     = {across, admire, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
HPC4AI: The Research on AI beyond the public cloud Miscellaneous
CENTAI kick-off meeting, 2023.
Links | BibTeX | Tags: across, admire, brainteaser, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa
% Talk record. Title acronyms are braced so that sentence-casing
% bibliography styles keep their capitalisation.
@misc{23:CENTAI:hpc4ai,
  author       = {Marco Aldinucci},
  title        = {{HPC4AI}: The Research on {AI} beyond the public cloud},
  howpublished = {CENTAI kick-off meeting},
  address      = {Torino, Italy},
  year         = {2023},
  date         = {2023-03-01},
  url          = {https://datacloud.di.unito.it/index.php/s/PZXjPm8sfKTmTGb},
  keywords     = {across, admire, brainteaser, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
EUMaster4HPC (EC H2020 RIA, EuroHPC-2020-03): HPC European Consortium Leading Education Activities (2022, 36 months, total cost 7M€, G.A. 101051997)
Publications
Sorry, no publications matched your criteria.
Talks
2023
Marco Aldinucci
Experimenting with Systems for Decentralized Machine Learning Miscellaneous
NVidia GTC 2023, 2023.
Abstract | Links | BibTeX | Tags: across, admire, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa
% Talk record. Abstract fixes: "though" corrected to "through", and the
% ampersand is escaped so the field is safe for styles that typeset abstracts.
@misc{23:gtc:fl,
  author       = {Marco Aldinucci},
  title        = {Experimenting with Systems for Decentralized Machine Learning},
  howpublished = {NVidia GTC 2023},
  year         = {2023},
  date         = {2023-03-01},
  url          = {https://datacloud.di.unito.it/index.php/s/oyLt7xwkbKxz65c},
  abstract     = {Decentralized machine learning (DML) enables collaborative machine learning without centralized input data. Federated learning (FL) and edge inference (EI) are examples of DML. Collaboration naturally happens at the edge of a distributed system with inherently distributed data. While tools for DML are starting to flourish, much needs to be done to get more flexible and portable tools to experiment with novel techniques, non-fully connected topologies, multiple data domains, and asynchronous collaboration schemes. We'll present recent advances in DML, aiming to improve usability in data centers and, at the edge, to widen the class of models extending FL to non-DNN paradigms, to improve the accuracy of models controlling normalization and frequency of communications, and to boost data privacy through generative adversarial networks. Prerequisites: Intermediate understanding of machine learning methods and distributed \& parallel computing.},
  keywords     = {across, admire, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
HPC4AI: The Research on AI beyond the public cloud Miscellaneous
CENTAI kick-off meeting, 2023.
Links | BibTeX | Tags: across, admire, brainteaser, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa
% Talk record. Title acronyms are braced so that sentence-casing
% bibliography styles keep their capitalisation.
@misc{23:CENTAI:hpc4ai,
  author       = {Marco Aldinucci},
  title        = {{HPC4AI}: The Research on {AI} beyond the public cloud},
  howpublished = {CENTAI kick-off meeting},
  address      = {Torino, Italy},
  year         = {2023},
  date         = {2023-03-01},
  url          = {https://datacloud.di.unito.it/index.php/s/PZXjPm8sfKTmTGb},
  keywords     = {across, admire, brainteaser, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
2022
Marco Aldinucci
Il calcolo parallelo: una storia di metodi e algoritmi raccontata dalle macchine Miscellaneous
Olimpiadi di Informatica, 2022, (Invited talk).
Abstract | Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, eupilot, textarossa
% Invited talk record (Biella, September 2022); slides are linked in the url field.
@misc{22:olimpiadi:cs,
  author       = {Marco Aldinucci},
  title        = {Il calcolo parallelo: una storia di metodi e algoritmi raccontata dalle macchine},
  howpublished = {Olimpiadi di Informatica},
  address      = {Biella, Italy},
  year         = {2022},
  date         = {2022-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/7ZdfLkn3NetzXCN},
  note         = {Invited talk},
  abstract     = {Lectio Magistralis alle finali nazionali delle Olimpiadi di Informatica 2022},
  keywords     = {across, admire, eumaster4hpc, eupex, eupilot, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
La convergenza HPC-cloud è l'anello mancante tra il calcolo scientifico e l'IA applicata Miscellaneous
Intelligenza Artificiale e Business Applications, 2022, (Invited talk).
Abstract | Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, eupilot, textarossa
% Invited talk record. Title acronyms are braced so that sentence-casing
% bibliography styles keep their capitalisation.
@misc{22:soiel:ai,
  author       = {Marco Aldinucci},
  title        = {La convergenza {HPC}-cloud è l'anello mancante tra il calcolo scientifico e l'{IA} applicata},
  howpublished = {Intelligenza Artificiale e Business Applications},
  address      = {Virtual event},
  year         = {2022},
  date         = {2022-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/xCQSqJ8bCKCXMK9},
  note         = {Invited talk},
  abstract     = {Innanzitutto, le infrastrutture HPC stanno adottando le GPU per il loro rapporto prestazioni per watt superiore rispetto ai multicore generici. In secondo luogo, i flussi di lavoro scientifici di prossima generazione stanno integrando passaggi basati sull'intelligenza artificiale per la loro precisione nell'approssimazione e nell'analisi di fenomeni complessi. In terzo luogo, l'IA e in particolare il Machine Learning (ML) rappresentano un carico di lavoro perfetto per le GPU in termini di prestazioni e tempo di sviluppo. Oggi non possiamo ancora chiudere il cerchio eseguendo senza problemi carichi di lavoro scientifici abilitati all'intelligenza artificiale nelle infrastrutture HPC perché il loro software di sistema e gli strumenti di sviluppo non sono progettati per i carichi di lavoro moderni, come i framework ML progettati per il cloud. È probabile che la convergenza HPC-cloud colmi il divario. Nel talk verranno presentate le infrastrutture e gli strumenti sviluppati all'Università di Torino per la convergenza HPC-cloud (es. HPC4AI, StreamFlow, CAPIO, Jupyter-workflow) e come sono stati utilizzati per le applicazioni di intelligenza artificiale, come la diagnosi spiegabile di polmonite COVID-19 e la tutela della privacy AI. L'esperienza maturata nella progettazione e gestione di HPC4AI costituisce il cuore della progettazione del laboratorio di contaminazione del "FutureHPC" di Torino secondo il Centro Nazionale "HPC, BigData e Quantum Computing" finanziato dal PNRR con 320M€ che dovrebbe essere operativo dal 1 settembre 2022. L'obiettivo finale del laboratorio di contaminazione è sviluppare relazioni e collaborazioni tra industria e università.},
  keywords     = {across, admire, eumaster4hpc, eupex, eupilot, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli, Barbara Cantalupo, Doriana Medić, Marco Aldinucci
Hybrid workflows for heterogeneous distributed computing Miscellaneous
3rd Italian Workshop on HPC (ITWSHPC), 2022.
Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, eupilot, textarossa
% Workshop talk record (Torino, September 2022); slides are linked in the url field.
@misc{22:itwshpc,
  author       = {Iacopo Colonnelli and Barbara Cantalupo and Doriana Medić and Marco Aldinucci},
  title        = {Hybrid workflows for heterogeneous distributed computing},
  howpublished = {3rd Italian Workshop on HPC (ITWSHPC)},
  address      = {Torino, Italy},
  year         = {2022},
  date         = {2022-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/ienbcA2DJ26aioE},
  keywords     = {across, admire, eumaster4hpc, eupex, eupilot, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli, Marco Aldinucci
CINI HPC-KTT: HPC Key Technologies and Tools National Lab Miscellaneous
NVIDIA HPC Roundtable, 2022, (Invited talk).
Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, eupilot, textarossa
% Invited talk record. Title acronyms are braced so that sentence-casing
% bibliography styles keep their capitalisation.
@misc{22:nvidia_hpc_roundtable,
  author       = {Iacopo Colonnelli and Marco Aldinucci},
  title        = {{CINI} {HPC-KTT}: {HPC} Key Technologies and Tools National Lab},
  howpublished = {NVIDIA HPC Roundtable},
  address      = {Casalecchio di Reno, Italy},
  year         = {2022},
  date         = {2022-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/9EQniZ2dGzdJ26f},
  note         = {Invited talk},
  keywords     = {across, admire, eumaster4hpc, eupex, eupilot, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
EuroHPC and the Italian HPC ecosystem Miscellaneous
Critical Infrastructure Protection Forum - EuroCC Romania, 2022, (Invited talk).
Abstract | Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, eupilot, icsc, textarossa
% Invited talk record. Acronyms and proper nouns in the title are braced so
% that sentence-casing bibliography styles keep their capitalisation.
@misc{22:cip:romania,
  author       = {Marco Aldinucci},
  title        = {{EuroHPC} and the {Italian} {HPC} ecosystem},
  howpublished = {Critical Infrastructure Protection Forum - EuroCC Romania},
  address      = {Bucharest, Romania},
  year         = {2022},
  date         = {2022-06-01},
  url          = {https://datacloud.di.unito.it/index.php/s/5dFFoNsZzwTzQkn},
  note         = {Invited talk},
  abstract     = {The talk presents the main investments currently ongoing in Italy in the HPC area as well as the activity of Italian stakeholders within EuroHPC. The novel Italian National Centre on HPC (ICSC) is introduced.},
  keywords     = {across, admire, eumaster4hpc, eupex, eupilot, icsc, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
The Italian HPC ecosystem and the next generation of EuroHPC CoE Miscellaneous
EuroHPC EoCoE final summit, 2022, (Invited talk).
Abstract | Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, eupilot, icsc, textarossa
% Invited talk record. Acronyms and proper nouns in the title are braced so
% that sentence-casing bibliography styles keep their capitalisation.
@misc{22:eocoe:summit,
  author       = {Marco Aldinucci},
  title        = {The {Italian} {HPC} ecosystem and the next generation of {EuroHPC} {CoE}},
  howpublished = {EuroHPC EoCoE final summit},
  address      = {Napoli, Italy},
  year         = {2022},
  date         = {2022-06-01},
  url          = {https://datacloud.di.unito.it/index.php/s/AH5Ms3NekeoEooB},
  note         = {Invited talk},
  abstract     = {The talk presents the main investments currently ongoing in Italy in the HPC area as well as the activity of Italian stakeholders within EuroHPC. The novel Italian National Centre on HPC (ICSC) is introduced.},
  keywords     = {across, admire, eumaster4hpc, eupex, eupilot, icsc, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
Cognitive continuum: a game theoretical approach Miscellaneous
HiPEAC Vision meeting, Brussels, 16 May 2022, 2022.
Abstract | Links | BibTeX | Tags: across, admire, brainteaser, eumaster4hpc, eupex, eupilot, textarossa
% Talk record. The date field now carries the day stated in howpublished
% (16 May 2022) instead of a first-of-month placeholder.
@misc{22:hipeacvision:fl,
  author       = {Marco Aldinucci},
  title        = {Cognitive continuum: a game theoretical approach},
  howpublished = {HiPEAC Vision meeting, Brussels, 16 May 2022},
  address      = {Brussels, Belgium},
  year         = {2022},
  date         = {2022-05-16},
  url          = {https://datacloud.di.unito.it/index.php/s/453HWfmrQyo7j9E},
  abstract     = {Cognitive continuum: a game theoretical approach, (maybe) data operations are too basic: read, write, copy, remove … The talk is aimed to contribute to the forthcoming HiPEAC Vision document},
  keywords     = {across, admire, brainteaser, eumaster4hpc, eupex, eupilot, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
Da HPC4AI al living lab dello spoke FutureHPC del Centro Nazionale HPC Miscellaneous
Condivisioni, Conferenza GARR 2022, 2022, (Keynote talk).
Abstract | Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, hpc4ai, icsc, textarossa
% Keynote talk record. Title acronyms are braced against sentence-casing;
% abstract typos fixed (PaaS, living lab).
@misc{22:garr,
  author       = {Marco Aldinucci},
  title        = {Da {HPC4AI} al living lab dello spoke {FutureHPC} del {Centro Nazionale HPC}},
  howpublished = {Condivisioni, Conferenza GARR 2022},
  address      = {Palermo, Italy},
  year         = {2022},
  date         = {2022-05-01},
  url          = {https://datacloud.di.unito.it/index.php/s/P3KSroSSmrRxZMc},
  note         = {Keynote talk},
  abstract     = {HPC4AI is an open-access laboratory of the University of Turin open to researchers, students and companies that manages a double pair of systems: a production cloud-HPC system and its twin dedicated to development. The cloud-HPC system is implemented thanks to an extended version of the GARR cloud (OpenStack) and the SLURM workload manager. HPC4AI is specifically designed to support system software development and cloud-HPC convergence tools. Among these streamflow (WMS), jupyter-as-a-service (SaaS), portable-secure-tenant (PaaS). The experience gained in the design and management of HPC4AI forms the heart of the design of the living lab of the Turin "FutureHPC" spoke of the National Center "HPC, BigData and Quantum Computing" funded by the PNRR which should be operational from September 2022.},
  keywords     = {across, admire, eumaster4hpc, eupex, hpc4ai, icsc, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Brainteaser (EC H2020 RIA, SC1-DTH-2020-1): BRinging Artificial INTelligencE home for a better cAre of amyotrophic lateral sclerosis and multiple SclERosis (2021, 48 months, total cost 5.9M€, G.A. 101017598)
Publications
Sorry, no publications matched your criteria.
Talks
2023
Marco Aldinucci
HPC4AI: The Research on AI beyond the public cloud Miscellaneous
CENTAI kick-off meeting, 2023.
Links | BibTeX | Tags: across, admire, brainteaser, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa
% Talk record. Title acronyms are braced so that sentence-casing
% bibliography styles keep their capitalisation.
@misc{23:CENTAI:hpc4ai,
  author       = {Marco Aldinucci},
  title        = {{HPC4AI}: The Research on {AI} beyond the public cloud},
  howpublished = {CENTAI kick-off meeting},
  address      = {Torino, Italy},
  year         = {2023},
  date         = {2023-03-01},
  url          = {https://datacloud.di.unito.it/index.php/s/PZXjPm8sfKTmTGb},
  keywords     = {across, admire, brainteaser, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
2022
Marco Aldinucci
Cognitive continuum: a game theoretical approach Miscellaneous
HiPEAC Vision meeting, Brussels, 16 May 2022, 2022.
Abstract | Links | BibTeX | Tags: across, admire, brainteaser, eumaster4hpc, eupex, eupilot, textarossa
% Talk record. The date field now carries the day stated in howpublished
% (16 May 2022) instead of a first-of-month placeholder.
@misc{22:hipeacvision:fl,
  author       = {Marco Aldinucci},
  title        = {Cognitive continuum: a game theoretical approach},
  howpublished = {HiPEAC Vision meeting, Brussels, 16 May 2022},
  address      = {Brussels, Belgium},
  year         = {2022},
  date         = {2022-05-16},
  url          = {https://datacloud.di.unito.it/index.php/s/453HWfmrQyo7j9E},
  abstract     = {Cognitive continuum: a game theoretical approach, (maybe) data operations are too basic: read, write, copy, remove … The talk is aimed to contribute to the forthcoming HiPEAC Vision document},
  keywords     = {across, admire, brainteaser, eumaster4hpc, eupex, eupilot, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
EPI (EC H2020 IA, EuroHPC-2020-02): The European Processor Initiative SGA2 (2022, 48 months, total cost 70M€, G.A. 101036168)
Publications
2024
Bruno Casella, Alessio Barbaro Chisari, Marco Aldinucci, Sebastiano Battiato, Mario Valerio Giuffrida
Federated Learning in a Semi-Supervised Environment for Earth Observation Data Proceedings Article
In: Proceedings of the 32nd European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning, ESANN, Bruges, Belgium, 2024.
Abstract | Links | BibTeX | Tags: ai, epi, icsc
% Conference paper record (ESANN 2024, Bruges); the PDF is linked in the url field.
@inproceedings{24:casella:fedrec,
  author       = {Bruno Casella and Alessio Barbaro Chisari and Marco Aldinucci and Sebastiano Battiato and Mario Valerio Giuffrida},
  title        = {Federated Learning in a Semi-Supervised Environment for Earth Observation Data},
  booktitle    = {Proceedings of the 32nd European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning, ESANN},
  address      = {Bruges, Belgium},
  year         = {2024},
  date         = {2024-10-01},
  doi          = {10.14428/esann/2024.es2024-214},
  url          = {https://iris.unito.it/retrieve/a798d7b8-6b98-48c2-92f4-327d2aaa8788/ES2024-214.pdf},
  abstract     = {We propose FedRec, a federated learning workflow taking advantage of unlabelled data in a semi-supervised environment to assist in the training of a supervised aggregated model. In our proposed method, an encoder architecture extracting features from unlabelled data is aggregated with the feature extractor of a classification model via weight averaging. The fully connected layers of the supervised models are also averaged in a federated fashion. We show the effectiveness of our approach by comparing it with the state-of-the-art federated algorithm, an isolated and a centralised baseline, on novel cloud detection datasets.},
  keywords     = {ai, epi, icsc},
  pubstate     = {published},
  tppubtype    = {inproceedings}
}
Bruno Casella, Matthias Jakobs, Marco Aldinucci, Sebastian Buschjäger
Federated Time Series Classification with ROCKET features Proceedings Article
In: Proceedings of the 32nd European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning, ESANN, Bruges, Belgium, 2024.
Abstract | Links | BibTeX | Tags: ai, epi, icsc
@inproceedings{24:casella:frocks,
title = {Federated Time Series Classification with ROCKET features},
author = {Bruno Casella and Matthias Jakobs and Marco Aldinucci and Sebastian Buschjäger},
url = {https://iris.unito.it/retrieve/51b63fc1-3e22-4ad4-8926-84af69cde739/ES2024-61.pdf},
doi = {10.14428/esann/2024.es2024-61},
year = {2024},
date = {2024-10-01},
booktitle = {Proceedings of the 32nd European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning, ESANN},
address = {Bruges, Belgium},
abstract = {This paper proposes FROCKS, a federated time series classification method using ROCKET features. Our approach dynamically adapts the models’ features by selecting and exchanging the best-performing ROCKET kernels from a federation of clients. Specifically, the server gathers the best-performing kernels of the clients together with the associated model parameters, and it performs a weighted average if a kernel is best-performing for more than one client. We compare the proposed method with state-of-the-art approaches on the UCR archive binary classification datasets and show superior performance on most datasets.},
keywords = {ai, epi, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
Chi Hong, Robert Birke, Pin-Yu Chen, Lydia Chen
On Dark Knowledge for Distilling Generators Proceedings Article
In: Yang, De-Nian, Xie, Xing, Tseng, Vincent S., Pei, Jian, Huang, Jen-Wei, Lin, Jerry Chun-Wei (Ed.): Proceedings of the 28th Pacific-Asia Conference on Knowledge Discovery and Data Mining, pp. 235–247, Springer, Taipei, Taiwan, 2024.
Abstract | Links | BibTeX | Tags: ai, epi, icsc
@inproceedings{24:chen:llm,
title = {On Dark Knowledge for Distilling Generators},
author = {Chi Hong and Robert Birke and Pin-Yu Chen and Lydia Chen},
editor = {De-Nian Yang and Xing Xie and Vincent S. Tseng and Jian Pei and Jen-Wei Huang and Jerry Chun-Wei Lin},
url = {https://hdl.handle.net/2318/1976671},
doi = {10.1007/978-981-97-2253-2_19},
year = {2024},
date = {2024-05-01},
booktitle = {Proceedings of the 28th Pacific-Asia Conference on Knowledge Discovery and Data Mining},
volume = {14646},
pages = {235--247},
publisher = {Springer},
address = {Taipei, Taiwan},
series = {Lecture Notes in Computer Science},
abstract = {Knowledge distillation has been applied on generative models, such as Variational Autoencoder (VAE) and Generative Adversarial Networks (GANs). To distill the knowledge, the synthetic outputs of a teacher generator are used to train a student model. While the dark knowledge, i.e., the probabilistic output, is well explored in distilling classifiers, little is known about the existence of an equivalent dark knowledge for generative models and its extractability. In this paper, we derive the first kind of empirical risk bound for distilling generative models from a Bayesian perspective. Through our analysis, we show the existence of the dark knowledge for generative models, i.e., Bayes probability distribution of a synthetic output from a given input, which achieves lower empirical risk bound than merely using the synthetic output of the generators. Furthermore, we propose a Dark Knowledge based Distillation , DKtill, which trains the student generator based on the (approximate) dark knowledge. Our extensive evaluation on distilling VAE, conditional GANs, and translation GANs on Facades and CelebA datasets show that the FID of student generators trained by DKtill combining dark knowledge are lower than student generators trained only by the synthetic outputs by up to 42.66%, and 78.99%, respectively.},
keywords = {ai, epi, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
Bruno Casella, Iacopo Colonnelli, Gianluca Mittone, Robert Birke, Walter Riviera, Antonio Sciarappa, Carlo Cavazzoni, Marco Aldinucci
A Performance Analysis for Confidential Federated Learning Proceedings Article
In: Proceedings of the 2024 Deep Learning Security and Privacy Workshop, IEEE Symposium on Security and Privacy 2024, San Francisco, CA, 2024.
Abstract | Links | BibTeX | Tags: ai, confidential, epi, icsc
@inproceedings{24:casella:sgx,
title = {A Performance Analysis for Confidential Federated Learning},
author = {Bruno Casella and Iacopo Colonnelli and Gianluca Mittone and Robert Birke and Walter Riviera and Antonio Sciarappa and Carlo Cavazzoni and Marco Aldinucci},
url = {https://iris.unito.it/retrieve/b5877a97-2d8d-4e95-8791-0aa4a1b953b3/DLSP___CONFIDENTIAL_FL.pdf},
doi = {10.1109/SPW63631.2024.00009},
year = {2024},
date = {2024-05-01},
booktitle = {Proceedings of the 2024 Deep Learning Security and Privacy Workshop, IEEE Symposium on Security and Privacy 2024},
address = {San Francisco, CA},
abstract = {Federated Learning (FL) has emerged as a solution to preserve data privacy by keeping the data locally on each participant's device. However, FL alone is still vulnerable to attacks that can cause privacy leaks. Therefore, it becomes necessary to take additional security measures at the cost of increasing runtimes. The Trusted Execution Environment (TEE) approach promises to offer the highest degree of security during execution. However, TEEs suffer from memory limits which prevent safe end-to-end FL training of modern deep models. State-of-the-art approaches limit secure training to selected layers, failing to avert the full spectrum of attacks or adopt layer-wise training affecting model performance. We benchmark the usage of a library OS (LibOS) to run the full, unmodified end-to-end FL training inside the TEE. We extensively evaluate and model the overhead of the different security mechanisms needed to protect the data and model during computation (TEE), communication (TLS), and storage (disk encryption). The obtained results across three datasets and two models demonstrate that LibOSes are a viable way to seamlessly inject security into FL with limited overhead (at most 2x), offering valuable guidance for researchers and developers aiming to apply FL in data-security-focused contexts.},
keywords = {ai, confidential, epi, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
Bruno Casella, Roberto Esposito, Antonio Sciarappa, Carlo Cavazzoni, Marco Aldinucci
Experimenting With Normalization Layers in Federated Learning on Non-IID Scenarios Journal Article
In: IEEE Access, vol. 12, pp. 47961-47971, 2024.
Links | BibTeX | Tags: epi, icsc
@article{24:casella:normalization,
title = {Experimenting With Normalization Layers in Federated Learning on Non-IID Scenarios},
author = {Bruno Casella and Roberto Esposito and Antonio Sciarappa and Carlo Cavazzoni and Marco Aldinucci},
doi = {10.1109/ACCESS.2024.3383783},
year = {2024},
date = {2024-01-01},
journal = {IEEE Access},
volume = {12},
pages = {47961--47971},
keywords = {epi, icsc},
pubstate = {published},
tppubtype = {article}
}
Bruno Casella, Walter Riviera, Marco Aldinucci, Gloria Menegaz
Protocol for training MERGE: A federated multi-input neural network for COVID-19 prognosis Journal Article
In: STAR Protocols, 2024, (https://prod-shared-star-protocols.s3.amazonaws.com/protocols/3225.pdf).
Abstract | Links | BibTeX | Tags: epi, icsc
@article{24:casella:starprotocol,
title = {Protocol for training MERGE: A federated multi-input neural network for COVID-19 prognosis},
author = {Bruno Casella and Walter Riviera and Marco Aldinucci and Gloria Menegaz},
url = {https://prod-shared-star-protocols.s3.amazonaws.com/protocols/3225.pdf},
doi = {10.1016/j.xpro.2023.102812},
year = {2024},
date = {2024-01-01},
journal = {STAR Protocols},
institution = {Computer Science Department, University of Torino},
abstract = {Federated learning is a cooperative learning approach that has emerged as an effective way to address privacy concerns. Here, we present a protocol for training MERGE: a federated multi-input neural network (NN) for COVID-19 prognosis. We describe steps for collecting and preprocessing datasets. We then detail the process of training a multi-input NN. This protocol can be adapted for use with datasets containing both image- and table-based input sources.},
keywords = {epi, icsc},
pubstate = {published},
tppubtype = {article}
}
2023
Bruno Casella, Roberto Esposito, Antonio Sciarappa, Carlo Cavazzoni, Marco Aldinucci
Experimenting with Normalization Layers in Federated Learning on non-IID scenarios Technical Report
Computer Science Department, University of Torino 2023.
Abstract | Links | BibTeX | Tags: confidential, epi, icsc
@techreport{23:casella:normalization,
  author      = {Bruno Casella and Roberto Esposito and Antonio Sciarappa and Carlo Cavazzoni and Marco Aldinucci},
  title       = {Experimenting with Normalization Layers in Federated Learning on non-IID scenarios},
  institution = {Computer Science Department, University of Torino},
  year        = {2023},
  date        = {2023-01-01},
  url         = {https://arxiv.org/pdf/2303.10630.pdf},
  abstract    = {Training Deep Learning (DL) models require large, high-quality datasets, often assembled with data from different institutions. Federated Learning (FL) has been emerging as a method for privacy-preserving pooling of datasets employing collaborative training from different institutions by iteratively globally aggregating locally trained models. One critical performance challenge of FL is operating on datasets not independently and identically distributed (non-IID) among the federation participants. Even though this fragility cannot be eliminated, it can be debunked by a suitable optimization of two hyperparameters: layer normalization methods and collaboration frequency selection. In this work, we benchmark five different normalization layers for training Neural Networks (NNs), two families of non-IID data skew, and two datasets. Results show that Batch Normalization, widely employed for centralized DL, is not the best choice for FL, whereas Group and Layer Normalization consistently outperform Batch Normalization. Similarly, frequent model aggregation decreases convergence speed and mode quality.},
  keywords    = {confidential, epi, icsc},
  pubstate    = {published},
  tppubtype   = {techreport}
}
Bruno Casella, Lorenzo Paletto
Predicting Cryptocurrencies Market Phases through On-Chain Data Long-Term Forecasting Proceedings Article
In: Proceedings of the 2023 IEEE International Conference on Blockchain and Cryptocurrency (ICBC), 1-5 May 2023, Dubai, 2023, (https://ieeexplore.ieee.org/document/10174989).
Abstract | Links | BibTeX | Tags: epi, icsc
@inproceedings{23:casella:onchain,
title = {Predicting Cryptocurrencies Market Phases through On-Chain Data Long-Term Forecasting},
author = {Bruno Casella and Lorenzo Paletto},
url = {https://iris.unito.it/bitstream/2318/1902652/1/6.%20ICBC23%20-%20PREDICTING%20BTC.pdf},
doi = {10.1109/ICBC56567.2023.10174989},
year = {2023},
date = {2023-01-01},
booktitle = {Proceedings of the 2023 IEEE International Conference on Blockchain and Cryptocurrency (ICBC), 1-5 May 2023, Dubai},
abstract = {Blockchain, the underlying technology of Bitcoin and several other cryptocurrencies, like Ethereum, produces a massive amount of open-access data that can be analyzed, providing important information about the network's activity and its respective token. The on-chain data have extensively been used as input to Machine Learning algorithms for predicting cryptocurrencies' future prices; however, there is a lack of study in predicting the future behaviour of on-chain data. This study aims to show how on-chain data can be used to detect cryptocurrency market regimes, like minimum and maximum, bear and bull market phases, and how forecasting these data can provide an optimal asset allocation for long-term investors.},
note = {https://ieeexplore.ieee.org/document/10174989},
keywords = {epi, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
Bruno Casella, Samuele Fonio
Architecture-Based FedAvg for Vertical Federated Learning Proceedings Article
In: Proceedings of the 3rd Workshop on Distributed Machine Learning for the Intelligent Computing Continuum (DML-ICC), IEEE/ACM UCC 2023, Taormina, Italy, 4 December 2023, 2023, (https://iris.unito.it/bitstream/2318/1949730/1/HALF_HVL_for_DML_ICC23___Taormina-2.pdf).
Abstract | Links | BibTeX | Tags: ai, epi, icsc
@inproceedings{23:casella:architecturalfedavg,
  author    = {Bruno Casella and Samuele Fonio},
  title     = {Architecture-Based FedAvg for Vertical Federated Learning},
  booktitle = {Proceedings of the 3rd Workshop on Distributed Machine Learning for the Intelligent Computing Continuum (DML-ICC), IEEE/ACM UCC 2023, Taormina, Italy, 4 December 2023},
  year      = {2023},
  date      = {2023-01-01},
  doi       = {10.1145/3603166.3632559},
  url       = {https://iris.unito.it/retrieve/173d9960-8531-419d-9bd5-5acce6694c4e/Aggregation%20Based%20VFL.pdf},
  note      = {https://iris.unito.it/bitstream/2318/1949730/1/HALF_HVL_for_DML_ICC23___Taormina-2.pdf},
  abstract  = {Federated Learning (FL) has emerged as a promising solution to address privacy concerns by collaboratively training Deep Learning (DL) models across distributed parties. This work proposes an architecture-based aggregation strategy in Vertical FL, where parties hold data with different attributes but shared instances. Our approach leverages the identical architectural parts, i.e. neural network layers, of different models to selectively aggregate weights, which is particularly relevant when collaborating with institutions holding different types of datasets, i.e., image, text, or tabular datasets. In a scenario where two entities train DL models, such as a Convolutional Neural Network (CNN) and a Multi-Layer Perceptron (MLP), our strategy computes the average only for architecturally identical segments. This preserves data-specific features learned from demographic and clinical data. We tested our approach on two clinical datasets, i.e., the COVID-CXR dataset and the ADNI study. Results show that our method achieves comparable results with the centralized scenario, in which all the data are collected in a single data lake, and benefits from FL generalizability. In particular, compared to the non-federated models, our proposed proof-of-concept model exhibits a slight performance loss on the COVID-CXR dataset (less than 8%), but outperforms ADNI models by up to 12%. Moreover, communication costs between training rounds are minimized by exchanging only the dense layer parameters.},
  keywords  = {ai, epi, icsc},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Bruno Casella, Walter Riviera, Marco Aldinucci, Gloria Menegaz
MERGE: A model for multi-input biomedical federated learning Journal Article
In: Patterns, pp. 100856, 2023, ISSN: 2666-3899.
Abstract | Links | BibTeX | Tags: ai, epi, icsc
@article{23:fl:patterns,
  author    = {Bruno Casella and Walter Riviera and Marco Aldinucci and Gloria Menegaz},
  title     = {MERGE: A model for multi-input biomedical federated learning},
  journal   = {Patterns},
  pages     = {100856},
  year      = {2023},
  date      = {2023-01-01},
  issn      = {2666-3899},
  doi       = {10.1016/j.patter.2023.100856},
  url       = {https://www.sciencedirect.com/science/article/pii/S2666389923002404},
  abstract  = {Driven by the deep learning (DL) revolution, artificial intelligence (AI) has become a fundamental tool for many biomedical tasks, including analyzing and classifying diagnostic images. Imaging, however, is not the only source of information. Tabular data, such as personal and genomic data and blood test results, are routinely collected but rarely considered in DL pipelines. Nevertheless, DL requires large datasets that often must be pooled from different institutions, raising non-trivial privacy concerns. Federated learning (FL) is a cooperative learning paradigm that aims to address these issues by moving models instead of data across different institutions. Here, we present a federated multi-input architecture using images and tabular data as a methodology to enhance model performance while preserving data privacy. We evaluated it on two showcases: the prognosis of COVID-19 and patients' stratification in Alzheimer's disease, providing evidence of enhanced accuracy and F1 scores against single-input models and improved generalizability against non-federated models.},
  keywords  = {ai, epi, icsc},
  pubstate  = {published},
  tppubtype = {article}
}
Talks
2024
Samuele Fonio, Bruno Casella, Oussama Harrak
Federated Adaboost for Survival Analysis Miscellaneous
European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases, 2nd Workshop on Advancements in Federated Learning (WAFL), 2024.
Abstract | Links | BibTeX | Tags: ai, epi, fl, icsc
@misc{23:ecmlpkdd:fedsurvboost,
title = {Federated Adaboost for Survival Analysis},
author = {Samuele Fonio and Bruno Casella and Oussama Harrak},
url = {https://datacloud.di.unito.it/index.php/s/DtXiQfne6BEC235},
year = {2024},
date = {2024-09-01},
address = {Vilnius, Lithuania},
abstract = {This work proposes FedSurvBoost, a federated learning pipeline for survival analysis based on the AdaBoost.F algorithm, which iteratively aggregates the best local weak hypotheses. Our method extends AdaBoost.F by removing the dependence on the number of classes coefficient from the computation of the weights of the best model. This makes it suitable for regression tasks, such as survival analysis. We show the effectiveness of our approach by comparing it with state-of-the-art methods, specifically developed for survival analysis problems, on two common survival datasets.},
howpublished = {European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases, 2nd Workshop on Advancements in Federated Learning (WAFL)},
keywords = {ai, epi, fl, icsc},
pubstate = {published},
tppubtype = {misc}
}
2023
Marco Aldinucci
Experimenting with Systems for Decentralized Machine Learning Miscellaneous
NVidia GTC 2023, 2023.
Abstract | Links | BibTeX | Tags: across, admire, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa
@misc{23:gtc:fl,
  author       = {Marco Aldinucci},
  title        = {Experimenting with Systems for Decentralized Machine Learning},
  howpublished = {NVidia GTC 2023},
  year         = {2023},
  date         = {2023-03-01},
  url          = {https://datacloud.di.unito.it/index.php/s/oyLt7xwkbKxz65c},
  abstract     = {Decentralized machine learning (DML) enables collaborative machine learning without centralized input data. Federated learning (FL) and edge inference (EI) are examples of DML. Collaboration naturally happens at the edge of a distributed system with inherently distributed data. While tools for DML are starting to flourish, much needs to be done to get more flexible and portable tools to experiment with novel techniques, non-fully connected topologies, multiple data domains, and asynchronous collaboration schemes. We'll present recent advances in DML, aiming to improve usability in data centers and, at the edge, to widen the class of models extending FL to non-DNN paradigms, to improve the accuracy of models controlling normalization and frequency of communications, and to boost data privacy though generative adversarial networks. Prerequisites: Intermediate understanding of machine learning methods and distributed & parallel computing.},
  keywords     = {across, admire, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
HPC4AI: The Research on AI beyond the public cloud Miscellaneous
CENTAI kick-off meeting, 2023.
Links | BibTeX | Tags: across, admire, brainteaser, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa
@misc{23:CENTAI:hpc4ai,
  author       = {Marco Aldinucci},
  title        = {HPC4AI: The Research on AI beyond the public cloud},
  howpublished = {CENTAI kick-off meeting},
  address      = {Torino, Italy},
  year         = {2023},
  date         = {2023-03-01},
  url          = {https://datacloud.di.unito.it/index.php/s/PZXjPm8sfKTmTGb},
  keywords     = {across, admire, brainteaser, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Bruno Casella, Samuele Fonio
Architecture-Based FedAvg for Vertical Federated Learning Miscellaneous
2023.
Abstract | Links | BibTeX | Tags: ai, epi, fl, icsc
@misc{23:casella:architecturalfedavgtalk,
title = {Architecture-Based FedAvg for Vertical Federated Learning},
author = {Bruno Casella and Samuele Fonio},
url = {https://datacloud.di.unito.it/index.php/s/kJQxnqG4d2ZSicK},
year = {2023},
date = {2023-01-01},
abstract = {Federated Learning (FL) has emerged as a promising solution to address privacy concerns by collaboratively training Deep Learning (DL) models across distributed parties. This work proposes an architecture-based aggregation strategy in Vertical FL, where parties hold data with different attributes but shared instances. Our approach leverages the identical architectural parts, i.e. neural network layers, of different models to selectively aggregate weights, which is particularly relevant when collaborating with institutions holding different types of datasets, i.e., image, text, or tabular datasets. In a scenario where two entities train DL models, such as a Convolutional Neural Network (CNN) and a Multi-Layer Perceptron (MLP), our strategy computes the average only for architecturally identical segments. This preserves data-specific features learned from demographic and clinical data. We tested our approach on two clinical datasets, i.e., the COVID-CXR dataset and the ADNI study. Results show that our method achieves comparable results with the centralized scenario, in which all the data are collected in a single data lake, and benefits from FL generalizability. In particular, compared to the non-federated models, our proposed proof-of-concept model exhibits a slight performance loss on the COVID-CXR dataset (less than 8%), but outperforms ADNI models by up to 12%. Moreover, communication costs between training rounds are minimized by exchanging only the dense layer parameters.},
howpublished = {Proceedings of the 3rd Workshop on Distributed Machine Learning for the Intelligent Computing Continuum (DML-ICC), IEEE/ACM UCC 2023, Taormina, Italy, 4 December 2023},
keywords = {ai, epi, fl, icsc},
pubstate = {published},
tppubtype = {misc}
}
ACROSS (EC H2020 IA, EuroHPC-01-2019): HPC Big Data Artificial Intelligence cross-stack platform toward exascale (2021, 36 months, total cost 8M€, G.A. n. 955648)
Publications
2024
Simone Leo, Michael R. Crusoe, Laura Rodríguez-Navas, Raül Sirvent, Alexander Kanitz, Paul De Geest, Rudolf Wittner, Luca Pireddu, Daniel Garijo, José M. Fernández, Iacopo Colonnelli, Matej Gallo, Tazro Ohta, Hirotaka Suetake, Salvador Capella-Gutierrez, Renske Wit, Bruno P. Kinoshita, Stian Soiland-Reyes
Recording provenance of workflow runs with RO-Crate Journal Article
In: PLoS ONE, vol. 19, no. 9, pp. 1–35, 2024.
Abstract | Links | BibTeX | Tags: across, eupex, icsc, streamflow
@article{24:pone:wfrunrocrate,
title = {Recording provenance of workflow runs with RO-Crate},
author = {Simone Leo and Michael R. Crusoe and Laura Rodríguez-Navas and Raül Sirvent and Alexander Kanitz and Paul De Geest and Rudolf Wittner and Luca Pireddu and Daniel Garijo and José M. Fernández and Iacopo Colonnelli and Matej Gallo and Tazro Ohta and Hirotaka Suetake and Salvador Capella-Gutierrez and Renske Wit and Bruno P. Kinoshita and Stian Soiland-Reyes},
url = {https://iris.unito.it/retrieve/d261a069-1afb-4384-88e8-97d62b183b55/journal.pone.0309210.pdf},
doi = {10.1371/journal.pone.0309210},
year = {2024},
date = {2024-09-01},
journal = {PLoS ONE},
volume = {19},
number = {9},
pages = {1--35},
publisher = {Public Library of Science},
abstract = {Recording the provenance of scientific computation results is key to the support of traceability, reproducibility and quality assessment of data products. Several data models have been explored to address this need, providing representations of workflow plans and their executions as well as means of packaging the resulting information for archiving and sharing. However, existing approaches tend to lack interoperable adoption across workflow management systems. In this work we present Workflow Run RO-Crate, an extension of RO-Crate (Research Object Crate) and Schema.org to capture the provenance of the execution of computational workflows at different levels of granularity and bundle together all their associated objects (inputs, outputs, code, etc.). The model is supported by a diverse, open community that runs regular meetings, discussing development, maintenance and adoption aspects. Workflow Run RO-Crate is already implemented by several workflow management systems, allowing interoperable comparisons between workflow runs from heterogeneous systems. We describe the model, its alignment to standards such as W3C PROV, and its implementation in six workflow systems. Finally, we illustrate the application of Workflow Run RO-Crate in two use cases of machine learning in the digital image analysis domain.},
keywords = {across, eupex, icsc, streamflow},
pubstate = {published},
tppubtype = {article}
}
2023
Iacopo Colonnelli
Workflow Models for Heterogeneous Distributed Systems Proceedings Article
In: Bena, Nicola, Martino, Beniamino Di, Maratea, Antonio, Sperduti, Alessandro, Nardo, Emanuel Di, Ciaramella, Angelo, Montella, Raffaele, Ardagna, Claudio A. (Ed.): Proceedings of the 2nd Italian Conference on Big Data and Data Science (ITADATA 2023), Naples, Italy, September 11-13, 2023, CEUR-WS.org, 2023.
Abstract | Links | BibTeX | Tags: across, eupex, icsc, jupyter-workflow, streamflow
@inproceedings{23:colonnelli:itadata,
  author    = {Iacopo Colonnelli},
  title     = {Workflow Models for Heterogeneous Distributed Systems},
  editor    = {Nicola Bena and Beniamino Di Martino and Antonio Maratea and Alessandro Sperduti and Emanuel Di Nardo and Angelo Ciaramella and Raffaele Montella and Claudio A. Ardagna},
  booktitle = {Proceedings of the 2nd Italian Conference on Big Data and Data Science (ITADATA 2023), Naples, Italy, September 11-13, 2023},
  series    = {CEUR Workshop Proceedings},
  volume    = {3606},
  publisher = {CEUR-WS.org},
  year      = {2023},
  date      = {2023-01-01},
  url       = {https://ceur-ws.org/Vol-3606/invited77.pdf},
  abstract  = {This article introduces a novel hybrid workflow abstraction that injects topology awareness directly into the definition of a distributed workflow model. In particular, the article briefly discusses the advantages brought by this approach to the design and orchestration of large-scale data-oriented workflows, the current level of support from state-of-the-art workflow systems, and some future research directions.},
  keywords  = {across, eupex, icsc, jupyter-workflow, streamflow},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Alberto Mulone, Sherine Awad, Davide Chiarugi, Marco Aldinucci
Porting the Variant Calling Pipeline for NGS data in cloud-HPC environment Proceedings Article
In: Shahriar, Hossain, Teranishi, Yuuichi, Cuzzocrea, Alfredo, Sharmin, Moushumi, Towey, Dave, Majumder, A. K. M. Jahangir Alam, Kashiwazaki, Hiroki, Yang, Ji-Jiang, Takemoto, Michiharu, Sakib, Nazmus, Banno, Ryohei, Ahamed, Sheikh Iqbal (Ed.): 47th IEEE Annual Computers, Software, and Applications Conference, COMPSAC 2023, pp. 1858–1863, IEEE, Torino, Italy, 2023.
Abstract | Links | BibTeX | Tags: across, icsc, streamflow
@inproceedings{23:mulone:wide:vcp,
title = {Porting the Variant Calling Pipeline for NGS data in cloud-HPC environment},
author = {Alberto Mulone and Sherine Awad and Davide Chiarugi and Marco Aldinucci},
editor = {Hossain Shahriar and Yuuichi Teranishi and Alfredo Cuzzocrea and Moushumi Sharmin and Dave Towey and A. K. M. Jahangir Alam Majumder and Hiroki Kashiwazaki and Ji-Jiang Yang and Michiharu Takemoto and Nazmus Sakib and Ryohei Banno and Sheikh Iqbal Ahamed},
url = {https://iris.unito.it/bitstream/2318/1919364/1/paper.pdf},
doi = {10.1109/COMPSAC57700.2023.00288},
year = {2023},
date = {2023-01-01},
booktitle = {47th IEEE Annual Computers, Software, and Applications Conference, COMPSAC 2023},
pages = {1858--1863},
publisher = {IEEE},
address = {Torino, Italy},
abstract = {In recent years we have understood the importance of analyzing and sequencing human genetic variation. A relevant aspect that emerged from the Covid-19 pandemic was the need to obtain results very quickly; this involved using High-Performance Computing (HPC) environments to execute the Next Generation Sequencing (NGS) pipeline. However, HPC is not always the most suitable environment for the entire execution of a pipeline, especially when it involves many heterogeneous tools. The ability to execute parts of the pipeline on different environments can lead to higher performance but also cheaper executions. This work shows the design and optimization process that led us to a state-of-the-art Variant Calling hybrid workflow based on the StreamFlow Workflow Management System (WfMS). We also compare StreamFlow with Snakemake, an established WfMS targeting HPC facilities, observing comparable performance on single environments and satisfactory improvements with a hybrid cloud-HPC configuration.},
keywords = {across, icsc, streamflow},
pubstate = {published},
tppubtype = {inproceedings}
}
Iacopo Colonnelli, Bruno Casella, Gianluca Mittone, Yasir Arfat, Barbara Cantalupo, Roberto Esposito, Alberto Riccardo Martinelli, Doriana Medić, Marco Aldinucci
Federated Learning meets HPC and cloud Proceedings Article
In: Bufano, Filomena, Riggi, Simone, Sciacca, Eva, Schillirò, Francesco (Ed.): Astrophysics and Space Science Proceedings, pp. 193–199, Springer, Catania, Italy, 2023, ISBN: 978-3-031-34167-0, (Keynote talk).
Abstract | Links | BibTeX | Tags: across, eupilot, streamflow
@inproceedings{22:ml4astro,
title = {Federated Learning meets HPC and cloud},
author = {Iacopo Colonnelli and Bruno Casella and Gianluca Mittone and Yasir Arfat and Barbara Cantalupo and Roberto Esposito and Alberto Riccardo Martinelli and Doriana Medić and Marco Aldinucci},
editor = {Filomena Bufano and Simone Riggi and Eva Sciacca and Francesco Schillirò},
url = {https://iris.unito.it/retrieve/3ac66baa-9d9a-4e9f-94a5-13700694d8aa/ML4Astro.pdf},
doi = {10.1007/978-3-031-34167-0_39},
isbn = {978-3-031-34167-0},
year = {2023},
date = {2023-01-01},
booktitle = {Astrophysics and Space Science Proceedings},
volume = {60},
pages = {193--199},
publisher = {Springer},
address = {Catania, Italy},
abstract = {HPC and AI are fated to meet for several reasons. This article will discuss some of them and argue why this will happen through the set of methods and technologies that underpin cloud computing. As a paradigmatic example, we present a new federated learning system that collaboratively trains a deep learning model in different supercomputing centers. The system is based on the StreamFlow workflow manager designed for hybrid cloud-HPC infrastructures.},
howpublished = {Machine Learning for Astrophysics (ML4ASTRO)},
note = {Keynote talk},
keywords = {across, eupilot, streamflow},
pubstate = {published},
tppubtype = {inproceedings}
}
2022
Iacopo Colonnelli, Marco Aldinucci
Hybrid Workflows For Large - Scale Scientific Applications Proceedings Article
In: Sixth EAGE High Performance Computing Workshop, pp. 1–5, European Association of Geoscientists & Engineers, Milano, Italy, 2022, ISSN: 2214-4609.
Abstract | Links | BibTeX | Tags: across, eupex
@inproceedings{22:eage-hpc-workshop,
title = {Hybrid Workflows For Large - Scale Scientific Applications},
author = {Iacopo Colonnelli and Marco Aldinucci},
url = {https://iris.unito.it/retrieve/d79ddabb-f9d7-4a55-9f84-1528b1533ba3/Extended_Abstract.pdf},
doi = {10.3997/2214-4609.2022615029},
issn = {2214-4609},
year = {2022},
date = {2022-09-01},
booktitle = {Sixth EAGE High Performance Computing Workshop},
pages = {1--5},
publisher = {European Association of Geoscientists \& Engineers},
address = {Milano, Italy},
abstract = {Large-scale scientific applications are facing an irreversible transition from monolithic, high-performance oriented codes to modular and polyglot deployments of specialised (micro-)services. The reasons behind this transition are many: coupling of standard solvers with Deep Learning techniques, offloading of data analysis and visualisation to Cloud, and the advent of specialised hardware accelerators. Topology-aware Workflow Management Systems (WMSs) play a crucial role. In particular, topology-awareness allows an explicit mapping of workflow steps onto heterogeneous locations, allowing automated executions on top of hybrid architectures (e.g., cloud+HPC or classical+quantum). Plus, topology-aware WMSs can offer nonfunctional requirements OOTB, e.g. components' life-cycle orchestration, secure and efficient data transfers, fault tolerance, and cross-cluster execution of urgent workloads. Augmenting interactive Jupyter Notebooks with distributed workflow capabilities allows domain experts to prototype and scale applications using the same technological stack, while relying on a feature-rich and user-friendly web interface. This abstract will showcase how these general methodologies can be applied to a typical geoscience simulation pipeline based on the Full Wavefront Inversion (FWI) technique. In particular, a prototypical Jupyter Notebook will be executed interactively on Cloud. Preliminary data analyses and post-processing will be executed locally, while the computationally demanding optimisation loop will be scheduled on a remote HPC cluster.},
keywords = {across, eupex},
pubstate = {published},
tppubtype = {inproceedings}
}
Iacopo Colonnelli, Marco Aldinucci, Barbara Cantalupo, Luca Padovani, Sergio Rabellino, Concetto Spampinato, Roberto Morelli, Rosario Di Carlo, Nicolò Magini, Carlo Cavazzoni
Distributed workflows with Jupyter Journal Article
In: Future Generation Computer Systems, vol. 128, pp. 282–298, 2022, ISSN: 0167-739X.
Abstract | Links | BibTeX | Tags: across, deephealth, jupyter-workflow, streamflow
@article{21:FGCS:jupyflow,
  author       = {Colonnelli, Iacopo and Aldinucci, Marco and Cantalupo, Barbara and Padovani, Luca and Rabellino, Sergio and Spampinato, Concetto and Morelli, Roberto and Di Carlo, Rosario and Magini, Nicolò and Cavazzoni, Carlo},
  title        = {Distributed workflows with {Jupyter}},
  journal      = {Future Generation Computer Systems},
  volume       = {128},
  pages        = {282--298},
  year         = {2022},
  date         = {2022-01-01},
  issn         = {0167-739X},
  doi          = {10.1016/j.future.2021.10.007},
  url          = {https://www.sciencedirect.com/science/article/pii/S0167739X21003976},
  abstract     = {The designers of a new coordination interface enacting complex workflows have to tackle a dichotomy: choosing a language-independent or language-dependent approach. Language-independent approaches decouple workflow models from the host code's business logic and advocate portability. Language-dependent approaches foster flexibility and performance by adopting the same host language for business and coordination code. Jupyter Notebooks, with their capability to describe both imperative and declarative code in a unique format, allow taking the best of the two approaches, maintaining a clear separation between application and coordination layers but still providing a unified interface to both aspects. We advocate the Jupyter Notebooks' potential to express complex distributed workflows, identifying the general requirements for a Jupyter-based Workflow Management System (WMS) and introducing a proof-of-concept portable implementation working on hybrid Cloud-HPC infrastructures. As a byproduct, we extended the vanilla IPython kernel with workflow-based parallel and distributed execution capabilities. The proposed Jupyter-workflow (Jw) system is evaluated on common scenarios for High Performance Computing (HPC) and Cloud, showing its potential in lowering the barriers between prototypical Notebooks and production-ready implementations.},
  keywords     = {across, deephealth, jupyter-workflow, streamflow},
  pubstate     = {published},
  tppubtype    = {article}
}
Talks
2024
Marco Edoardo Santimaria, Iacopo Colonnelli, Marco Aldinucci
Releasing the CAPIO middleware from MPI derived constraints Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: across, admire, capio, capiocl, eupex, icsc
@misc{24:santimaria:bighpc,
  author       = {Marco Edoardo Santimaria and Iacopo Colonnelli and Marco Aldinucci},
  title        = {Releasing the {CAPIO} middleware from {MPI} derived constraints},
  address      = {Pisa, Italy},
  year         = {2024},
  date         = {2024-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/zrJGD4i36fWdp5g},
  abstract     = {CAPIO is a middleware that transparently injects streaming capabilities into file-based workflows. However, its implementation is limited to HPC environments based on the MPI framework, significantly limiting its applications. This paper will illustrate a proposed architecture and some preliminary results aimed at investigating the usage of a distributed file system as a communication medium for the CAPIO middleware, with the ultimate goal of supporting both CLOUD-based and HPC-based workflows.},
  keywords     = {across, admire, capio, capiocl, eupex, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Edoardo Santimaria, Iacopo Colonnelli, Massimo Torquati, Marco Aldinucci
CAPIO: Cross Application Programmable IO Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: across, admire, capio, capiocl, eupex, icsc
@misc{24:santimaria:itadata:shpcpee,
  author       = {Marco Edoardo Santimaria and Iacopo Colonnelli and Massimo Torquati and Marco Aldinucci},
  title        = {{CAPIO}: Cross Application Programmable {IO}},
  address      = {Pisa, Italy},
  year         = {2024},
  date         = {2024-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/rg6LWwrZXi6tTXm},
  abstract     = {With the increasing amount of digital data available for analysis and simulation, the class of I/O-intensive HPC workflows is fated to expand, further exacerbating quickly the performance gap between computing, memory, and storage technologies. CAPIO (Cross-Application Programmable I/O), is a middleware capable of injecting I/O streaming capabilities into file-based workflows, improving the computation-I/O overlap without the need to change the application code. In this presentation, we will introduce the CAPIO-CL language with its semantics, as well as the implementation of the CAPIO-CL language through the CAPIO middleware. We will also provide some case studies of how CAPIO has been employed to improve workflow execution time as well as some future directions.},
  keywords     = {across, admire, capio, capiocl, eupex, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Edoardo Santimaria
CAPIO-CL: Cross Application Programmable IO - Coordination Language Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: across, admire, capio, eupex, icsc
@misc{24:santimaria:hlpp:capiocl,
  author       = {Marco Edoardo Santimaria},
  title        = {{CAPIO-CL}: Cross Application Programmable {IO} - Coordination Language},
  address      = {Pisa, Italy},
  year         = {2024},
  date         = {2024-07-01},
  url          = {https://datacloud.di.unito.it/index.php/s/zsKY3PWzX5NFCiX},
  abstract     = {The performance bottleneck in file-based workflows remains a pressing issue in the realm of I/O-based workflows. To address this challenge, a novel annotation language has been developed. CAPIO-CL is positioned as an innovative I/O coordination language, enabling users to annotate data dependencies within file-based workflows with synchronization semantics pertinent to the involved files and directories. Through the information provided by the language, optimization opportunities arise in streaming and preemptive data movement. This paper serves to illustrate the semantics and syntax enabling CAPIO-CL to enhance the performance of in situ workflows without necessitating the rewriting or modification of the original workflow application steps. Finally, an analysis of CAPIO-CL is provided, taking into consideration both language expressiveness and application performance enhancement.},
  keywords     = {across, admire, capio, eupex, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli
CWL in the HPC Ecosystem Miscellaneous
Workshop on workflow languages for HEP analysis, 2024.
Links | BibTeX | Tags: across, eupex, icsc, space, streamflow
@misc{24:icolonne:cwl4hpccern,
  author       = {Iacopo Colonnelli},
  title        = {{CWL} in the {HPC} Ecosystem},
  howpublished = {Workshop on workflow languages for HEP analysis},
  address      = {CERN, Meyrin, Switzerland},
  year         = {2024},
  date         = {2024-04-01},
  url          = {https://datacloud.di.unito.it/index.php/s/PRmqdwWHt6P2PH7},
  keywords     = {across, eupex, icsc, space, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
2023
Iacopo Colonnelli, Robert Birke, Giulio Malenza, Gianluca Mittone, Alberto Mulone, Marco Aldinucci, Valerio Basile, Marco Antonio Stranisci, Viviana Patti, Jeroen Galjaard, Lydia Y. Chen, Sanzio Bassini, Massimiliano Guarrasi, Gabriella Scipione, Jan Martinovič, Vit Vondrák
Cross-Facility Federated Learning Miscellaneous
1st EuroHPC User Day, 2023.
Links | BibTeX | Tags: across, ai, eupex, eupilot, HPC
@misc{23:eurohpc,
  author       = {Iacopo Colonnelli and Robert Birke and Giulio Malenza and Gianluca Mittone and Alberto Mulone and Marco Aldinucci and Valerio Basile and Marco Antonio Stranisci and Viviana Patti and Jeroen Galjaard and Lydia Y. Chen and Sanzio Bassini and Massimiliano Guarrasi and Gabriella Scipione and Jan Martinovič and Vit Vondrák},
  title        = {Cross-Facility Federated Learning},
  howpublished = {1st EuroHPC User Day},
  address      = {Brussels, Belgium},
  year         = {2023},
  date         = {2023-12-01},
  url          = {https://datacloud.di.unito.it/index.php/s/DDAz4QkJP3WZ68M},
  keywords     = {across, ai, eupex, eupilot, HPC},
  pubstate     = {published},
  tppubtype    = {misc}
}
Alberto Scionti, Iacopo Colonnelli
Orchestrating Multi-Domain Workflows: The ACROSS Approach Miscellaneous
Workflows Community: Modern Workflows for Continuum and Cross-Facility Computing, 2023.
Links | BibTeX | Tags: across, streamflow
@misc{23:sc:WCIBoF,
  author       = {Alberto Scionti and Iacopo Colonnelli},
  title        = {Orchestrating Multi-Domain Workflows: The {ACROSS} Approach},
  howpublished = {Workflows Community: Modern Workflows for Continuum and Cross-Facility Computing},
  address      = {Denver, CO, USA},
  year         = {2023},
  date         = {2023-11-01},
  url          = {https://datacloud.di.unito.it/index.php/s/rJXcDBK4mLmS8yz},
  keywords     = {across, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli
ACROSS: HPC Big Data Artificial Intelligence Cross Stack Platform Towards Exascale Miscellaneous
LN HPC-KTT Assemblea Nazionale 2023, 2023.
Links | BibTeX | Tags: across, streamflow
@misc{23:AssembleaHPC-KTT,
  author       = {Iacopo Colonnelli},
  title        = {{ACROSS}: {HPC} Big Data Artificial Intelligence Cross Stack Platform Towards Exascale},
  howpublished = {LN HPC-KTT Assemblea Nazionale 2023},
  address      = {Pisa, Italy},
  year         = {2023},
  date         = {2023-10-01},
  url          = {https://datacloud.di.unito.it/index.php/s/aK7es8BgFeWorjD},
  keywords     = {across, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
Alberto Mulone, Sherine Awad, Davide Chiarugi, Marco Aldinucci
Porting the Variant Calling Pipeline for NGS data in cloud-HPC environment Miscellaneous
47th IEEE Annual Computers, Software, and Applications Conference, COMPSAC 2023, 2023.
Abstract | Links | BibTeX | Tags: across, icsc, streamflow
@misc{23:mulone:wide:talk,
  author       = {Alberto Mulone and Sherine Awad and Davide Chiarugi and Marco Aldinucci},
  title        = {Porting the Variant Calling Pipeline for {NGS} data in {cloud-HPC} environment},
  howpublished = {47th IEEE Annual Computers, Software, and Applications Conference, COMPSAC 2023},
  address      = {Torino, Italy},
  year         = {2023},
  date         = {2023-06-01},
  url          = {https://datacloud.di.unito.it/index.php/s/zNLj3LCZNsNxHwy},
  abstract     = {In recent years we have understood the importance of analyzing and sequencing human genetic variation. A relevant aspect that emerged from the Covid-19 pandemic was the need to obtain results very quickly; this involved using High-Performance Computing (HPC) environments to execute the Next Generation Sequencing (NGS) pipeline. However, HPC is not always the most suitable environment for the entire execution of a pipeline, especially when it involves many heterogeneous tools. The ability to execute parts of the pipeline on different environments can lead to higher performance but also cheaper executions. This work shows the design and optimization process that led us to a state-of-the-art Variant Calling hybrid workflow based on the StreamFlow Workflow Management System (WfMS). We also compare StreamFlow with Snakemake, an established WfMS targeting HPC facilities, observing comparable performance on single environments and satisfactory improvements with a hybrid cloud-HPC configuration.},
  keywords     = {across, icsc, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
Sofia Karvounari, Eleni Mathioulaki, Michael R. Crusoe, Iacopo Colonnelli
Standardised Workflows at EBRAINS Miscellaneous
Human Brain Project Summit 2023, 2023, (Invited talk).
Abstract | Links | BibTeX | Tags: across, eupex, space, streamflow
@misc{23:HBPSummit,
  author       = {Sofia Karvounari and Eleni Mathioulaki and Michael R. Crusoe and Iacopo Colonnelli},
  title        = {Standardised Workflows at {EBRAINS}},
  howpublished = {Human Brain Project Summit 2023},
  address      = {Marseille, France},
  year         = {2023},
  date         = {2023-03-01},
  note         = {Invited talk},
  url          = {https://datacloud.di.unito.it/index.php/s/K5YQKTsX9N7NLT8},
  abstract     = {A hands-on training offer for Standardised Workflows in EBRAINS. A short presentation will be used as an introduction, while the main hands-on session will provide information about Writing and Executing Standardised Workflows. TC will give some guidelines, so attendees can experiment with writing CWL tools and workflows and then they will be given access to VM to execute these workflows. The Workflows Dashboard will be also presented during the same session, offering to the attendees the opportunity to understand the different functionalities, use it with TC support and provide useful comments.},
  keywords     = {across, eupex, space, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
Experimenting with Systems for Decentralized Machine Learning Miscellaneous
NVIDIA GTC 2023, 2023.
Abstract | Links | BibTeX | Tags: across, admire, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa
@misc{23:gtc:fl,
  author       = {Marco Aldinucci},
  title        = {Experimenting with Systems for Decentralized Machine Learning},
  howpublished = {NVIDIA GTC 2023},
  year         = {2023},
  date         = {2023-03-01},
  url          = {https://datacloud.di.unito.it/index.php/s/oyLt7xwkbKxz65c},
  abstract     = {Decentralized machine learning (DML) enables collaborative machine learning without centralized input data. Federated learning (FL) and edge inference (EI) are examples of DML. Collaboration naturally happens at the edge of a distributed system with inherently distributed data. While tools for DML are starting to flourish, much needs to be done to get more flexible and portable tools to experiment with novel techniques, non-fully connected topologies, multiple data domains, and asynchronous collaboration schemes. We'll present recent advances in DML, aiming to improve usability in data centers and, at the edge, to widen the class of models extending FL to non-DNN paradigms, to improve the accuracy of models controlling normalization and frequency of communications, and to boost data privacy through generative adversarial networks. Prerequisites: Intermediate understanding of machine learning methods and distributed & parallel computing.},
  keywords     = {across, admire, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
HPC4AI: The Research on AI beyond the public cloud Miscellaneous
CENTAI kick-off meeting, 2023.
Links | BibTeX | Tags: across, admire, brainteaser, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa
@misc{23:CENTAI:hpc4ai,
  author       = {Marco Aldinucci},
  title        = {{HPC4AI}: The Research on {AI} beyond the public cloud},
  howpublished = {CENTAI kick-off meeting},
  address      = {Torino, Italy},
  year         = {2023},
  date         = {2023-03-01},
  url          = {https://datacloud.di.unito.it/index.php/s/PZXjPm8sfKTmTGb},
  keywords     = {across, admire, brainteaser, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli
CWL for HPC: are we there yet? Miscellaneous
2023 CWL Conference, 2023, (Invited talk).
Abstract | Links | BibTeX | Tags: across, eupex, streamflow
@misc{23:CWLConference,
  author       = {Iacopo Colonnelli},
  title        = {{CWL} for {HPC}: are we there yet?},
  howpublished = {2023 CWL Conference},
  address      = {Heidelberg, Germany},
  year         = {2023},
  date         = {2023-03-01},
  note         = {Invited talk},
  url          = {https://datacloud.di.unito.it/index.php/s/CMCd5LiZeXsxwEg},
  abstract     = {Modern HPC applications are becoming so heterogeneous and complex that a modular approach to their design, deployment and orchestration is now necessary. This talk explores the benefits of using a vendor-agnostic workflow language (CWL) coupled with a hybrid workflow management system (StreamFlow) in the HPC ecosystem. Also, it will examine the requirements needed to model HPC applications effectively, the CWL’s readiness to meet such requirements, and the proposals made to improve the language where needed. Four real use cases will drive the discussion: the ACROSS Project (G.A. n. 955648), where CWL is the primary interface to model three HPC workflows, and the EUPEX Project (G.A. n. 101033975), where StreamFlow will be used for the rapid prototyping of a seismic engineering HPC application for a Modular Supercomputing Architecture (MSA) system.},
  keywords     = {across, eupex, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
2022
Marco Aldinucci
Il calcolo parallelo: una storia di metodi e algoritmi raccontata dalle macchine Miscellaneous
Olimpiadi di Informatica, 2022, (Invited talk).
Abstract | Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, eupilot, textarossa
@misc{22:olimpiadi:cs,
  author       = {Marco Aldinucci},
  title        = {Il calcolo parallelo: una storia di metodi e algoritmi raccontata dalle macchine},
  howpublished = {Olimpiadi di Informatica},
  address      = {Biella, Italy},
  year         = {2022},
  date         = {2022-09-01},
  note         = {Invited talk},
  url          = {https://datacloud.di.unito.it/index.php/s/7ZdfLkn3NetzXCN},
  abstract     = {Lectio Magistralis alle finali nazionali delle Olimpiadi di Informatica 2022},
  keywords     = {across, admire, eumaster4hpc, eupex, eupilot, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
La convergenza HPC-cloud è l'anello mancante tra il calcolo scientifico e l'IA applicata Miscellaneous
Intelligenza Artificiale e Business Applications, 2022, (Invited talk).
Abstract | Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, eupilot, textarossa
@misc{22:soiel:ai,
  author       = {Marco Aldinucci},
  title        = {La convergenza {HPC-cloud} è l'anello mancante tra il calcolo scientifico e {l'IA} applicata},
  howpublished = {Intelligenza Artificiale e Business Applications},
  address      = {Virtual event},
  year         = {2022},
  date         = {2022-09-01},
  note         = {Invited talk},
  url          = {https://datacloud.di.unito.it/index.php/s/xCQSqJ8bCKCXMK9},
  abstract     = {Innanzitutto, le infrastrutture HPC stanno adottando le GPU per il loro rapporto prestazioni per watt superiore rispetto ai multicore generici. In secondo luogo, i flussi di lavoro scientifici di prossima generazione stanno integrando passaggi basati sull'intelligenza artificiale per la loro precisione nell'approssimazione e nell'analisi di fenomeni complessi. In terzo luogo, l'IA e in particolare il Machine Learning (ML) rappresentano un carico di lavoro perfetto per le GPU in termini di prestazioni e tempo di sviluppo. Oggi non possiamo ancora chiudere il cerchio eseguendo senza problemi carichi di lavoro scientifici abilitati all'intelligenza artificiale nelle infrastrutture HPC perché il loro software di sistema e gli strumenti di sviluppo non sono progettati per i carichi di lavoro moderni, come i framework ML progettati per il cloud. È probabile che la convergenza HPC-cloud colmi il divario. Nel talk verranno presentate le infrastrutture e gli strumenti sviluppati all'Università di Torino per la convergenza HPC-cloud (es. HPC4AI, StreamFlow, CAPIO, Jupyter-workflow) e come sono stati utilizzati per le applicazioni di intelligenza artificiale, come la diagnosi spiegabile di polmonite COVID-19 e la tutela della privacy AI. L'esperienza maturata nella progettazione e gestione di HPC4AI costituisce il cuore della progettazione del laboratorio di contaminazione del "FutureHPC" di Torino secondo il Centro Nazionale "HPC, BigData e Quantum Computing" finanziato dal PNRR con 320M€ che dovrebbe essere operativo dal 1 settembre 2022. L'obiettivo finale del laboratorio di contaminazione è sviluppare relazioni e collaborazioni tra industria e università.},
  keywords     = {across, admire, eumaster4hpc, eupex, eupilot, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli, Marco Aldinucci
Hybrid Workflows For Large-Scale Scientific Applications Miscellaneous
6th EAGE High Performance Computing Workshop, 2022.
Abstract | Links | BibTeX | Tags: across, eupex, jupyter-workflow, textarossa
@misc{22:eage,
  author       = {Iacopo Colonnelli and Marco Aldinucci},
  title        = {Hybrid Workflows For Large-Scale Scientific Applications},
  howpublished = {6th EAGE High Performance Computing Workshop},
  address      = {Milano, Italy},
  year         = {2022},
  date         = {2022-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/GScPS5LCPdt6Yoo},
  abstract     = {Large-scale scientific applications are facing an irreversible transition from monolithic, high-performance oriented codes to modular and polyglot deployments of specialised (micro-)services. The reasons behind this transition are many: coupling of standard solvers with Deep Learning techniques, offloading of data analysis and visualisation to Cloud, and the advent of specialised hardware accelerators. Topology-aware Workflow Management Systems (WMSs) play a crucial role. In particular, topology-awareness allows an explicit mapping of workflow steps onto heterogeneous locations, allowing automated executions on top of hybrid architectures (e.g., cloud+HPC or classical+quantum). Plus, topology-aware WMSs can offer non-functional requirements OOTB, e.g. components’ life-cycle orchestration, secure and efficient data transfers, fault tolerance, and cross-cluster execution of urgent workloads. Augmenting interactive Jupyter Notebooks with distributed workflow capabilities allows domain experts to prototype and scale applications using the same technological stack, while relying on a feature-rich and user-friendly web interface. This abstract will showcase how these general methodologies can be applied to a typical geoscience simulation pipeline based on the Full Wavefront Inversion (FWI) technique. In particular, a prototypical Jupyter Notebook will be executed interactively on Cloud. Preliminary data analyses and post-processing will be executed locally, while the computationally demanding optimisation loop will be scheduled on a remote HPC cluster.},
  keywords     = {across, eupex, jupyter-workflow, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli, Barbara Cantalupo, Doriana Medić, Marco Aldinucci
Hybrid workflows for heterogeneous distributed computing Miscellaneous
3rd Italian Workshop on HPC (ITWSHPC), 2022.
Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, eupilot, textarossa
@misc{22:itwshpc,
  author       = {Iacopo Colonnelli and Barbara Cantalupo and Doriana Medić and Marco Aldinucci},
  title        = {Hybrid workflows for heterogeneous distributed computing},
  howpublished = {3rd Italian Workshop on HPC (ITWSHPC)},
  address      = {Torino, Italy},
  year         = {2022},
  date         = {2022-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/ienbcA2DJ26aioE},
  keywords     = {across, admire, eumaster4hpc, eupex, eupilot, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli, Marco Aldinucci
CINI HPC-KTT: HPC Key Technologies and Tools National Lab Miscellaneous
NVIDIA HPC Roundtable, 2022, (Invited talk).
Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, eupilot, textarossa
@misc{22:nvidia_hpc_roundtable,
  author       = {Iacopo Colonnelli and Marco Aldinucci},
  title        = {{CINI} {HPC-KTT}: {HPC} Key Technologies and Tools National Lab},
  howpublished = {NVIDIA HPC Roundtable},
  address      = {Casalecchio di Reno, Italy},
  year         = {2022},
  date         = {2022-09-01},
  note         = {Invited talk},
  url          = {https://datacloud.di.unito.it/index.php/s/9EQniZ2dGzdJ26f},
  keywords     = {across, admire, eumaster4hpc, eupex, eupilot, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli, Dario Tranchitella
Dossier: multi-tenant distributed Jupyter Notebooks Miscellaneous
DoK Talks 141, 2022, (Invited talk).
Abstract | Links | BibTeX | Tags: across, deephealth, hpc4ai, jupyter-workflow
@misc{22:data-on-kubernetes,
  author       = {Iacopo Colonnelli and Dario Tranchitella},
  title        = {{Dossier}: multi-tenant distributed {Jupyter} Notebooks},
  howpublished = {DoK Talks 141},
  address      = {Virtual event},
  year         = {2022},
  date         = {2022-07-01},
  note         = {Invited talk},
  url          = {https://datacloud.di.unito.it/index.php/s/RNqTGmTqWS66qHT},
  abstract     = {When providing data analysis as a service, one must tackle several problems. Data privacy and protection by design are crucial when working on sensitive data. Performance and scalability are fundamental for compute-intensive workloads, e.g. training Deep Neural Networks. User-friendly interfaces and fast prototyping tools are essential to allow domain experts to experiment with new techniques. Portability and reproducibility are necessary to assess the actual value of results. Kubernetes is the best platform to provide reliable, elastic, and maintainable services. However, Kubernetes alone is not enough to achieve large-scale multi-tenant reproducible data analysis. OOTB support for multi-tenancy is too rough, with only two levels of segregation (i.e. the single namespace or the entire cluster). Offloading computation to off-cluster resources is non-trivial and requires the user's manual configuration. Also, Jupyter Notebooks per se cannot provide much scalability (they execute locally and sequentially) and reproducibility (users can run cells in any order and any number of times). The Dossier platform allows system administrators to manage multi-tenant distributed Jupyter Notebooks at the cluster level in the Kubernetes way, i.e. through CRDs. Namespaces are aggregated in Tenants, and all security and accountability aspects are managed at that level. Each Notebook spawns into a user-dedicated namespace, subject to all Tenant-level constraints. Users can rely on provisioned resources, either in-cluster worker nodes or external resources like HPC facilities. Plus, they can plug their computing nodes in a BYOD fashion. Notebooks are interpreted as distributed workflows, where each cell is a task that one can offload to a different location in charge of its execution.},
  keywords     = {across, deephealth, hpc4ai, jupyter-workflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli
StreamFlow Miscellaneous
2nd HealthyCloud Workshop: Analysis of existing orchestration mechanisms for distributed computational analyses, 2022, (Invited talk).
Links | BibTeX | Tags: across, deephealth, eupex, streamflow, textarossa
@misc{22:healthycloud-workshop,
  author       = {Iacopo Colonnelli},
  title        = {{StreamFlow}},
  howpublished = {2nd HealthyCloud Workshop: Analysis of existing orchestration mechanisms for distributed computational analyses},
  address      = {Virtual event},
  year         = {2022},
  date         = {2022-07-01},
  note         = {Invited talk},
  url          = {https://datacloud.di.unito.it/index.php/s/Taz8qtzmkmn9ffT},
  keywords     = {across, deephealth, eupex, streamflow, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
EuroHPC and the Italian HPC ecosystem Miscellaneous
Critical Infrastructure Protection Forum - EuroCC Romania, 2022, (Invited talk).
Abstract | Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, eupilot, icsc, textarossa
@misc{22:cip:romania,
  author       = {Marco Aldinucci},
  title        = {{EuroHPC} and the {Italian} {HPC} ecosystem},
  howpublished = {Critical Infrastructure Protection Forum - EuroCC Romania},
  address      = {Bucharest, Romania},
  year         = {2022},
  date         = {2022-06-01},
  note         = {Invited talk},
  url          = {https://datacloud.di.unito.it/index.php/s/5dFFoNsZzwTzQkn},
  abstract     = {The talk presents the main investments currently ongoing in Italy in the HPC area as well as the activity of Italian stakeholders within EuroHPC. The novel Italian National Centre on HPC (ICSC) is introduced.},
  keywords     = {across, admire, eumaster4hpc, eupex, eupilot, icsc, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
The Italian HPC ecosystem and the next generation of EuroHPC CoE Miscellaneous
EuroHPC EoCoE final summit, 2022, (Invited talk).
Abstract | Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, eupilot, icsc, textarossa
@misc{22:eocoe:summit,
  author       = {Marco Aldinucci},
  title        = {The {Italian} {HPC} ecosystem and the next generation of {EuroHPC} {CoE}},
  howpublished = {EuroHPC EoCoE final summit},
  address      = {Napoli, Italy},
  year         = {2022},
  date         = {2022-06-01},
  note         = {Invited talk},
  url          = {https://datacloud.di.unito.it/index.php/s/AH5Ms3NekeoEooB},
  abstract     = {The talk presents the main investments currently ongoing in Italy in the HPC area as well as the activity of Italian stakeholders within EuroHPC. The novel Italian National Centre on HPC (ICSC) is introduced.},
  keywords     = {across, admire, eumaster4hpc, eupex, eupilot, icsc, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli
StreamFlow: a topology-aware WMS Miscellaneous
ELIXIR Cloud, Data & AAI Bi-weekly Technical Calls, 2022, (Invited talk).
Links | BibTeX | Tags: across, deephealth, eupex, streamflow, textarossa
@misc{22:elixir-streamflow,
  author       = {Iacopo Colonnelli},
  title        = {{StreamFlow}: a topology-aware {WMS}},
  howpublished = {ELIXIR Cloud, Data \& AAI Bi-weekly Technical Calls},
  address      = {Virtual event},
  year         = {2022},
  date         = {2022-06-01},
  note         = {Invited talk},
  url          = {https://datacloud.di.unito.it/index.php/s/Z9GsKnRCxmBdMd3},
  keywords     = {across, deephealth, eupex, streamflow, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
HPC-cloud convergence is the missing link between scientific computing and applied-AI Miscellaneous
Machine Learning for Astrophysics (ML4ASTRO), 2022, (Keynote talk).
Abstract | Links | BibTeX | Tags: across, deephealth, eupex, eupilot
@misc{22:ml4astrotalk,
  author       = {Marco Aldinucci},
  title        = {{HPC-cloud} convergence is the missing link between scientific computing and {applied-AI}},
  howpublished = {Machine Learning for Astrophysics (ML4ASTRO)},
  address      = {Catania, Italy},
  year         = {2022},
  date         = {2022-06-01},
  note         = {Keynote talk},
  url          = {https://datacloud.di.unito.it/index.php/s/2SGswkcip7MoMoH},
  abstract     = {First, HPC infrastructures are embracing GPUs for their superior performance-per-watt ratio against general-purpose multicores. Second, the next-generation scientific workflows are integrating AI-based steps for their accuracy in approximating and analyzing complex phenomena. Third, AI and specifically Machine Learning (ML), is a perfect workload for GPUs in terms of performance and development time. Today, we cannot still close the circle seamlessly running AI-enabled scientific workloads into HPC infrastructures because their system software and development tools are not designed for modern workloads, such as ML frameworks designed for the cloud. HPC-cloud convergence is likely to bridge the gap. In the talk, we will present StreamFlow and CAPIO, two development tools for HPC-cloud convergence.},
  keywords     = {across, deephealth, eupex, eupilot},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
Cognitive continuum: a game theoretical approach Miscellaneous
HiPEAC Vision meeting, Brussels, 16 May 2022, 2022.
Abstract | Links | BibTeX | Tags: across, admire, brainteaser, eumaster4hpc, eupex, eupilot, textarossa
@misc{22:hipeacvision:fl,
  author       = {Marco Aldinucci},
  title        = {Cognitive continuum: a game theoretical approach},
  howpublished = {HiPEAC Vision meeting, Brussels, 16 May 2022},
  address      = {Brussels, Belgium},
  year         = {2022},
  date         = {2022-05-16},
  url          = {https://datacloud.di.unito.it/index.php/s/453HWfmrQyo7j9E},
  abstract     = {Cognitive continuum: a game theoretical approach, (maybe) data operations are too basic: read, write, copy, remove … The talk is aimed to contribute to the forthcoming HiPEAC Vision document},
  keywords     = {across, admire, brainteaser, eumaster4hpc, eupex, eupilot, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
Da HPC4AI al living lab dello spoke FutureHPC del Centro Nazionale HPC Miscellaneous
Condivisioni, Conferenza GARR 2022, 2022, (Keynote talk).
Abstract | Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, hpc4ai, icsc, textarossa
@misc{22:garr,
  author       = {Marco Aldinucci},
  title        = {Da HPC4AI al living lab dello spoke FutureHPC del Centro Nazionale HPC},
  howpublished = {Condivisioni, Conferenza GARR 2022},
  note         = {Keynote talk},
  address      = {Palermo, Italy},
  year         = {2022},
  date         = {2022-05-01},
  url          = {https://datacloud.di.unito.it/index.php/s/P3KSroSSmrRxZMc},
  abstract     = {HPC4AI is an open-access laboratory of the University of Turin open to researchers, students and companies that manages a double pair of systems: a production cloud-HPC system and its twin dedicated to development. The cloud-HPC system is implemented thanks to an extended version of the GARR cloud (OpenStack) and the SLURM workload manager. HPC4AI is specifically designed to support system software development and cloud-HPC convergence tools. Among these streamflow (WMS), jupyter-as-a-service (SaaS), portable-secure-tenant (PasS). The experience gained in the design and management of HPC4AI forms the heart of the design of the livinglab of the Turin "FutureHPC" spoke of the National Center "HPC, BigData and Quantum Computing" funded by the PNRR which should be operational from September 2022.},
  keywords     = {across, admire, eumaster4hpc, eupex, hpc4ai, icsc, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli, Dario Tranchitella
OpenDeepHealth: Crafting a Deep Learning Platform as a Service with Kubernetes Miscellaneous
J on The Beach 2022, 2022.
Links | BibTeX | Tags: across, deephealth, hpc4ai, jupyter-workflow, streamflow
@misc{22:jotb22,
  author       = {Iacopo Colonnelli and Dario Tranchitella},
  title        = {OpenDeepHealth: Crafting a Deep Learning Platform as a Service with Kubernetes},
  howpublished = {J on The Beach 2022},
  address      = {Malaga, Spain},
  year         = {2022},
  date         = {2022-04-01},
  url          = {https://datacloud.di.unito.it/index.php/s/n6J7STNnwdyqtET},
  keywords     = {across, deephealth, hpc4ai, jupyter-workflow, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli
Distributed workflows with Jupyter Miscellaneous
J on The Beach 2022, 2022, (Workshop).
Links | BibTeX | Tags: across, deephealth, jupyter-workflow, streamflow
@misc{22:jotb22-workshop,
  author       = {Iacopo Colonnelli},
  title        = {Distributed workflows with Jupyter},
  howpublished = {J on The Beach 2022},
  note         = {Workshop},
  address      = {Malaga, Spain},
  year         = {2022},
  date         = {2022-04-01},
  url          = {https://datacloud.di.unito.it/index.php/s/om89q55S6ePf2Ji},
  keywords     = {across, deephealth, jupyter-workflow, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli
StreamFlow: A framework for hybrid workflows Miscellaneous
ACROSS WP4 meeting, 2022.
Links | BibTeX | Tags: across, streamflow
@misc{22:across-streamflow,
  author       = {Iacopo Colonnelli},
  title        = {StreamFlow: A framework for hybrid workflows},
  howpublished = {ACROSS WP4 meeting},
  address      = {Virtual event},
  year         = {2022},
  date         = {2022-02-01},
  url          = {https://datacloud.di.unito.it/index.php/s/FXFTKtQSRf6anMX},
  keywords     = {across, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
2021
Iacopo Colonnelli
StreamFlow: A framework for hybrid workflows Miscellaneous
ACROSS WP4 meeting, 2021.
Links | BibTeX | Tags: across, streamflow
@misc{21:across-streamflow,
  author       = {Iacopo Colonnelli},
  title        = {StreamFlow: A framework for hybrid workflows},
  howpublished = {ACROSS WP4 meeting},
  address      = {Virtual event},
  year         = {2021},
  date         = {2021-10-01},
  url          = {https://datacloud.di.unito.it/index.php/s/yrGYJL6CyNywF8a},
  keywords     = {across, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
The modernization of HPC applications for the cloud era Miscellaneous
Fifth EAGE Workshop on High Performance Computing for Upstream, 2021, (Keynote talk).
Abstract | BibTeX | Tags: across, admire, deephealth, streamflow
@misc{21:eni:streamflow,
  author       = {Marco Aldinucci},
  title        = {The modernization of HPC applications for the cloud era},
  howpublished = {Fifth EAGE Workshop on High Performance Computing for Upstream},
  note         = {Keynote talk},
  address      = {Virtual event},
  year         = {2021},
  date         = {2021-09-01},
  abstract     = {Workflows are among the most commonly used tools in a variety of execution environments. Many of them target a specific environment; few of them make it possible to execute an entire workflow in different environments, e.g., clouds, supercomputers, and both of them. We present a novel approach to workflow execution, called StreamFlow, that complements the workflow graph with the declarative description of potentially complex execution environments (such as Kubernetes and SLURM), making it possible to execute onto multiple sites not sharing a common data space. Streamflow clearly distinguishes it from many other workflow management systems because it decouples the data dependencies from the deployment of (containerized) workflow steps. Streamflow also leverages CAPIO (Cross-Application Programmable I/O) to move data from one step to another efficiently. CAPIO captures the POSIX file system and streams it in parallel and in-memory to the workflow's next step, possibly enabling in-transit data filtering.},
  keywords     = {across, admire, deephealth, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli
HPC Containers Miscellaneous
ACROSS WP4 meeting, 2021.
@misc{21:across-containers,
  author       = {Iacopo Colonnelli},
  title        = {HPC Containers},
  howpublished = {ACROSS WP4 meeting},
  address      = {Virtual event},
  year         = {2021},
  date         = {2021-07-01},
  url          = {https://datacloud.di.unito.it/index.php/s/ddf3YBjpm8KBGAF},
  keywords     = {across},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
From skeletons to workflows in the cloud-edge era Miscellaneous
14th Intl. Symposium on High-Level Programming and Applications (HLPP), 2021, (Keynote talk).
Abstract | Links | BibTeX | Tags: across, admire, deephealth, streamflow
@misc{21:hlpp:streamflow,
  author       = {Marco Aldinucci},
  title        = {From skeletons to workflows in the cloud-edge era},
  howpublished = {14th Intl. Symposium on High-Level Programming and Applications (HLPP)},
  note         = {Keynote talk},
  address      = {Virtual event},
  year         = {2021},
  date         = {2021-07-01},
  url          = {https://datacloud.di.unito.it/index.php/s/RyRPjNBse5PKnab},
  abstract     = {Workflows are among the most commonly used tools in a variety of execution environments. Many of them target a specific environment; few of them make it possible to execute an entire workflow in different environments, e.g. Kubernetes and batch clusters. We present a novel approach to workflow execution, called StreamFlow, that complements the workflow graph with the declarative description of potentially complex execution environments and that makes it possible to execute multiple sites not sharing a common data space. StreamFlow supports both task and data parallelism and enables the reproducible and scalable execution of workflows, such as AI pipelines, in hybrid cloud-HPC environments. As a running example, we use the novel ``universal COVID-19 pipeline'' that explore the whole optimisation space of the training of different DNNs to classify COVID-19 lung lesions.},
  keywords     = {across, admire, deephealth, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
Reproducibility in the AI era Miscellaneous
Penta Scientific Meeting, 2021.
Abstract | Links | BibTeX | Tags: across, admire, deephealth
@misc{21:penta:covid,
title = {Reproducibility in the AI era},
author = {Marco Aldinucci},
url = {https://datacloud.di.unito.it/index.php/s/GLpf7kKSJRH733A},
year = {2021},
date = {2021-07-01},
address = {Virtual event},
howpublished = {Penta Scientific Meeting},
keywords = {across, admire, deephealth},
pubstate = {published},
tppubtype = {misc}
}
Marco Aldinucci
The Italian research on HPC key technologies across EuroHPC Miscellaneous
ACM Computing Frontiers, 2021.
Abstract | Links | BibTeX | Tags: across, admire, eupex, eupilot, textarossa
@misc{21:CINI_acm_CF_talk,
title = {The Italian research on HPC key technologies across EuroHPC},
author = {Marco Aldinucci},
url = {https://datacloud.di.unito.it/index.php/s/3ZYmDbEm84rbB9k},
year = {2021},
date = {2021-05-01},
howpublished = {ACM Computing Frontiers},
publisher = {ACM},
address = {Virtual Conference, Italy},
abstract = {High-Performance Computing (HPC) is one of the strategic priorities for research and innovation worldwide due to its relevance for industrial and scientific applications. We envision HPC as composed of three pillars: infrastructures, applications, and key technologies and tools. While infrastructures are by construction centralized in large-scale HPC centers, and applications are generally within the purview of domain-specific organizations, key technologies fall in an intermediate case where coordination is needed, but design and development are often decentralized. A large group of Italian researchers has started a dedicated laboratory within the National Interuniversity Consortium for Informatics (CINI) to address this challenge. The laboratory, albeit young, has managed to succeed in its first attempts to propose a coordinated approach to HPC research within the EuroHPC Joint Undertaking, participating in the calls 2019-20 to five successful proposals for an aggregate total cost of 95M Euro. In this paper, we outline the working group's scope and goals and provide an overview of the five funded projects, which become fully operational in March 2021, and cover a selection of key technologies provided by the working group partners, highlighting their usage development within the projects.},
keywords = {across, admire, eupex, eupilot, textarossa},
pubstate = {published},
tppubtype = {misc}
}
ADMIRE (EC H2020 RIA, EuroHPC-01-2019): Adaptive multi-tier intelligent data manager for Exascale (2021, 36 months, total cost 8M€, G.A. n. 956748)
Publications
2024
Adriano Marques Garcia, Dalvan Griebler, Claudio Schepke, José Daniel García, Javier Fernández Muñoz, Luiz Gustavo Fernandes
Performance and programmability of GrPPI for parallel stream processing on multi-cores Journal Article
In: The Journal of Supercomputing, pp. 1-35, 2024, ISSN: 1573-0484, (In press).
Abstract | Links | BibTeX | Tags: admire
@article{GARCIA:JSuper:24,
title = {Performance and programmability of GrPPI for parallel stream processing on multi-cores},
author = {Adriano Marques Garcia and Dalvan Griebler and Claudio Schepke and José Daniel García and Javier Fernández Muñoz and Luiz Gustavo Fernandes},
url = {https://iris.unito.it/retrieve/fff66640-fcbe-4080-a4f1-3279c9fadafb/s11227-024-05934-z.pdf},
doi = {10.1007/s11227-024-05934-z},
issn = {1573-0484},
year = {2024},
date = {2024-01-01},
journal = {The Journal of Supercomputing},
pages = {1--35},
publisher = {Springer},
abstract = {GrPPI library aims to simplify the burdening task of parallel programming. It provides a unified, abstract, and generic layer while promising minimal overhead on performance. Although it supports stream parallelism, GrPPI lacks an evaluation regarding representative performance metrics for this domain, such as throughput and latency. This work evaluates GrPPI focused on parallel stream processing. We compare the throughput and latency performance, memory usage, and programmability of GrPPI against handwritten parallel code. For this, we use the benchmarking framework SPBench to build custom GrPPI benchmarks and benchmarks with handwritten parallel code using the same backends supported by GrPPI. The basis of the benchmarks is real applications, such as Lane Detection, Bzip2, Face Recognizer, and Ferret. Experiments show that while performance is often competitive with handwritten parallel code, the infeasibility of fine-tuning GrPPI is a crucial drawback for emerging applications. Despite this, programmability experiments estimate that GrPPI can potentially reduce the development time of parallel applications by about three times.},
note = {In press},
keywords = {admire},
pubstate = {published},
tppubtype = {article}
}
2023
Alberto Riccardo Martinelli, Massimo Torquati, Marco Aldinucci, Iacopo Colonnelli, Barbara Cantalupo
CAPIO: a Middleware for Transparent I/O Streaming in Data-Intensive Workflows Proceedings Article
In: 2023 IEEE 30th International Conference on High Performance Computing, Data, and Analytics (HiPC), IEEE, Goa, India, 2023.
Abstract | Links | BibTeX | Tags: admire, capio, eupex, icsc
@inproceedings{23:hipc:capio,
title = {CAPIO: a Middleware for Transparent I/O Streaming in Data-Intensive Workflows},
author = {Alberto Riccardo Martinelli and Massimo Torquati and Marco Aldinucci and Iacopo Colonnelli and Barbara Cantalupo},
url = {https://iris.unito.it/retrieve/27380f37-0978-409e-a9d8-2b5e95a4bb85/CAPIO-HiPC23-preprint.pdf},
doi = {10.1109/HiPC58850.2023.00031},
year = {2023},
date = {2023-12-01},
booktitle = {2023 IEEE 30th International Conference on High Performance Computing, Data, and Analytics (HiPC)},
publisher = {IEEE},
address = {Goa, India},
abstract = {With the increasing amount of digital data available for analysis and simulation, the class of I/O-intensive HPC workflows is fated to quickly expand, further exacerbating the performance gap between computing, memory, and storage technologies. This paper introduces CAPIO (Cross-Application Programmable I/O), a middleware capable of injecting I/O streaming capabilities into file-based workflows, improving the computation-I/O overlap without the need to change the application code. The contribution is twofold: 1) at design time, a new I/O coordination language allows users to annotate workflow data dependencies with synchronization semantics; 2) at run time, a user-space middleware automatically and transparently to the user turns a workflow batch execution into a streaming execution according to the semantics expressed in the configuration file. CAPIO has been tested on synthetic benchmarks simulating typical workflow I/O patterns and two real-world workflows. Experiments show that CAPIO reduces the execution time by 10\% to 66\% for data-intensive workflows that use the file system as a communication medium.},
keywords = {admire, capio, eupex, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
Jesus Carretero, Javier Garcia-Blas, Marco Aldinucci, Jean-Baptiste Besnard, Jean-Thomas Acquaviva, André Brinkmann, Marc-André Vef, Emmanuel Jeannot, Alberto Miranda, Ramon Nou, Morris Riedel, Massimo Torquati, Felix Wolf
Adaptive multi-tier intelligent data manager for Exascale Proceedings Article
In: 20th ACM International Conference on Computing Frontiers (CF '23), ACM, Bologna, Italy, 2023.
Abstract | Links | BibTeX | Tags: admire
@inproceedings{23:admire:cf,
title = {Adaptive multi-tier intelligent data manager for Exascale},
author = {Jesus Carretero and Javier Garcia-Blas and Marco Aldinucci and Jean-Baptiste Besnard and Jean-Thomas Acquaviva and André Brinkmann and Marc-André Vef and Emmanuel Jeannot and Alberto Miranda and Ramon Nou and Morris Riedel and Massimo Torquati and Felix Wolf},
url = {https://dl.acm.org/doi/pdf/10.1145/3587135.3592174},
doi = {10.1145/3587135.3592174},
year = {2023},
date = {2023-05-01},
booktitle = {20th ACM International Conference on Computing Frontiers (CF '23)},
publisher = {ACM},
address = {Bologna, Italy},
abstract = {The main objective of the ADMIRE project is the creation of an active I/O stack that dynamically adjusts computation and storage requirements through intelligent global coordination, the elasticity of computation and I/O, and the scheduling of storage resources along all levels of the storage hierarchy, while offering quality-of-service (QoS), energy efficiency, and resilience for accessing extremely large data sets in very heterogeneous computing and storage environments. We have developed a framework prototype that is able to dynamically adjust computation and storage requirements through intelligent global coordination, separated control, and data paths, the malleability of computation and I/O, the scheduling of storage resources along all levels of the storage hierarchy, and scalable monitoring techniques. The leading idea in ADMIRE is to co-design applications with ad-hoc storage systems that can be deployed with the application and adapt their computing and I/O behaviour on runtime, using malleability techniques, to increase the performance of applications and the throughput of the applications.},
keywords = {admire},
pubstate = {published},
tppubtype = {inproceedings}
}
Adriano Marques Garcia, Dalvan Griebler, Claudio Schepke, André Sacilotto Santos, José Daniel García, Javier Fernández Muñoz, Luiz Gustavo Fernandes
A Latency, Throughput, and Programmability Perspective of GrPPI for Streaming on Multi-cores Proceedings Article
In: 31st Euromicro International Conference on Parallel, Distributed and Network-Based Processing (PDP), pp. 164-168, IEEE, Naples, Italy, 2023.
Abstract | Links | BibTeX | Tags: admire
@inproceedings{GARCIA:PDP:23,
title = {A Latency, Throughput, and Programmability Perspective of GrPPI for Streaming on Multi-cores},
author = {Adriano Marques Garcia and Dalvan Griebler and Claudio Schepke and André Sacilotto Santos and José Daniel García and Javier Fernández Muñoz and Luiz Gustavo Fernandes},
url = {https://iris.unito.it/retrieve/9165d2ef-7140-4645-87cc-269050341c1d/PDP_2023_SPbench_with_GrPPI.pdf},
doi = {10.1109/PDP59025.2023.00033},
year = {2023},
date = {2023-03-01},
booktitle = {31st Euromicro International Conference on Parallel, Distributed and Network-Based Processing (PDP)},
pages = {164--168},
publisher = {IEEE},
address = {Naples, Italy},
series = {PDP'23},
abstract = {Several solutions aim to simplify the burdening task of parallel programming. The GrPPI library is one of them. It allows users to implement parallel code for multiple backends through a unified, abstract, and generic layer while promising minimal overhead on performance. An outspread evaluation of GrPPI regarding stream parallelism with representative metrics for this domain, such as throughput and latency, was not yet done. In this work, we evaluate GrPPI focused on stream processing. We evaluate performance, memory usage, and programming effort and compare them against handwritten parallel code. For this, we use the benchmarking framework SPBench to build custom GrPPI benchmarks. The basis of the benchmarks is real applications, such as Lane Detection, Bzip2, Face Recognizer, and Ferret. Experiments show that while performance is competitive with handwritten code in some cases, in other cases, the infeasibility of fine-tuning GrPPI is a crucial drawback. Despite this, programmability experiments estimate that GrPPI has the potential to reduce by about three times the development time of parallel applications.},
keywords = {admire},
pubstate = {published},
tppubtype = {inproceedings}
}
Alessia Antelmi, Massimo Torquati, Daniele Gregori, Francesco Polzella, Gianmarco Spinatelli, Marco Aldinucci
The SWH-Analytics Framework Proceedings Article
In: Bena, Nicola, Martino, Beniamino Di, Maratea, Antonio, Sperduti, Alessandro, Nardo, Emanuel Di, Ciaramella, Angelo, Montella, Raffaele, Ardagna, Claudio A. (Ed.): Proceedings of the 2nd Italian Conference on Big Data and Data Science (ITADATA 2023), Naples, Italy, September 11-13, 2023, CEUR-WS.org, 2023.
Abstract | Links | BibTeX | Tags: admire, analytics, icsc
@inproceedings{Antelmi_ITADATA_2023,
title = {The SWH-Analytics Framework},
author = {Alessia Antelmi and Massimo Torquati and Daniele Gregori and Francesco Polzella and Gianmarco Spinatelli and Marco Aldinucci},
editor = {Nicola Bena and Beniamino Di Martino and Antonio Maratea and Alessandro Sperduti and Emanuel Di Nardo and Angelo Ciaramella and Raffaele Montella and Claudio A. Ardagna},
url = {https://ceur-ws.org/Vol-3606/paper76.pdf},
year = {2023},
date = {2023-09-11},
booktitle = {Proceedings of the 2nd Italian Conference on Big Data and Data Science (ITADATA 2023), Naples, Italy, September 11-13, 2023},
volume = {3606},
publisher = {CEUR-WS.org},
series = {CEUR Workshop Proceedings},
abstract = {The Software Heritage (SWH) dataset serves as a vast repository for open-source code, with the ambitious goal of preserving all publicly available open-source projects. Despite being designed to effectively archive project files, its size of nearly 1 petabyte presents challenges in efficiently supporting Big Data MapReduce or AI systems. To address this disparity and enable seamless custom analytics on the SWH dataset, we present the SWH-Analytics (SWHA) architecture. This development environment quickly and transparently runs custom analytic applications on open-source software data preserved over time by SWH.},
keywords = {admire, analytics, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
Javier Garcia-Blas, Genaro Sanchez-Gallegos, Cosmin Petre, Alberto Riccardo Martinelli, Marco Aldinucci, Jesus Carretero
Hercules: Scalable and Network Portable In-Memory Ad-Hoc File System for Data-Centric and High-Performance Applications Proceedings Article
In: Cano, José, Dikaiakos, Marios D., Papadopoulos, George A., Pericàs, Miquel, Sakellariou, Rizos (Ed.): Euro-Par 2023: Parallel Processing, pp. 679–693, Springer Nature Switzerland, Cham, 2023, ISBN: 978-3-031-39698-4.
Abstract | BibTeX | Tags: admire, HPC
@inproceedings{10.1007/978-3-031-39698-4_46,
title = {Hercules: Scalable and Network Portable In-Memory Ad-Hoc File System for Data-Centric and High-Performance Applications},
author = {Javier Garcia-Blas and Genaro Sanchez-Gallegos and Cosmin Petre and Alberto Riccardo Martinelli and Marco Aldinucci and Jesus Carretero},
editor = {José Cano and Marios D. Dikaiakos and George A. Papadopoulos and Miquel Pericàs and Rizos Sakellariou},
doi = {10.1007/978-3-031-39698-4_46},
isbn = {978-3-031-39698-4},
year = {2023},
date = {2023-01-01},
booktitle = {Euro-Par 2023: Parallel Processing},
pages = {679--693},
publisher = {Springer Nature Switzerland},
address = {Cham},
abstract = {The growing demands for data processing by new data-intensive applications are putting pressure on the performance and capacity of HPC storage systems. The advancement in storage technologies, such as NVMe and persistent memory, are aimed at meeting these demands. However, relying solely on ultra-fast storage devices is not cost-effective, leading to the need for multi-tier storage hierarchies to move data based on its usage. To address this issue, ad-hoc file systems have been proposed as a solution. They utilise the available storage of compute nodes, such as memory and persistent storage, to create a temporary file system that adapts to the application behaviour in the HPC environment. This work presents the design, implementation, and evaluation of a distributed ad-hoc in-memory storage system (Hercules), highlighting the new communication model included in Hercules. This communication model takes advantage of the Unified Communication X framework (UCX). This solution leverages the capabilities of RDMA protocols, including Infiniband, Onmipath, shared memory, and zero-copy transfers. The preliminary evaluation results show excellent network utilisation compared with other existing technologies.},
keywords = {admire, HPC},
pubstate = {published},
tppubtype = {inproceedings}
}
Pedro Ângelo, Viviana Bono, Mariangiola Dezani-Ciancaglini, Mário Florido
Gradual Guarantee for FJ with lambda-Expressions Proceedings Article
In: Tomb, Aaron (Ed.): Proceedings of the 25th ACM International Workshop on Formal Techniques for Java-like Programs, FTfJP 2023, Seattle, WA, USA, 18 July 2023, pp. 32–38, ACM, 2023.
Links | BibTeX | Tags: admire, icsc
@inproceedings{DBLP:conf/ftfjp/AngeloBDF23,
title = {Gradual Guarantee for FJ with lambda-Expressions},
author = {Pedro Ângelo and Viviana Bono and Mariangiola Dezani-Ciancaglini and Mário Florido},
editor = {Aaron Tomb},
url = {https://doi.org/10.1145/3605156.3606453},
doi = {10.1145/3605156.3606453},
year = {2023},
date = {2023-07-18},
booktitle = {Proceedings of the 25th ACM International Workshop on Formal Techniques for Java-like Programs, FTfJP 2023, Seattle, WA, USA, 18 July 2023},
pages = {32--38},
publisher = {ACM},
keywords = {admire, icsc},
pubstate = {published},
tppubtype = {inproceedings}
}
Yasir Arfat, Gianluca Mittone, Iacopo Colonnelli, Fabrizio D'Ascenzo, Roberto Esposito, Marco Aldinucci
Pooling critical datasets with Federated Learning Proceedings Article
In: 31st Euromicro International Conference on Parallel, Distributed and Network-Based Processing, PDP 2023, pp. 329–337, IEEE, Napoli, Italy, 2023.
Abstract | Links | BibTeX | Tags: admire, ai, cardio, confidential, hpc4ai
@inproceedings{23:praise-fl:pdp,
title = {Pooling critical datasets with Federated Learning},
author = {Yasir Arfat and Gianluca Mittone and Iacopo Colonnelli and Fabrizio D'Ascenzo and Roberto Esposito and Marco Aldinucci},
url = {https://iris.unito.it/retrieve/491e22ec-3db5-4989-a063-085a199edd20/23_pdp_fl.pdf},
doi = {10.1109/PDP59025.2023.00057},
year = {2023},
date = {2023-03-01},
booktitle = {31st Euromicro International Conference on Parallel, Distributed and Network-Based Processing, PDP 2023},
pages = {329--337},
publisher = {IEEE},
address = {Naples, Italy},
abstract = {Federated Learning (FL) is becoming popular in different industrial sectors where data access is critical for security, privacy and the economic value of data itself. Unlike traditional machine learning, where all the data must be globally gathered for analysis, FL makes it possible to extract knowledge from data distributed across different organizations that can be coupled with different Machine Learning paradigms. In this work, we replicate, using Federated Learning, the analysis of a pooled dataset (with AdaBoost) that has been used to define the PRAISE score, which is today among the most accurate scores to evaluate the risk of a second acute myocardial infarction. We show that thanks to the extended-OpenFL framework, which implements AdaBoost.F, we can train a federated PRAISE model that exhibits comparable accuracy and recall as the centralised model. We achieved F1 and F2 scores which are consistently comparable to the PRAISE score study of a 16-parties federation but within an order of magnitude less time.},
keywords = {admire, ai, cardio, confidential, hpc4ai},
pubstate = {published},
tppubtype = {inproceedings}
}
2021
Marco Aldinucci, Giovanni Agosta, Antonio Andreini, Claudio A. Ardagna, Andrea Bartolini, Alessandro Cilardo, Biagio Cosenza, Marco Danelutto, Roberto Esposito, William Fornaciari, Roberto Giorgi, Davide Lengani, Raffaele Montella, Mauro Olivieri, Sergio Saponara, Daniele Simoni, Massimo Torquati
The Italian research on HPC key technologies across EuroHPC Proceedings Article
In: ACM Computing Frontiers, pp. 279–286, ACM, Virtual Conference, Italy, 2021.
Abstract | Links | BibTeX | Tags: admire, eupex, eupilot, textarossa
@inproceedings{21:CINI_acm_CF,
title = {The Italian research on HPC key technologies across EuroHPC},
author = {Marco Aldinucci and Giovanni Agosta and Antonio Andreini and Claudio A. Ardagna and Andrea Bartolini and Alessandro Cilardo and Biagio Cosenza and Marco Danelutto and Roberto Esposito and William Fornaciari and Roberto Giorgi and Davide Lengani and Raffaele Montella and Mauro Olivieri and Sergio Saponara and Daniele Simoni and Massimo Torquati},
url = {https://iris.unito.it/retrieve/handle/2318/1783118/744641/preprint.pdf},
doi = {10.1145/3457388.3458508},
year = {2021},
date = {2021-05-01},
booktitle = {ACM Computing Frontiers},
pages = {279--286},
publisher = {ACM},
address = {Virtual Conference, Italy},
abstract = {High-Performance Computing (HPC) is one of the strategic priorities for research and innovation worldwide due to its relevance for industrial and scientific applications. We envision HPC as composed of three pillars: infrastructures, applications, and key technologies and tools. While infrastructures are by construction centralized in large-scale HPC centers, and applications are generally within the purview of domain-specific organizations, key technologies fall in an intermediate case where coordination is needed, but design and development are often decentralized. A large group of Italian researchers has started a dedicated laboratory within the National Interuniversity Consortium for Informatics (CINI) to address this challenge. The laboratory, albeit young, has managed to succeed in its first attempts to propose a coordinated approach to HPC research within the EuroHPC Joint Undertaking, participating in the calls 2019-20 to five successful proposals for an aggregate total cost of 95M Euro. In this paper, we outline the working group's scope and goals and provide an overview of the five funded projects, which become fully operational in March 2021, and cover a selection of key technologies provided by the working group partners, highlighting their usage development within the projects.},
keywords = {admire, eupex, eupilot, textarossa},
pubstate = {published},
tppubtype = {inproceedings}
}
Talks
2024
Marco Edoardo Santimaria, Iacopo Colonnelli, Marco Aldinucci
Releasing the CAPIO middleware from MPI derived constraints Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: across, admire, capio, capiocl, eupex, icsc
@misc{24:santimaria:bighpc,
  author    = {Marco Edoardo Santimaria and Iacopo Colonnelli and Marco Aldinucci},
  title     = {Releasing the CAPIO middleware from MPI derived constraints},
  address   = {Pisa, Italy},
  year      = {2024},
  date      = {2024-09-01},
  url       = {https://datacloud.di.unito.it/index.php/s/zrJGD4i36fWdp5g},
  abstract  = {CAPIO is a middleware that transparently injects streaming capabilities into file-based workflows. However, its implementation is limited to HPC environments based on the MPI framework, significantly limiting its applications. This paper will illustrate a proposed architecture and some preliminary results aimed at investigating the usage of a distributed files system as a communication media for the CAPIO middleware, with the ultimate goal of supporting both CLOUD-based and HPC-based workflows.},
  keywords  = {across, admire, capio, capiocl, eupex, icsc},
  pubstate  = {published},
  tppubtype = {misc}
}
Marco Edoardo Santimaria, Iacopo Colonnelli, Massimo Torquati, Marco Aldinucci
CAPIO: Cross Application Programmable IO Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: across, admire, capio, capiocl, eupex, icsc
@misc{24:santimaria:itadata:shpcpee,
title = {CAPIO: Cross Application Programmable IO},
author = {Marco Edoardo Santimaria and Iacopo Colonnelli and Massimo Torquati and Marco Aldinucci},
url = {https://datacloud.di.unito.it/index.php/s/rg6LWwrZXi6tTXm},
year = {2024},
date = {2024-09-01},
address = {Pisa, Italy},
abstract = {With the increasing amount of digital data available for analysis and simulation, the class of I/O-intensive HPC workflows is fated to expand, further exacerbating quickly the performance gap between computing, memory, and storage technologies. CAPIO (Cross-Application Programmable I/O), is a middleware capable of injecting I/O streaming capabilities into file-based workflows, improving the computation-I/O overlap without the need to change the application code. In this presentation, we will introduce the CAPIO-CL language with its semantics, as well as the implementation of the CAPIO-CL language through the CAPIO middleware. We will also provide some case studies of how CAPIO has been employed to improve workflow execution time as well as some future directions.},
keywords = {across, admire, capio, capiocl, eupex, icsc},
pubstate = {published},
tppubtype = {misc}
}
Marco Edoardo Santimaria
CAPIO-CL: Cross Application Programmable IO - Coordination Language Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: across, admire, capio, eupex, icsc
@misc{24:santimaria:hlpp:capiocl,
  author    = {Marco Edoardo Santimaria},
  title     = {CAPIO-CL: Cross Application Programmable IO - Coordination Language},
  address   = {Pisa, Italy},
  year      = {2024},
  date      = {2024-07-01},
  url       = {https://datacloud.di.unito.it/index.php/s/zsKY3PWzX5NFCiX},
  abstract  = {The performance bottleneck in file-based workflows remains a pressing issue in the realm of I/O-based workflows. To address this challenge, a novel annotation language has been developed. CAPIO-CL is positioned as an innovative I/O coordination language, enabling users to annotate data dependencies within file-based workflows with synchronization semantics pertinent to the involved files and directories. Through the information provided by the language, optimization opportunities arise in streaming and preemptive data movement. This paper serves to illustrate the semantics and syntax enabling CAPIO-CL to enhance the performance of in situ workflows without necessitating the rewriting or modification of the original workflow application steps. Finally, an analysis of CAPIO-CL is provided, taking into consideration both language expressiveness and application performance enhancement.},
  keywords  = {across, admire, capio, eupex, icsc},
  pubstate  = {published},
  tppubtype = {misc}
}
2023
Barbara Cantalupo
The Software Heritage Analytics Framework Miscellaneous
ADMIRE User Meeting, 2023.
Abstract | Links | BibTeX | Tags: admire
@misc{23:ADMIRE:UG-SHA,
  title        = {The Software Heritage Analytics Framework},
  author       = {Barbara Cantalupo},
  url          = {https://datacloud.di.unito.it/index.php/s/NbNDPaGZkDc3KyN},
  year         = {2023},
  date         = {2023-12-01},
  address      = {Barcelona Supercomputing Centre, Barcelona, Spain},
  abstract     = {A framework for analytics on top of Software Heritage},
  howpublished = {ADMIRE User Meeting},
  keywords     = {admire},
  pubstate     = {published},
  tppubtype    = {misc}
}
Simone Pernice
GreatNector Modelling Tool Miscellaneous
ADMIRE User Meeting, 2023.
Abstract | Links | BibTeX | Tags: admire
@misc{23:ADMIRE:UG-GN,
  title        = {GreatNector Modelling Tool},
  author       = {Simone Pernice},
  url          = {https://datacloud.di.unito.it/index.php/s/NbNDPaGZkDc3KyN},
  year         = {2023},
  date         = {2023-12-01},
  address      = {Barcelona Supercomputing Centre, Barcelona, Spain},
  abstract     = {A tool for system modelling I/O queue behaviour},
  howpublished = {ADMIRE User Meeting},
  keywords     = {admire},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
Experimenting with Systems for Decentralized Machine Learning Miscellaneous
NVIDIA GTC 2023, 2023.
Abstract | Links | BibTeX | Tags: across, admire, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa
@misc{23:gtc:fl,
  title        = {Experimenting with Systems for Decentralized Machine Learning},
  author       = {Marco Aldinucci},
  url          = {https://datacloud.di.unito.it/index.php/s/oyLt7xwkbKxz65c},
  year         = {2023},
  date         = {2023-03-01},
  abstract     = {Decentralized machine learning (DML) enables collaborative machine learning without centralized input data. Federated learning (FL) and edge inference (EI) are examples of DML. Collaboration naturally happens at the edge of a distributed system with inherently distributed data. While tools for DML are starting to flourish, much needs to be done to get more flexible and portable tools to experiment with novel techniques, non-fully connected topologies, multiple data domains, and asynchronous collaboration schemes. We'll present recent advances in DML, aiming to improve usability in data centers and, at the edge, to widen the class of models extending FL to non-DNN paradigms, to improve the accuracy of models controlling normalization and frequency of communications, and to boost data privacy through generative adversarial networks. Prerequisites: Intermediate understanding of machine learning methods and distributed and parallel computing.},
  howpublished = {NVIDIA GTC 2023},
  keywords     = {across, admire, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
HPC4AI: The Research on AI beyond the public cloud Miscellaneous
CENTAI kick-off meeting, 2023.
Links | BibTeX | Tags: across, admire, brainteaser, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa
@misc{23:CENTAI:hpc4ai,
  author       = {Marco Aldinucci},
  title        = {HPC4AI: The Research on AI beyond the public cloud},
  howpublished = {CENTAI kick-off meeting},
  address      = {Torino, Italy},
  year         = {2023},
  date         = {2023-03-01},
  url          = {https://datacloud.di.unito.it/index.php/s/PZXjPm8sfKTmTGb},
  keywords     = {across, admire, brainteaser, epi, eumaster4hpc, eupex, eupilot, hpc4ai, space, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
From HPC4AI to ICSC living lab: Where systems are the research Miscellaneous
Dell Advanced Computing Workshop 2023: HPC and Beyond, 2023.
Links | BibTeX | Tags: admire, eupex, eupilot, hpc4ai, textarossa
@misc{23:Dell:hpc4ai,
  author       = {Marco Aldinucci},
  title        = {From HPC4AI to ICSC living lab: Where systems are the research},
  howpublished = {Dell Advanced Computing Workshop 2023: HPC and Beyond},
  address      = {Bologna, Italy},
  year         = {2023},
  date         = {2023-02-01},
  url          = {https://datacloud.di.unito.it/index.php/s/M5QRJyDxyxokcfL},
  keywords     = {admire, eupex, eupilot, hpc4ai, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
2022
Marco Aldinucci
Il calcolo parallelo: una storia di metodi e algoritmi raccontata dalle macchine Miscellaneous
Olimpiadi di Informatica, 2022, (Invited talk).
Abstract | Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, eupilot, textarossa
@misc{22:olimpiadi:cs,
  author       = {Marco Aldinucci},
  title        = {Il calcolo parallelo: una storia di metodi e algoritmi raccontata dalle macchine},
  howpublished = {Olimpiadi di Informatica},
  address      = {Biella, Italy},
  year         = {2022},
  date         = {2022-09-01},
  note         = {Invited talk},
  url          = {https://datacloud.di.unito.it/index.php/s/7ZdfLkn3NetzXCN},
  abstract     = {Lectio Magistralis alle finali nazionali delle Olimpiadi di Informatica 2022},
  keywords     = {across, admire, eumaster4hpc, eupex, eupilot, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
La convergenza HPC-cloud è l'anello mancante tra il calcolo scientifico e l'IA applicata Miscellaneous
Intelligenza Artificiale e Business Applications, 2022, (Invited talk).
Abstract | Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, eupilot, textarossa
@misc{22:soiel:ai,
  author       = {Marco Aldinucci},
  title        = {La convergenza HPC-cloud è l'anello mancante tra il calcolo scientifico e l'IA applicata},
  howpublished = {Intelligenza Artificiale e Business Applications},
  address      = {Virtual event},
  year         = {2022},
  date         = {2022-09-01},
  note         = {Invited talk},
  url          = {https://datacloud.di.unito.it/index.php/s/xCQSqJ8bCKCXMK9},
  abstract     = {Innanzitutto, le infrastrutture HPC stanno adottando le GPU per il loro rapporto prestazioni per watt superiore rispetto ai multicore generici. In secondo luogo, i flussi di lavoro scientifici di prossima generazione stanno integrando passaggi basati sull'intelligenza artificiale per la loro precisione nell'approssimazione e nell'analisi di fenomeni complessi. In terzo luogo, l'IA e in particolare il Machine Learning (ML) rappresentano un carico di lavoro perfetto per le GPU in termini di prestazioni e tempo di sviluppo. Oggi non possiamo ancora chiudere il cerchio eseguendo senza problemi carichi di lavoro scientifici abilitati all'intelligenza artificiale nelle infrastrutture HPC perché il loro software di sistema e gli strumenti di sviluppo non sono progettati per i carichi di lavoro moderni, come i framework ML progettati per il cloud. È probabile che la convergenza HPC-cloud colmi il divario. Nel talk verranno presentate le infrastrutture e gli strumenti sviluppati all'Università di Torino per la convergenza HPC-cloud (es. HPC4AI, StreamFlow, CAPIO, Jupyter-workflow) e come sono stati utilizzati per le applicazioni di intelligenza artificiale, come la diagnosi spiegabile di polmonite COVID-19 e la tutela della privacy AI. L'esperienza maturata nella progettazione e gestione di HPC4AI costituisce il cuore della progettazione del laboratorio di contaminazione del "FutureHPC" di Torino secondo il Centro Nazionale "HPC, BigData e Quantum Computing" finanziato dal PNRR con 320M€ che dovrebbe essere operativo dal 1 settembre 2022. L'obiettivo finale del laboratorio di contaminazione è sviluppare relazioni e collaborazioni tra industria e università.},
  keywords     = {across, admire, eumaster4hpc, eupex, eupilot, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli, Barbara Cantalupo, Doriana Medić, Marco Aldinucci
Hybrid workflows for heterogeneous distributed computing Miscellaneous
3rd Italian Workshop on HPC (ITWSHPC), 2022.
Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, eupilot, textarossa
@misc{22:itwshpc,
  author       = {Iacopo Colonnelli and Barbara Cantalupo and Doriana Medić and Marco Aldinucci},
  title        = {Hybrid workflows for heterogeneous distributed computing},
  howpublished = {3rd Italian Workshop on HPC (ITWSHPC)},
  address      = {Torino, Italy},
  year         = {2022},
  date         = {2022-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/ienbcA2DJ26aioE},
  keywords     = {across, admire, eumaster4hpc, eupex, eupilot, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli, Marco Aldinucci
CINI HPC-KTT: HPC Key Technologies and Tools National Lab Miscellaneous
NVIDIA HPC Roundtable, 2022, (Invited talk).
Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, eupilot, textarossa
@misc{22:nvidia_hpc_roundtable,
  author       = {Iacopo Colonnelli and Marco Aldinucci},
  title        = {CINI HPC-KTT: HPC Key Technologies and Tools National Lab},
  howpublished = {NVIDIA HPC Roundtable},
  address      = {Casalecchio di Reno, Italy},
  year         = {2022},
  date         = {2022-09-01},
  note         = {Invited talk},
  url          = {https://datacloud.di.unito.it/index.php/s/9EQniZ2dGzdJ26f},
  keywords     = {across, admire, eumaster4hpc, eupex, eupilot, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
EuroHPC and the Italian HPC ecosystem Miscellaneous
Critical Infrastructure Protection Forum - EuroCC Romania, 2022, (Invited talk).
Abstract | Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, eupilot, icsc, textarossa
@misc{22:cip:romania,
  author       = {Marco Aldinucci},
  title        = {EuroHPC and the Italian HPC ecosystem},
  howpublished = {Critical Infrastructure Protection Forum - EuroCC Romania},
  address      = {Bucharest, Romania},
  year         = {2022},
  date         = {2022-06-01},
  note         = {Invited talk},
  url          = {https://datacloud.di.unito.it/index.php/s/5dFFoNsZzwTzQkn},
  abstract     = {The talk presents the main investments currently ongoing in Italy in the HPC area as well as the activity of Italian stakeholders within EuroHPC. The novel Italian National Centre on HPC (ICSC) is introduced.},
  keywords     = {across, admire, eumaster4hpc, eupex, eupilot, icsc, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
The Italian HPC ecosystem and the next generation of EuroHPC CoE Miscellaneous
EuroHPC EoCoE final summit, 2022, (Invited talk).
Abstract | Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, eupilot, icsc, textarossa
@misc{22:eocoe:summit,
  author       = {Marco Aldinucci},
  title        = {The Italian HPC ecosystem and the next generation of EuroHPC CoE},
  howpublished = {EuroHPC EoCoE final summit},
  address      = {Napoli, Italy},
  year         = {2022},
  date         = {2022-06-01},
  note         = {Invited talk},
  url          = {https://datacloud.di.unito.it/index.php/s/AH5Ms3NekeoEooB},
  abstract     = {The talk presents the main investments currently ongoing in Italy in the HPC area as well as the activity of Italian stakeholders within EuroHPC. The novel Italian National Centre on HPC (ICSC) is introduced.},
  keywords     = {across, admire, eumaster4hpc, eupex, eupilot, icsc, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
From small files to no files Miscellaneous
6th Workshop on Performance and Scalability of Storage Systems, 2022, (Invited talk).
Abstract | Links | BibTeX | Tags: admire, eupex
@misc{22:p3s:capio,
  author       = {Marco Aldinucci},
  title        = {From small files to no files},
  howpublished = {6th Workshop on Performance and Scalability of Storage Systems},
  address      = {Paris, France},
  year         = {2022},
  date         = {2022-06-01},
  note         = {Invited talk},
  url          = {https://datacloud.di.unito.it/index.php/s/KLDi87xQmX86iXg},
  abstract     = {Modern distributed high-performance storage systems saturate the network bandwidth, and the margins for improvement at the software level are tiny. Due to metadata access, they might be troubled with massive access to small files. An example is the Software Heritage (SH) dataset, half petabytes of files with an average size of 3kBytes (Terabytes of metadata). While working with SH, we developed the idea of substituting files with in-memory streams. We did it living in dread with the fear of asking application programmers to rewrite their lovely antique legacy code exploiting the POSIX interface, and up to now, we did not. In the talk, we will introduce CAPIO (Cross-Application Programmable I/O) design principles and the current state of development of the prototype.},
  keywords     = {admire, eupex},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
Cognitive continuum: a game theoretical approach Miscellaneous
HiPEAC Vision meeting, Brussels, 16 May 2022, 2022.
Abstract | Links | BibTeX | Tags: across, admire, brainteaser, eumaster4hpc, eupex, eupilot, textarossa
@misc{22:hipeacvision:fl,
  title        = {Cognitive continuum: a game theoretical approach},
  author       = {Marco Aldinucci},
  url          = {https://datacloud.di.unito.it/index.php/s/453HWfmrQyo7j9E},
  year         = {2022},
  date         = {2022-05-16},
  address      = {Brussels, Belgium},
  abstract     = {Cognitive continuum: a game theoretical approach, (maybe) data operations are too basic: read, write, copy, remove … The talk is aimed to contribute to the forthcoming HiPEAC Vision document},
  howpublished = {HiPEAC Vision meeting},
  keywords     = {across, admire, brainteaser, eumaster4hpc, eupex, eupilot, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
Da HPC4AI al living lab dello spoke FutureHPC del Centro Nazionale HPC Miscellaneous
Condivisioni, Conferenza GARR 2022, 2022, (Keynote talk).
Abstract | Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, hpc4ai, icsc, textarossa
@misc{22:garr,
  title        = {Da HPC4AI al living lab dello spoke FutureHPC del Centro Nazionale HPC},
  author       = {Marco Aldinucci},
  url          = {https://datacloud.di.unito.it/index.php/s/P3KSroSSmrRxZMc},
  year         = {2022},
  date         = {2022-05-01},
  address      = {Palermo, Italy},
  abstract     = {HPC4AI is an open-access laboratory of the University of Turin open to researchers, students and companies that manages a double pair of systems: a production cloud-HPC system and its twin dedicated to development. The cloud-HPC system is implemented thanks to an extended version of the GARR cloud (OpenStack) and the SLURM workload manager. HPC4AI is specifically designed to support system software development and cloud-HPC convergence tools. Among these are StreamFlow (WMS), jupyter-as-a-service (SaaS), and portable-secure-tenant (PaaS). The experience gained in the design and management of HPC4AI forms the heart of the design of the living lab of the Turin "FutureHPC" spoke of the National Center "HPC, BigData and Quantum Computing" funded by the PNRR which should be operational from September 2022.},
  howpublished = {Condivisioni, Conferenza GARR 2022},
  note         = {Keynote talk},
  keywords     = {across, admire, eumaster4hpc, eupex, hpc4ai, icsc, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
2021
Marco Aldinucci
The modernization of HPC applications for the cloud era Miscellaneous
Fifth EAGE Workshop on High Performance Computing for Upstream, 2021, (Keynote talk).
Abstract | BibTeX | Tags: across, admire, deephealth, streamflow
@misc{21:eni:streamflow,
  title        = {The modernization of HPC applications for the cloud era},
  author       = {Marco Aldinucci},
  year         = {2021},
  date         = {2021-09-01},
  address      = {Virtual event},
  abstract     = {Workflows are among the most commonly used tools in a variety of execution environments. Many of them target a specific environment; few of them make it possible to execute an entire workflow in different environments, e.g., clouds, supercomputers, or both. We present a novel approach to workflow execution, called StreamFlow, that complements the workflow graph with the declarative description of potentially complex execution environments (such as Kubernetes and SLURM), making it possible to execute on multiple sites not sharing a common data space. StreamFlow clearly distinguishes itself from many other workflow management systems because it decouples the data dependencies from the deployment of (containerized) workflow steps. StreamFlow also leverages CAPIO (Cross-Application Programmable I/O) to move data from one step to another efficiently. CAPIO captures the POSIX file system and streams it in parallel and in-memory to the workflow's next step, possibly enabling in-transit data filtering.},
  howpublished = {Fifth EAGE Workshop on High Performance Computing for Upstream},
  note         = {Keynote talk},
  keywords     = {across, admire, deephealth, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
From skeletons to workflows in the cloud-edge era Miscellaneous
14th Intl. Symposium on High-Level Parallel Programming and Applications (HLPP), 2021, (Keynote talk).
Abstract | Links | BibTeX | Tags: across, admire, deephealth, streamflow
@misc{21:hlpp:streamflow,
  title        = {From skeletons to workflows in the cloud-edge era},
  author       = {Marco Aldinucci},
  url          = {https://datacloud.di.unito.it/index.php/s/RyRPjNBse5PKnab},
  year         = {2021},
  date         = {2021-07-01},
  address      = {Virtual event},
  abstract     = {Workflows are among the most commonly used tools in a variety of execution environments. Many of them target a specific environment; few of them make it possible to execute an entire workflow in different environments, e.g. Kubernetes and batch clusters. We present a novel approach to workflow execution, called StreamFlow, that complements the workflow graph with the declarative description of potentially complex execution environments and that makes it possible to execute on multiple sites not sharing a common data space. StreamFlow supports both task and data parallelism and enables the reproducible and scalable execution of workflows, such as AI pipelines, in hybrid cloud-HPC environments. As a running example, we use the novel ``universal COVID-19 pipeline'' that explores the whole optimisation space of the training of different DNNs to classify COVID-19 lung lesions.},
  howpublished = {14th Intl. Symposium on High-Level Parallel Programming and Applications (HLPP)},
  note         = {Keynote talk},
  keywords     = {across, admire, deephealth, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
Reproducibility in the AI era Miscellaneous
Penta Scientific Meeting, 2021.
Abstract | Links | BibTeX | Tags: across, admire, deephealth
@misc{21:penta:covid,
  title        = {Reproducibility in the AI era},
  author       = {Marco Aldinucci},
  url          = {https://datacloud.di.unito.it/index.php/s/GLpf7kKSJRH733A},
  year         = {2021},
  date         = {2021-07-01},
  address      = {Virtual event},
  howpublished = {Penta Scientific Meeting},
  keywords     = {across, admire, deephealth},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
The Italian research on HPC key technologies across EuroHPC Miscellaneous
2021.
Abstract | Links | BibTeX | Tags: across, admire, eupex, eupilot, textarossa
@misc{21:CINI_acm_CF_talk,
  title        = {The Italian research on HPC key technologies across EuroHPC},
  author       = {Marco Aldinucci},
  url          = {https://datacloud.di.unito.it/index.php/s/3ZYmDbEm84rbB9k},
  year         = {2021},
  date         = {2021-05-01},
  howpublished = {ACM Computing Frontiers},
  publisher    = {ACM},
  address      = {Virtual Conference, Italy},
  abstract     = {High-Performance Computing (HPC) is one of the strategic priorities for research and innovation worldwide due to its relevance for industrial and scientific applications. We envision HPC as composed of three pillars: infrastructures, applications, and key technologies and tools. While infrastructures are by construction centralized in large-scale HPC centers, and applications are generally within the purview of domain-specific organizations, key technologies fall in an intermediate case where coordination is needed, but design and development are often decentralized. A large group of Italian researchers has started a dedicated laboratory within the National Interuniversity Consortium for Informatics (CINI) to address this challenge. The laboratory, albeit young, has managed to succeed in its first attempts to propose a coordinated approach to HPC research within the EuroHPC Joint Undertaking, participating in the calls 2019-20 to five successful proposals for an aggregate total cost of 95M Euro. In this paper, we outline the working group's scope and goals and provide an overview of the five funded projects, which become fully operational in March 2021, and cover a selection of key technologies provided by the working group partners, highlighting their usage development within the projects.},
  keywords     = {across, admire, eupex, eupilot, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
TEXTAROSSA (EC H2020 RIA, EuroHPC-01-2019): Towards EXtreme scale Technologies and Accelerators for euROhpc hw/Sw Supercomputing Applications for exascale (2021, 36 months, total cost 6M€, G.A. n. 956831)
Publications
2023
Amirmasoud Ghiassi, Robert Birke, Lydia Chen
Robust Learning via Golden Symmetric Loss of (un)Trusted Labels Proceedings Article
In: SDM '23: SIAM International Conference on Data Mining, pp. 568–576, 2023.
Abstract | Links | BibTeX | Tags: textarossa
@inproceedings{sdm-ghiassi23,
  title     = {Robust Learning via Golden Symmetric Loss of (un)Trusted Labels},
  author    = {Amirmasoud Ghiassi and Robert Birke and Lydia Chen},
  url       = {https://datacloud.di.unito.it/index.php/s/b6z3moNLxnNiCxz},
  doi       = {10.1137/1.9781611977653.ch64},
  year      = {2023},
  date      = {2023-01-01},
  booktitle = {SDM '23: SIAM International Conference on Data Mining},
  pages     = {568--576},
  abstract  = {Learning robust deep models against noisy labels becomes ever critical when today's data is commonly collected from open platforms and subject to adversarial corruption. The information on the label corruption process, i.e., corruption matrix, can greatly enhance the robustness of deep models but still fall behind in combating hard classes. In this paper, we propose to construct a golden symmetric loss (GSL) based on the estimated corruption matrix as to avoid overfitting to noisy labels and learn effectively from hard classes. GSL is the weighted sum of the corrected regular cross entropy and reverse cross entropy. By leveraging a small fraction of trusted clean data, we estimate the corruption matrix and use it to correct the loss as well as to determine the weights of GSL. We theoretically prove the robustness of the proposed loss function in the presence of dirty labels. We provide a heuristic to adaptively tune the loss weights of GSL according to the noise rate and diversity measured from the dataset. We evaluate our proposed golden symmetric loss on both vision and natural language deep models subject to different types of label noise patterns. Empirical results show that GSL can significantly outperform the existing robust training methods on different noise patterns, showing accuracy improvement up to 18% on CIFAR-100 and 1% on real world noisy dataset of Clothing1M.},
  keywords  = {textarossa},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2022
Giovanni Agosta, Marco Aldinucci, Carlos Alvarez, Roberto Ammendola, Yasir Arfat, Olivier Beaumont, Massimo Bernaschi, Andrea Biagioni, Tommaso Boccali, Berenger Bramas, Carlo Brandolese, Barbara Cantalupo, Mauro Carrozzo, Daniele Cattaneo, Alessandro Celestini, Massimo Celino, Iacopo Colonnelli, Paolo Cretaro, Pasqua D'Ambra, Marco Danelutto, Roberto Esposito, Lionel Eyraud-Dubois, Antonio Filgueras, William Fornaciari, Ottorino Frezza, Andrea Galimberti, Francesco Giacomini, Brice Goglin, Daniele Gregori, Abdou Guermouche, Francesco Iannone, Michal Kulczewski, Francesca Lo Cicero, Alessandro Lonardo, Alberto R. Martinelli, Michele Martinelli, Xavier Martorell, Giuseppe Massari, Simone Montangero, Gianluca Mittone, Raymond Namyst, Ariel Oleksiak, Paolo Palazzari, Pier Stanislao Paolucci, Federico Reghenzani, Cristian Rossi, Sergio Saponara, Francesco Simula, Federico Terraneo, Samuel Thibault, Massimo Torquati, Matteo Turisini, Piero Vicini, Miquel Vidal, Davide Zoni, Giuseppe Zummo
Towards EXtreme scale technologies and accelerators for euROhpc hw/Sw supercomputing applications for exascale: The TEXTAROSSA approach Journal Article
In: Microprocessors and Microsystems, vol. 95, pp. 104679, 2022, ISSN: 0141-9331.
Abstract | Links | BibTeX | Tags: textarossa
@article{textarossa2022micpro:,
  title     = {Towards EXtreme scale technologies and accelerators for euROhpc hw/Sw supercomputing applications for exascale: The TEXTAROSSA approach},
  author    = {Giovanni Agosta and Marco Aldinucci and Carlos Alvarez and Roberto Ammendola and Yasir Arfat and Olivier Beaumont and Massimo Bernaschi and Andrea Biagioni and Tommaso Boccali and Berenger Bramas and Carlo Brandolese and Barbara Cantalupo and Mauro Carrozzo and Daniele Cattaneo and Alessandro Celestini and Massimo Celino and Iacopo Colonnelli and Paolo Cretaro and Pasqua D'Ambra and Marco Danelutto and Roberto Esposito and Lionel Eyraud-Dubois and Antonio Filgueras and William Fornaciari and Ottorino Frezza and Andrea Galimberti and Francesco Giacomini and Brice Goglin and Daniele Gregori and Abdou Guermouche and Francesco Iannone and Michal Kulczewski and Francesca {Lo Cicero} and Alessandro Lonardo and Alberto R. Martinelli and Michele Martinelli and Xavier Martorell and Giuseppe Massari and Simone Montangero and Gianluca Mittone and Raymond Namyst and Ariel Oleksiak and Paolo Palazzari and Pier Stanislao Paolucci and Federico Reghenzani and Cristian Rossi and Sergio Saponara and Francesco Simula and Federico Terraneo and Samuel Thibault and Massimo Torquati and Matteo Turisini and Piero Vicini and Miquel Vidal and Davide Zoni and Giuseppe Zummo},
  doi       = {10.1016/j.micpro.2022.104679},
  issn      = {0141-9331},
  year      = {2022},
  date      = {2022-01-01},
  journal   = {Microprocessors and Microsystems},
  volume    = {95},
  pages     = {104679},
  abstract  = {In the near future, Exascale systems will need to bridge three technology gaps to achieve high performance while remaining under tight power constraints: energy efficiency and thermal control; extreme computation efficiency via HW acceleration and new arithmetic; methods and tools for seamless integration of reconfigurable accelerators in heterogeneous HPC multi-node platforms. TEXTAROSSA addresses these gaps through a co-design approach to heterogeneous HPC solutions, supported by the integration and extension of HW and SW IPs, programming models, and tools derived from European research.},
  keywords  = {textarossa},
  pubstate  = {published},
  tppubtype = {article}
}
2021
Giovanni Agosta, William Fornaciari, Andrea Galimberti, Giuseppe Massari, Federico Reghenzani, Federico Terraneo, Davide Zoni, Carlo Brandolese, Massimo Celino, Francesco Iannone, Paolo Palazzari, Giuseppe Zummo, Massimo Bernaschi, Pasqua D'Ambra, Sergio Saponara, Marco Danelutto, Massimo Torquati, Marco Aldinucci, Yasir Arfat, Barbara Cantalupo, Iacopo Colonnelli, Roberto Esposito, Alberto Riccardo Martinelli, Gianluca Mittone, Olivier Beaumont, Berenger Bramas, Lionel Eyraud-Dubois, Brice Goglin, Abdou Guermouche, Raymond Namyst, Samuel Thibault, Antonio Filgueras, Miquel Vidal, Carlos Alvarez, Xavier Martorell, Ariel Oleksiak, Michal Kulczewski, Alessandro Lonardo, Piero Vicini, Francesca Lo Cicero, Francesco Simula, Andrea Biagioni, Paolo Cretaro, Ottorino Frezza, Pier Stanislao Paolucci, Matteo Turisini, Francesco Giacomini, Tommaso Boccali, Simone Montangero, Roberto Ammendola
TEXTAROSSA: Towards EXtreme scale Technologies and Accelerators for euROhpc hw/Sw Supercomputing Applications for exascale Proceedings Article
In: Proc. of the 24th Euromicro Conference on Digital System Design (DSD), IEEE, Palermo, Italy, 2021.
Abstract | Links | BibTeX | Tags: streamflow, textarossa
@inproceedings{21:DSD:textarossa,
  title     = {TEXTAROSSA: Towards EXtreme scale Technologies and Accelerators for euROhpc hw/Sw Supercomputing Applications for exascale},
  author    = {Giovanni Agosta and William Fornaciari and Andrea Galimberti and Giuseppe Massari and Federico Reghenzani and Federico Terraneo and Davide Zoni and Carlo Brandolese and Massimo Celino and Francesco Iannone and Paolo Palazzari and Giuseppe Zummo and Massimo Bernaschi and Pasqua D'Ambra and Sergio Saponara and Marco Danelutto and Massimo Torquati and Marco Aldinucci and Yasir Arfat and Barbara Cantalupo and Iacopo Colonnelli and Roberto Esposito and Alberto Riccardo Martinelli and Gianluca Mittone and Olivier Beaumont and Berenger Bramas and Lionel Eyraud-Dubois and Brice Goglin and Abdou Guermouche and Raymond Namyst and Samuel Thibault and Antonio Filgueras and Miquel Vidal and Carlos Alvarez and Xavier Martorell and Ariel Oleksiak and Michal Kulczewski and Alessandro Lonardo and Piero Vicini and Francesca {Lo Cicero} and Francesco Simula and Andrea Biagioni and Paolo Cretaro and Ottorino Frezza and Pier Stanislao Paolucci and Matteo Turisini and Francesco Giacomini and Tommaso Boccali and Simone Montangero and Roberto Ammendola},
  doi       = {10.1109/DSD53832.2021.00051},
  year      = {2021},
  date      = {2021-08-01},
  booktitle = {Proc. of the 24th Euromicro Conference on Digital System Design (DSD)},
  publisher = {IEEE},
  address   = {Palermo, Italy},
  abstract  = {To achieve high performance and high energy efficiency on near-future exascale computing systems, three key technology gaps need to be bridged. These gaps include: energy efficiency and thermal control; extreme computation efficiency via HW acceleration and new arithmetics; methods and tools for seamless integration of reconfigurable accelerators in heterogeneous HPC multi-node platforms. TEXTAROSSA aims at tackling these gaps through a co-design approach to heterogeneous HPC solutions, supported by the integration and extension of HW and SW IPs, programming models and tools derived from European research.},
  keywords  = {streamflow, textarossa},
  pubstate  = {published},
  tppubtype = {inproceedings}
}