Talks | Parallel Computing
2024
Petr Taborsky, Iacopo Colonnelli, Krzysztof Kurowski, Rakesh Sarma, Niels Henrik Pontoppidan, Branislav Jansík, Nicki Skafte Detlefsen, Jens Egholm Pedersen, Rasmus Larsen, Lars Kai Hansen
Towards a European AI Platform Miscellaneous
2nd EuroHPC User Day, 2024.
% Talk at the 2nd EuroHPC User Day (Amsterdam, Netherlands); entry dated 2024-10-01.
@misc{24:colonnelli:eurohpc-user-day,
  author       = {Petr Taborsky and Iacopo Colonnelli and Krzysztof Kurowski and Rakesh Sarma and Niels Henrik Pontoppidan and Branislav Jansík and Nicki Skafte Detlefsen and Jens Egholm Pedersen and Rasmus Larsen and Lars Kai Hansen},
  title        = {Towards a European AI Platform},
  howpublished = {2nd EuroHPC User Day},
  address      = {Amsterdam, Netherlands},
  year         = {2024},
  date         = {2024-10-01},
  url          = {https://datacloud.di.unito.it/index.php/s/nbk6YiABGsZXPp4},
  keywords     = {ai},
  pubstate     = {published},
  tppubtype    = {misc}
}
Roberto Esposito, Mirko Polato, Samuele Fonio
FedHP: Federated Learning with Hyperspherical Prototypical Regularization Miscellaneous
32nd European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning, (ESANN), 2024.
Abstract | Links | BibTeX | Tags: ai, fl, icsc
% Talk at ESANN 2024 (Bruges, Belgium); entry dated 2024-10-01.
% The three authors are joined with the BibTeX keyword "and" so that each
% name is parsed as a separate person rather than as one long name.
@misc{24:esann:fedhp,
  author       = {Roberto Esposito and Mirko Polato and Samuele Fonio},
  title        = {FedHP: Federated Learning with Hyperspherical Prototypical Regularization},
  howpublished = {32nd European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning, (ESANN)},
  address      = {Bruges, Belgium},
  year         = {2024},
  date         = {2024-10-01},
  url          = {https://datacloud.di.unito.it/index.php/s/fKyKSSFQKT3LTxW},
  abstract     = {This paper introduces FedHP, an innovative algorithm that integrates federated learning, hyperspherical geometries, and prototype learning. Federated Learning (FL) has gained prominence as a privacy-preserving method for building robust models across distributed datasets. Traditionally, FL exchanges model parameters to maintain data privacy; however, in scenarios with expensive data communication, exchanging large neural network models becomes impractical. In such cases, prototype learning offers a viable solution by facilitating the exchange of only a few prototypes. Motivated by these considerations, our approach capitalizes on recent advancements in prototype learning, particularly the advantages offered by non-Euclidean geometries. In addition to presenting FedHP, we offer empirical evidence demonstrating its comparability to other state-of-the-art approaches while significantly reducing communication costs.},
  keywords     = {ai, fl, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Giulio Malenza
Exploring energy consumption of AI frameworks on a 64-core RV64 Server CPU Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: ai, DYMAN, icsc
% Talk given in Pisa, Italy; entry dated 2024-09-01. No venue (howpublished) is recorded.
@misc{24:gmalenza:scihpcexa,
  author    = {Giulio Malenza},
  title     = {Exploring energy consumption of AI frameworks on a 64-core RV64 Server CPU},
  address   = {Pisa, Italy},
  year      = {2024},
  date      = {2024-09-01},
  url       = {https://datacloud.di.unito.it/index.php/s/5aTdyzNB6n9CREq},
  abstract  = {In today's era of rapid technological advancement, artificial intelligence (AI) applications require large-scale, high-performance, and data-intensive computations, leading to significant energy demands. Addressing this challenge necessitates a combined approach involving both hardware and software innovations. Hardware manufacturers are developing new, efficient, and specialized solutions, with the RISC-V architecture emerging as a prominent player due to its open, extensible, and energy-efficient instruction set architecture (ISA). Simultaneously, software developers are creating new algorithms and frameworks,
yet their energy efficiency often remains unclear.
In this study, we conduct a comprehensive benchmark analysis of machine learning (ML) applications on the 64-core SOPHON SG2042 RISC-V architecture. Specifically, we examine the energy consumption of deep learning inference models across various AI frameworks. By comparing the performance of different frameworks, we aim to provide a detailed understanding of how these frameworks can optimize energy consumption on this architecture.},
  keywords  = {ai, DYMAN, icsc},
  pubstate  = {published},
  tppubtype = {misc}
}
yet their energy efficiency often remains unclear.
In this study, we conduct a comprehensive benchmark analysis of machine learning (ML) applications on the 64-core SOPHON SG2042 RISC-V architecture. Specifically, we examine the energy consumption of deep learning inference models across various AI frameworks. By comparing the performance of different frameworks, we aim to provide a detailed understanding of how these frameworks can optimize energy consumption on this architecture.
Gianluca Mittone
Benchmarking HPC Performance for State-of-the-Art AI Workloads Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: ai, eupilot, icsc
% Talk given in Pisa, Italy; entry dated 2024-09-01. No venue (howpublished) is recorded.
@misc{24:mittone:itadata:shpcpee,
  author    = {Gianluca Mittone},
  title     = {Benchmarking HPC Performance for State-of-the-Art AI Workloads},
  address   = {Pisa, Italy},
  year      = {2024},
  date      = {2024-09-01},
  url       = {https://datacloud.di.unito.it/index.php/s/5Ep3W7cPW5baZfr},
  abstract  = {Benchmarking the performance of modern High-Performance Computing (HPC) infrastructure on Artificial Intelligence (AI) workloads is a hot topic in the supercomputing community. While research communities and big-tech companies actively invest in larger, more powerful data centres to support AI research, the standard computational performance benchmarking tools (e.g., LINPACK) are increasingly becoming outdated since they are not specifically tailored for AI workloads. Some tools, such as MLPerf, are trying to bridge this gap, but the HPC community still has not adopted them as standards. Since this trend became particularly evident with the advent of Large Language Models (LLMs), this work will delve into LLM training at scale as a way to benchmark Top500 HPC infrastructures on current AI workloads. The scalability performances of a major LLM model (i.e., Meta's LLaMA) on different HPCs (Leonardo, LUMI, MeluXina, Karolina) are exposed and discussed along with their Top500 positioning.
However, it should be noted that state-of-the-art LLM models are not trained on thousands of computing nodes but on hundreds. This choice is due to multiple factors, such as the influence of the training scaling on the model's convergence and the instability of large-scale deployments due to hardware/software failure. A benchmarking approach based on the next-generation LLM training approach is proposed to bypass all these issues. State-of-the-art LLMs are not monolithic structures but Mixture-of-Experts (MoE) models; this design implies innovative frontiers for the distributed training of such models due to the experts' training being potentially more parallelisable than a single monolithic model. We thus propose to create an AI-oriented HPC benchmark suite based on the parallel training of MoE models to measure the throughput performance of HPC systems on state-of-the-art AI workloads.},
  keywords  = {ai, eupilot, icsc},
  pubstate  = {published},
  tppubtype = {misc}
}
However, it should be noted that state-of-the-art LLM models are not trained on thousands of computing nodes but on hundreds. This choice is due to multiple factors, such as the influence of the training scaling on the model's convergence and the instability of large-scale deployments due to hardware/software failure. A benchmarking approach based on the next-generation LLM training approach is proposed to bypass all these issues. State-of-the-art LLMs are not monolithic structures but Mixture-of-Experts (MoE) models; this design implies innovative frontiers for the distributed training of such models due to the experts' training being potentially more parallelisable than a single monolithic model. We thus propose to create an AI-oriented HPC benchmark suite based on the parallel training of MoE models to measure the throughput performance of HPC systems on state-of-the-art AI workloads.
Samuele Fonio, Bruno Casella, Oussama Harrak
Federated Adaboost for Survival Analysis Miscellaneous
European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases, 2nd Workshop on Advancements in Federated Learning (WAFL), 2024.
Abstract | Links | BibTeX | Tags: ai, epi, fl, icsc
% Talk at the 2nd Workshop on Advancements in Federated Learning (WAFL),
% held with ECML PKDD in Vilnius, Lithuania; entry dated 2024-09-01.
% The three authors are joined with the BibTeX keyword "and" so that each
% name is parsed as a separate person. The citation key is kept as-is so
% existing citations keep resolving, even though its "23" prefix differs
% from the 2024 year field.
@misc{23:ecmlpkdd:fedsurvboost,
  author       = {Samuele Fonio and Bruno Casella and Oussama Harrak},
  title        = {Federated Adaboost for Survival Analysis},
  howpublished = {European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases, 2nd Workshop on Advancements in Federated Learning (WAFL)},
  address      = {Vilnius, Lithuania},
  year         = {2024},
  date         = {2024-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/DtXiQfne6BEC235},
  abstract     = {This work proposes FedSurvBoost, a federated learning pipeline for survival analysis based on the AdaBoost.F algorithm, which iteratively aggregates the best local weak hypotheses. Our method extends AdaBoost.F by removing the dependence on the number of classes coefficient from the computation of the weights of the best model. This makes it suitable for regression tasks, such as survival analysis. We show the effectiveness of our approach by comparing it with state-of-the-art methods, specifically developed for survival analysis problems, on two common survival datasets.},
  keywords     = {ai, epi, fl, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
From HPC4AI to Software & Integration living lab to innovation Miscellaneous
HPC as an enabling platform for AI event, 2024.
Abstract | Links | BibTeX | Tags: ai, HPC, icsc
% Talk at the "HPC as an enabling platform for AI" event (Torino, Italy); entry dated 2024-06-01.
% The ampersand in the title is escaped because a bare one is a LaTeX
% alignment character and breaks typesetting of the bibliography.
@misc{24:ma:hpcai:talk,
  author       = {Marco Aldinucci},
  title        = {From HPC4AI to Software \& Integration living lab to innovation},
  howpublished = {HPC as an enabling platform for AI event},
  address      = {Torino, Italy},
  year         = {2024},
  date         = {2024-06-01},
  url          = {https://datacloud.di.unito.it/index.php/s/3SS3Xa9XorN6D9o},
  abstract     = {The talk presents the motivation and the activity of the "Software and Integration" lab at UNITO.},
  keywords     = {ai, HPC, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
Software & Integration lab of FutureHPC spoke Miscellaneous
HPC as an enabling platform for AI event, 2024.
Abstract | Links | BibTeX | Tags: ai, HPC, icsc
% Talk at the "HPC as an enabling platform for AI" event (Torino, Italy); entry dated 2024-06-01.
% The ampersand in the title is escaped because a bare one is a LaTeX
% alignment character and breaks typesetting of the bibliography.
@misc{24:ma:swi:lab,
  author       = {Marco Aldinucci},
  title        = {Software \& Integration lab of FutureHPC spoke},
  howpublished = {HPC as an enabling platform for AI event},
  address      = {Torino, Italy},
  year         = {2024},
  date         = {2024-06-01},
  url          = {https://datacloud.di.unito.it/index.php/s/WDjyXCGyYFJDQSd},
  abstract     = {The presentation describes the main the activity of the "Software and Integration" lab at UNITO across its main flagship codes.},
  keywords     = {ai, HPC, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Robert Birke
FLaaS: Federated Learning as a Service Miscellaneous
ICSC - Spoke 1 meeting, 2024.
Abstract | Links | BibTeX | Tags: ai, icsc
% Talk at the ICSC - Spoke 1 meeting (Torino, Italy); entry dated 2024-02-01.
@misc{24:icsc:spoke1:ifab,
  author       = {Robert Birke},
  title        = {FLaaS: Federated Learning as a Service},
  howpublished = {ICSC - Spoke 1 meeting},
  address      = {Torino, Italy},
  year         = {2024},
  date         = {2024-02-01},
  url          = {https://datacloud.di.unito.it/index.php/s/yHXdTnC8xEqoJ6Y},
  abstract     = {Presentation about the Innovation Grant in collaboration with IFAB},
  keywords     = {ai, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Robert Birke
The impact of the advances in generative models on applications and systems Miscellaneous
8th GDR RSD / ASF Winter School on Distributed Systems & Networks 2024, 2024, (Keynote talk).
Abstract | Links | BibTeX | Tags: ai, eupilot, textarossa
% Keynote talk at the 8th GDR RSD / ASF Winter School (Le Pleynet, France); entry dated 2024-01-01.
% The ampersand in howpublished and the percent sign in the abstract are
% escaped: unescaped, the former is a LaTeX alignment character and the
% latter starts a LaTeX comment once the field is typeset.
@misc{24:ASF:WINTER,
  author       = {Robert Birke},
  title        = {The impact of the advances in generative models on applications and systems},
  howpublished = {8th GDR RSD / ASF Winter School on Distributed Systems \& Networks 2024},
  address      = {Le Pleynet, France},
  year         = {2024},
  date         = {2024-01-01},
  url          = {https://datacloud.di.unito.it/index.php/s/QYTCMfWp4sY5qx4},
  note         = {Keynote talk},
  abstract     = {Generative models have achieved unprecedented quality levels across a wide range of data types. This advance often stems from the ever increasing data and compute used to train larger and larger models. One major use case of such synthetic data is in privacy-compliant data sharing. Gartner predicts that synthetic data will reduce by 2025 the need for real data by 70\% for analytics and machine learning. We will look at generative models, with a special focus on tabular data, and the issue of democratization of large model training.},
  keywords     = {ai, eupilot, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
2023
Iacopo Colonnelli, Robert Birke, Giulio Malenza, Gianluca Mittone, Alberto Mulone, Marco Aldinucci, Valerio Basile, Marco Antonio Stranisci, Viviana Patti, Jeroen Galjaard, Lydia Y. Chen, Sanzio Bassini, Massimiliano Guarrasi, Gabriella Scipione, Jan Martinovič, Vit Vondrák
Cross-Facility Federated Learning Miscellaneous
1st EuroHPC User Day, 2023.
Links | BibTeX | Tags: across, ai, eupex, eupilot, HPC
% Talk at the 1st EuroHPC User Day (Bruxelles, Belgium); entry dated 2023-12-01.
@misc{23:eurohpc,
  author       = {Iacopo Colonnelli and Robert Birke and Giulio Malenza and Gianluca Mittone and Alberto Mulone and Marco Aldinucci and Valerio Basile and Marco Antonio Stranisci and Viviana Patti and Jeroen Galjaard and Lydia Y. Chen and Sanzio Bassini and Massimiliano Guarrasi and Gabriella Scipione and Jan Martinovič and Vit Vondrák},
  title        = {Cross-Facility Federated Learning},
  howpublished = {1st EuroHPC User Day},
  address      = {Bruxelles, Belgium},
  year         = {2023},
  date         = {2023-12-01},
  url          = {https://datacloud.di.unito.it/index.php/s/DDAz4QkJP3WZ68M},
  keywords     = {across, ai, eupex, eupilot, HPC},
  pubstate     = {published},
  tppubtype    = {misc}
}
Gianluca Mittone, Giulio Malenza, Marco Aldinucci, Robert Birke
Distributed Edge Inference: an Experimental Study on Multiview Detection Miscellaneous
The 16th IEEE/ACM International Conference on Utility and Cloud Computing (UCC 2023), 2023.
Abstract | Links | BibTeX | Tags: ai, eupilot, icsc
% Talk at UCC 2023 (Taormina, Italy); entry dated 2023-12-01.
@misc{23:ucc:multiview,
  author       = {Gianluca Mittone and Giulio Malenza and Marco Aldinucci and Robert Birke},
  title        = {Distributed Edge Inference: an Experimental Study on Multiview Detection},
  howpublished = {The 16th IEEE/ACM International Conference on Utility and Cloud Computing (UCC 2023)},
  address      = {Taormina, Italy},
  year         = {2023},
  date         = {2023-12-01},
  url          = {https://datacloud.di.unito.it/index.php/s/XfjNZEPSNfSKPFr},
  abstract     = {Computing is evolving rapidly to cater to the increasing demand for sophisticated services, and Cloud computing lays a solid foundation for flexible on-demand provisioning. However, as the size of applications grows, the centralised client-server approach used by Cloud computing increasingly limits the applications scalability. To achieve ultra-scalability, cloud/edge/fog computing converges into the compute continuum, completely decentralising the infrastructure to encompass universal, pervasive resources. The compute continuum makes devising applications benefitting from this complex environment a challenging research problem. We put the opportunities the compute continuum others to the test through a real-world multi-view detection model (MvDet) implemented with the FastFL C/C++ high-performance edge inference framework. Computational performance is discussed considering many experimental scenarios, encompassing different edge computational capabilities and network bandwidths. We obtain up to 1.92x speedup in inference time over a centralised solution using the same devices.},
  keywords     = {ai, eupilot, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Samuele Fonio
Benchmarking Federated Learning Frameworks for Medical Imaging Tasks Miscellaneous
Image Analysis and Processing - ICIAP 2023 - 22nd International Conference - FedMed, 2023.
Abstract | Links | BibTeX | Tags: ai, eupilot, fl, icsc
% Talk at the FedMed event of ICIAP 2023 (Udine, Italy); entry dated 2023-09-01.
% The ordinal in howpublished is corrected from "22th" to "22nd".
@misc{23:iciap:benchmed,
  author       = {Samuele Fonio},
  title        = {Benchmarking Federated Learning Frameworks for Medical Imaging Tasks},
  howpublished = {Image Analysis and Processing - ICIAP 2023 - 22nd International Conference - FedMed},
  address      = {Udine, Italy},
  year         = {2023},
  date         = {2023-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/sR7YeTGgfH4DtCR},
  abstract     = {This paper presents a comprehensive benchmarking study of various Federated Learning (FL) frameworks applied to the task of Medical Image Classification. The research specifically addresses the often neglected and complex aspects of scalability and usability in off-the-shelf FL frameworks. Through experimental validation using real case deployments, we provide empirical evidence of the performance and practical relevance of open source FL frameworks. Our findings contribute valuable insights for anyone interested in deploying a FL system, with a particular focus on the healthcare domain—an increasingly attractive field for FL applications.},
  keywords     = {ai, eupilot, fl, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Gianluca Mittone, Samuele Fonio
Benchmarking Federated Learning Scalability Miscellaneous
2nd Italian Conference on Big Data and Data Science (ITADATA 2023), 2023.
Abstract | Links | BibTeX | Tags: ai, eupilot, fl, icsc
% Talk at ITADATA 2023 (Naples, Italy); entry dated 2023-09-01.
@misc{23:itadata:fl_scaling,
  author       = {Gianluca Mittone and Samuele Fonio},
  title        = {Benchmarking Federated Learning Scalability},
  howpublished = {2nd Italian Conference on Big Data and Data Science (ITADATA 2023)},
  address      = {Naples, Italy},
  year         = {2023},
  date         = {2023-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/QZGxC4X3s5LG5oT},
  abstract     = {Federated Learning (FL) is a widespread Machine Learning paradigm handling distributed Big Data. In this work, we demonstrate that different FL frameworks expose different scaling performances despite adopting the same technologies, highlighting the need for a more comprehensive study on the topic.},
  keywords     = {ai, eupilot, fl, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Gianluca Mittone, Walter Riviera, Iacopo Colonnelli, Robert Birke, Marco Aldinucci
Model-Agnostic Federated Learning Miscellaneous
29th International European Conference on Parallel and Distributed Computing (Euro-Par '23), 2023.
Abstract | Links | BibTeX | Tags: ai, eupilot, icsc
% Talk at Euro-Par '23 (Limassol, Cyprus); entry dated 2023-09-01.
@misc{23:europar:mafl,
  author       = {Gianluca Mittone and Walter Riviera and Iacopo Colonnelli and Robert Birke and Marco Aldinucci},
  title        = {Model-Agnostic Federated Learning},
  howpublished = {29th International European Conference on Parallel and Distributed Computing (Euro-Par '23)},
  address      = {Limassol, Cyprus},
  year         = {2023},
  date         = {2023-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/9T6G2tRreRomBAE},
  abstract     = {Since its debut in 2016, Federated Learning (FL) has been tied to the inner workings of Deep Neural Networks (DNNs); this allowed its development as DNNs proliferated but neglected those scenarios in which using DNNs is not possible or advantageous. The fact that most current FL frameworks only support DNNs reinforces this problem. To address the lack of non-DNN-based FL solutions, we propose MAFL (Model-Agnostic Federated Learning). MAFL merges a model-agnostic FL algorithm, AdaBoost.F, with an open industry-grade FL framework: Intel® OpenFL. MAFL is the first FL system not tied to any machine learning model, allowing exploration of FL beyond DNNs. We test MAFL from multiple points of view, assessing its correctness, flexibility, and scaling properties up to 64 nodes of an HPC cluster. We also show how we optimised OpenFL achieving a 5.5x speedup over a standard FL scenario. MAFL is compatible with x86-64, ARM-v8, Power and RISC-V.},
  keywords     = {ai, eupilot, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Gianluca Mittone, Nicolò Tonci, Robert Birke, Iacopo Colonnelli, Doriana Medić, Andrea Bartolini, Roberto Esposito, Emanuele Parisi, Francesco Beneventi, Mirko Polato, Massimo Torquati, Luca Benini, Marco Aldinucci
Experimenting with Emerging RISC-V Systems for Decentralised Machine Learning Miscellaneous
20th ACM international conference on computing frontiers (CF '23), 2023, (Invited talk).
Abstract | Links | BibTeX | Tags: ai, eupilot, icsc
% Invited talk at CF '23; entry dated 2023-05-01. No address is recorded in this entry.
@misc{23:ACMCF,
  author       = {Gianluca Mittone and Nicolò Tonci and Robert Birke and Iacopo Colonnelli and Doriana Medić and Andrea Bartolini and Roberto Esposito and Emanuele Parisi and Francesco Beneventi and Mirko Polato and Massimo Torquati and Luca Benini and Marco Aldinucci},
  title        = {Experimenting with Emerging RISC-V Systems for Decentralised Machine Learning},
  howpublished = {20th ACM international conference on computing frontiers (CF '23)},
  year         = {2023},
  date         = {2023-05-01},
  url          = {https://datacloud.di.unito.it/index.php/s/BYyqZbHzzN4DL8Z},
  note         = {Invited talk},
  abstract     = {Decentralised Machine Learning (DML) enables collaborative machine learning without centralised input data. Federated Learning (FL) and Edge Inference are examples of DML. While tools for DML (especially FL) are starting to flourish, many are not flexible and portable enough to experiment with novel processors (e.g., RISC-V), non-fully connected network topologies, and asynchronous collaboration schemes. We overcome these limitations via a domain-specific language allowing us to map DML schemes to an underlying middleware, i.e. the FastFlow parallel programming library. We experiment with it by generating different working DML schemes on x86-64 and ARM platforms and an emerging RISC-V one. We characterise the performance and energy efficiency of the presented schemes and systems. As a byproduct, we introduce a RISC-V porting of the PyTorch framework, the first publicly available to our knowledge.},
  keywords     = {ai, eupilot, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Bruno Casella, Samuele Fonio
Architecture-Based FedAvg for Vertical Federated Learning Miscellaneous
2023.
Abstract | Links | BibTeX | Tags: ai, epi, fl, icsc
% Talk at the DML-ICC workshop of IEEE/ACM UCC 2023 (venue taken from the booktitle field).
% The venue is mirrored into howpublished/address, as the sibling entries do,
% because the misc entry type does not print booktitle; booktitle is kept
% for backward compatibility. Percent signs in the abstract are escaped so
% they do not start a LaTeX comment if the field is typeset.
% NOTE(review): date says 2023-01-01 while booktitle names 4 December 2023 -- confirm the intended date.
@misc{23:casella:architecturalfedavgtalk,
  author       = {Bruno Casella and Samuele Fonio},
  title        = {Architecture-Based FedAvg for Vertical Federated Learning},
  howpublished = {3rd Workshop on Distributed Machine Learning for the Intelligent Computing Continuum (DML-ICC), IEEE/ACM UCC 2023},
  address      = {Taormina, Italy},
  booktitle    = {Proceedings of the 3rd Workshop on Distributed Machine Learning for the Intelligent Computing Continuum (DML-ICC), IEEE/ACM UCC 2023, Taormina, Italy, 4 December 2023},
  year         = {2023},
  date         = {2023-01-01},
  url          = {https://datacloud.di.unito.it/index.php/s/kJQxnqG4d2ZSicK},
  abstract     = {Federated Learning (FL) has emerged as a promising solution to address privacy concerns by collaboratively training Deep Learning (DL) models across distributed parties. This work proposes an architecture-based aggregation strategy in Vertical FL, where parties hold data with different attributes but shared instances. Our approach leverages the identical architectural parts, i.e. neural network layers, of different models to selectively aggregate weights, which is particularly relevant when collaborating with institutions holding different types of datasets, i.e., image, text, or tabular datasets. In a scenario where two entities train DL models, such as a Convolutional Neural Network (CNN) and a Multi-Layer Perceptron (MLP), our strategy computes the average only for architecturally identical segments. This preserves data-specific features learned from demographic and clinical data. We tested our approach on two clinical datasets, i.e., the COVID-CXR dataset and the ADNI study. Results show that our method achieves comparable results with the centralized scenario, in which all the data are collected in a single data lake, and benefits from FL generalizability. In particular, compared to the non-federated models, our proposed proof-of-concept model exhibits a slight performance loss on the COVID-CXR dataset (less than 8\%), but outperforms ADNI models by up to 12\%. Moreover, communication costs between training rounds are minimized by exchanging only the dense layer parameters.},
  keywords     = {ai, epi, fl, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}