Talks | Parallel Computing
2024
Gianluca Mittone, Alberto Mulone, Iacopo Colonnelli, Robert Birke, Marco Aldinucci
Enabling Cross-Facility LLMs Pre-Training Miscellaneous
Accelerating the Development and Use of Generative AI for Science and Engineering: The Trillion Parameter Consortium (TPC), 2024.
Abstract | Links | BibTeX | Tags: eupilot, fl, icsc, space, streamflow
@misc{24:mittone:TPC,
  author       = {Mittone, Gianluca and Mulone, Alberto and Colonnelli, Iacopo and Birke, Robert and Aldinucci, Marco},
  title        = {Enabling Cross-Facility LLMs Pre-Training},
  howpublished = {Accelerating the Development and Use of Generative AI for Science and Engineering: The Trillion Parameter Consortium (TPC)},
  address      = {Atlanta, GA, USA},
  year         = {2024},
  date         = {2024-11-01},
  url          = {https://datacloud.di.unito.it/index.php/s/DRgm8ebBkKQgD2d},
  abstract     = {Big-tech companies pre-train SOTA LLMs on special-purpose, private HPCs, while public research centres lack the resources to compete. We advocate a new take on large model training, e.g., LLMs, called xFFL, which leverages federated learning as an enabling technique to exploit geographically distributed computing power to bridge such digital divide. This work introduces a proof-of-concept federated training of LLaMA-3 8B on three EuroHPC Top500 facilities, proving the viability of leveraging cross-facility publicly available computational power to sustain SOTA LLM workloads.},
  keywords     = {eupilot, fl, icsc, space, streamflow},
  pubstate     = {published},
  tppubtype    = {misc},
}
Giulio Malenza
Performance portability via C++ PSTL, SYCL, OpenMP, and HIP: the Gaia AVU-GSR case study Miscellaneous
IEEE, 2024.
Links | BibTeX | Tags: eupex, icsc
@misc{24:malenza:p3hpc,
  author       = {Malenza, Giulio},
  title        = {Performance portability via C++ PSTL, SYCL, OpenMP, and HIP: the Gaia AVU-GSR case study},
  howpublished = {IEEE},
  address      = {Atlanta, US},
  year         = {2024},
  date         = {2024-11-01},
  url          = {https://datacloud.di.unito.it/index.php/s/55T6LbGcWtssNno},
  keywords     = {eupex, icsc},
  pubstate     = {published},
  tppubtype    = {misc},
}
Lorenzo Brescia
Secure workflow computation Miscellaneous
Presentation of previous works and future directions on securing workflows, 2024.
Links | BibTeX | Tags: confidential, icsc
@misc{24:brescia:swiss:talk,
  author       = {Brescia, Lorenzo},
  title        = {Secure workflow computation},
  howpublished = {Presentation of previous works and future directions on securing workflows},
  address      = {Neuchatel, Switzerland},
  year         = {2024},
  date         = {2024-11-01},
  url          = {https://datacloud.di.unito.it/index.php/s/5nwZ2bi7by3twQB},
  keywords     = {confidential, icsc},
  pubstate     = {published},
  tppubtype    = {misc},
}
Roberto Esposito, Mirko Polato, Samuele Fonio
FedHP: Federated Learning with Hyperspherical Prototypical Regularization Miscellaneous
32nd European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN), 2024.
Abstract | Links | BibTeX | Tags: ai, fl, icsc
@misc{24:esann:fedhp,
  author       = {Esposito, Roberto and Polato, Mirko and Fonio, Samuele},
  title        = {FedHP: Federated Learning with Hyperspherical Prototypical Regularization},
  howpublished = {32nd European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN)},
  address      = {Bruges, Belgium},
  year         = {2024},
  date         = {2024-10-01},
  url          = {https://datacloud.di.unito.it/index.php/s/fKyKSSFQKT3LTxW},
  abstract     = {This paper introduces FedHP, an innovative algorithm that integrates federated learning, hyperspherical geometries, and prototype learning. Federated Learning (FL) has gained prominence as a privacy-preserving method for building robust models across distributed datasets. Traditionally, FL exchanges model parameters to maintain data privacy; however, in scenarios with expensive data communication, exchanging large neural network models becomes impractical. In such cases, prototype learning offers a viable solution by facilitating the exchange of only a few prototypes. Motivated by these considerations, our approach capitalizes on recent advancements in prototype learning, particularly the advantages offered by non-Euclidean geometries. In addition to presenting FedHP, we offer empirical evidence demonstrating its comparability to other state-of-the-art approaches while significantly reducing communication costs.},
  keywords     = {ai, fl, icsc},
  pubstate     = {published},
  tppubtype    = {misc},
}
Adriano Marques Garcia
Assessing Large Language Models Inference Performance on a 64-core RISC-V CPU with Silicon-Enabled Vectors Miscellaneous
Proceedings of BigHPC2024: Special Track on Big Data and High-Performance Computing, co-located with the 3rd Italian Conference on Big Data and Data Science, ITADATA2024, 2024.
Links | BibTeX | Tags: confidential, icsc
@misc{24:garcia:bigHPC:talk,
  author       = {Garcia, Adriano Marques},
  title        = {Assessing Large Language Models Inference Performance on a 64-core RISC-V CPU with Silicon-Enabled Vectors},
  howpublished = {Proceedings of BigHPC2024: Special Track on Big Data and High-Performance Computing, co-located with the 3rd Italian Conference on Big Data and Data Science, ITADATA2024},
  address      = {Pisa, Italy},
  year         = {2024},
  date         = {2024-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/eN6Z62RQr2QsRYa},
  keywords     = {confidential, icsc},
  pubstate     = {published},
  tppubtype    = {misc},
}
Lorenzo Brescia
Performance Analysis on DNA Alignment Workload with Intel SGX Multithreading Miscellaneous
Proceedings of BigHPC2024: Special Track on Big Data and High-Performance Computing, co-located with the 3rd Italian Conference on Big Data and Data Science, ITADATA2024, 2024.
Links | BibTeX | Tags: confidential, icsc
@misc{24:brescia:bigHPC:talk,
  author       = {Brescia, Lorenzo},
  title        = {Performance Analysis on DNA Alignment Workload with Intel SGX Multithreading},
  howpublished = {Proceedings of BigHPC2024: Special Track on Big Data and High-Performance Computing, co-located with the 3rd Italian Conference on Big Data and Data Science, ITADATA2024},
  address      = {Pisa, Italy},
  year         = {2024},
  date         = {2024-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/wqgYiKpHBw5zbSa},
  keywords     = {confidential, icsc},
  pubstate     = {published},
  tppubtype    = {misc},
}
Iacopo Colonnelli
Scientific Workflows in the Heterogeneous Computing Era Miscellaneous
2024.
@misc{24:icolonne:ICSC,
  author    = {Colonnelli, Iacopo},
  title     = {Scientific Workflows in the Heterogeneous Computing Era},
  address   = {Roma, Italy},
  year      = {2024},
  date      = {2024-09-01},
  url       = {https://datacloud.di.unito.it/index.php/s/CyxiWsDbdg6rbpQ},
  keywords  = {icsc},
  pubstate  = {published},
  tppubtype = {misc},
}
Giulio Malenza
Exploiting C++ Parallel Algorithms through FastFlow Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: icsc
@misc{24:gmalenza:BigHPC2024,
  author    = {Malenza, Giulio},
  title     = {Exploiting C++ Parallel Algorithms through FastFlow},
  address   = {Pisa, Italy},
  year      = {2024},
  date      = {2024-09-01},
  url       = {https://datacloud.di.unito.it/index.php/s/GcpQ8cz9BRyM85B},
  abstract  = {High-performance computing and artificial intelligence simulations necessitate the rapid processing of large quantities of data. To handle such data volumes efficiently, leveraging the parallelism inherent in algorithms is crucial. Consequently, parallel programming frameworks have been developed to fully exploit modern parallel architectures. Among these, C++ PSTL stands out for its user-friendliness, portability, and high performance.
In this study, we introduce a back-end for the PSTL implemented using the FastFlow parallel programming framework. We will evaluate correctness and performance of the back-end comparing results with others coming from traditional vendor-dependent back-ends like TBB and nvc++. Performance metrics are derived from running the LULESH application on both RISC-V and ARM architectures. Our results indicate that all three back-ends deliver comparable performance.},
  keywords  = {icsc},
  pubstate  = {published},
  tppubtype = {misc},
}
In this study, we introduce a back-end for the PSTL implemented using the FastFlow parallel programming framework. We will evaluate correctness and performance of the back-end comparing results with others coming from traditional vendor-dependent back-ends like TBB and nvc++. Performance metrics are derived from running the LULESH application on both RISC-V and ARM architectures. Our results indicate that all three back-ends deliver comparable performance.
Giulio Malenza
Exploring energy consumption of AI frameworks on a 64-core RV64 Server CPU Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: ai, DYMAN, icsc
@misc{24:gmalenza:scihpcexa,
  author    = {Malenza, Giulio},
  title     = {Exploring energy consumption of AI frameworks on a 64-core RV64 Server CPU},
  address   = {Pisa, Italy},
  year      = {2024},
  date      = {2024-09-01},
  url       = {https://datacloud.di.unito.it/index.php/s/5aTdyzNB6n9CREq},
  abstract  = {In today's era of rapid technological advancement, artificial intelligence (AI) applications require large-scale, high-performance, and data-intensive computations, leading to significant energy demands. Addressing this challenge necessitates a combined approach involving both hardware and software innovations. Hardware manufacturers are developing new, efficient, and specialized solutions, with the RISC-V architecture emerging as a prominent player due to its open, extensible, and energy-efficient instruction set architecture (ISA). Simultaneously, software developers are creating new algorithms and frameworks,
yet their energy efficiency often remains unclear.
In this study, we conduct a comprehensive benchmark analysis of machine learning (ML) applications on the 64-core SOPHON SG2042 RISC-V architecture. Specifically, we examine the energy consumption of deep learning inference models across various AI frameworks. By comparing the performance of different frameworks, we aim to provide a detailed understanding of how these frameworks can optimize energy consumption on this architecture.},
  keywords  = {ai, DYMAN, icsc},
  pubstate  = {published},
  tppubtype = {misc},
}
yet their energy efficiency often remains unclear.
In this study, we conduct a comprehensive benchmark analysis of machine learning (ML) applications on the 64-core SOPHON SG2042 RISC-V architecture. Specifically, we examine the energy consumption of deep learning inference models across various AI frameworks. By comparing the performance of different frameworks, we aim to provide a detailed understanding of how these frameworks can optimize energy consumption on this architecture.
Marco Edoardo Santimaria, Iacopo Colonnelli, Marco Aldinucci
Releasing the CAPIO middleware from MPI derived constraints Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: across, admire, capio, capiocl, eupex, icsc
@misc{24:santimaria:bighpc,
  author    = {Santimaria, Marco Edoardo and Colonnelli, Iacopo and Aldinucci, Marco},
  title     = {Releasing the CAPIO middleware from MPI derived constraints},
  address   = {Pisa, Italy},
  year      = {2024},
  date      = {2024-09-01},
  url       = {https://datacloud.di.unito.it/index.php/s/zrJGD4i36fWdp5g},
  abstract  = {CAPIO is a middleware that transparently injects streaming capabilities into file-based workflows. However, its implementation is limited to HPC environments based on the MPI framework, significantly limiting its applications. This paper will illustrate a proposed architecture and some preliminary results aimed at investigating the usage of a distributed files system as a communication media for the CAPIO middleware, with the ultimate goal of supporting both CLOUD-based and HPC-based workflows.},
  keywords  = {across, admire, capio, capiocl, eupex, icsc},
  pubstate  = {published},
  tppubtype = {misc},
}
Marco Edoardo Santimaria, Iacopo Colonnelli, Massimo Torquati, Marco Aldinucci
CAPIO: Cross Application Programmable IO Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: across, admire, capio, capiocl, eupex, icsc
@misc{24:santimaria:itadata:shpcpee,
  author    = {Santimaria, Marco Edoardo and Colonnelli, Iacopo and Torquati, Massimo and Aldinucci, Marco},
  title     = {CAPIO: Cross Application Programmable IO},
  address   = {Pisa, Italy},
  year      = {2024},
  date      = {2024-09-01},
  url       = {https://datacloud.di.unito.it/index.php/s/rg6LWwrZXi6tTXm},
  abstract  = {With the increasing amount of digital data available for analysis and simulation, the class of I/O-intensive HPC workflows is fated to expand, further exacerbating quickly the performance gap between computing, memory, and storage technologies. CAPIO (Cross-Application Programmable I/O), is a middleware capable of injecting I/O streaming capabilities into file-based workflows, improving the computation-I/O overlap without the need to change the application code. In this presentation, we will introduce the CAPIO-CL language with its semantics, as well as the implementation of the CAPIO-CL language through the CAPIO middleware. We will also provide some case studies of how CAPIO has been employed to improve workflow execution time as well as some future directions.},
  keywords  = {across, admire, capio, capiocl, eupex, icsc},
  pubstate  = {published},
  tppubtype = {misc},
}
Gianluca Mittone
Benchmarking HPC Performance for State-of-the-Art AI Workloads Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: ai, eupilot, icsc
@misc{24:mittone:itadata:shpcpee,
  author    = {Mittone, Gianluca},
  title     = {Benchmarking HPC Performance for State-of-the-Art AI Workloads},
  address   = {Pisa, Italy},
  year      = {2024},
  date      = {2024-09-01},
  url       = {https://datacloud.di.unito.it/index.php/s/5Ep3W7cPW5baZfr},
  abstract  = {Benchmarking the performance of modern High-Performance Computing (HPC) infrastructure on Artificial Intelligence (AI) workloads is a hot topic in the supercomputing community. While research communities and big-tech companies actively invest in larger, more powerful data centres to support AI research, the standard computational performance benchmarking tools (e.g., LINPACK) are increasingly becoming outdated since they are not specifically tailored for AI workloads. Some tools, such as MLPerf, are trying to bridge this gap, but the HPC community still has not adopted them as standards. Since this trend became particularly evident with the advent of Large Language Models (LLMs), this work will delve into LLM training at scale as a way to benchmark Top500 HPC infrastructures on current AI workloads. The scalability performances of a major LLM model (i.e., Meta's LLaMA) on different HPCs (Leonardo, LUMI, MeluXina, Karolina) are exposed and discussed along with their Top500 positioning.
However, it should be noted that state-of-the-art LLM models are not trained on thousands of computing nodes but on hundreds. This choice is due to multiple factors, such as the influence of the training scaling on the model's convergence and the instability of large-scale deployments due to hardware/software failure. A benchmarking approach based on the next-generation LLM training approach is proposed to bypass all these issues. State-of-the-art LLMs are not monolithic structures but Mixture-of-Experts (MoE) models; this design implies innovative frontiers for the distributed training of such models due to the experts' training being potentially more parallelisable than a single monolithic model. We thus propose to create an AI-oriented HPC benchmark suite based on the parallel training of MoE models to measure the throughput performance of HPC systems on state-of-the-art AI workloads.},
  keywords  = {ai, eupilot, icsc},
  pubstate  = {published},
  tppubtype = {misc},
}
However, it should be noted that state-of-the-art LLM models are not trained on thousands of computing nodes but on hundreds. This choice is due to multiple factors, such as the influence of the training scaling on the model's convergence and the instability of large-scale deployments due to hardware/software failure. A benchmarking approach based on the next-generation LLM training approach is proposed to bypass all these issues. State-of-the-art LLMs are not monolithic structures but Mixture-of-Experts (MoE) models; this design implies innovative frontiers for the distributed training of such models due to the experts' training being potentially more parallelisable than a single monolithic model. We thus propose to create an AI-oriented HPC benchmark suite based on the parallel training of MoE models to measure the throughput performance of HPC systems on state-of-the-art AI workloads.
Gianluca Mittone
Pushing Federated Learning Boundaries: Three Innovative Distributed Intelligence Approaches Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: eupilot, fl, icsc
@misc{24:mittone:bighpc,
  author    = {Mittone, Gianluca},
  title     = {Pushing Federated Learning Boundaries: Three Innovative Distributed Intelligence Approaches},
  address   = {Pisa, Italy},
  year      = {2024},
  date      = {2024-09-01},
  url       = {https://datacloud.di.unito.it/index.php/s/eKbRtSAEdmSFJYW},
  abstract  = {Federated learning is a distributed, privacy-preserving machine learning technique used on private, decentralised data. It allows multiple parties to cooperatively solve a common machine learning problem without sharing the local data. Three assumptions of state-of-the-art federated learning software constitute the starting points for this research work: 1) their inner workings being strictly tied to deep learning models, 2) the centralised structure currently implemented by many commercial frameworks, and 3) their assumption of being deployed on private, specialised computing infrastructures. The proposed research expands the federated learning paradigm to handle scenarios in which these three conditions do not hold. Such research problems are addressed methodologically and practically, and three open-source, proof-of-concept software are made freely available as tangible research results: OpenFL-x, FastFL, and xFFL.},
  keywords  = {eupilot, fl, icsc},
  pubstate  = {published},
  tppubtype = {misc},
}
Samuele Fonio, Bruno Casella, Oussama Harrak
Federated Adaboost for Survival Analysis Miscellaneous
European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases, 2nd Workshop on Advancements in Federated Learning (WAFL), 2024.
Abstract | Links | BibTeX | Tags: ai, epi, fl, icsc
@misc{23:ecmlpkdd:fedsurvboost,
  author       = {Fonio, Samuele and Casella, Bruno and Harrak, Oussama},
  title        = {Federated Adaboost for Survival Analysis},
  howpublished = {European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases, 2nd Workshop on Advancements in Federated Learning (WAFL)},
  address      = {Vilnius, Lithuania},
  year         = {2024},
  date         = {2024-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/DtXiQfne6BEC235},
  abstract     = {This work proposes FedSurvBoost, a federated learning pipeline for survival analysis based on the AdaBoost.F algorithm, which iteratively aggregates the best local weak hypotheses. Our method extends AdaBoost.F by removing the dependence on the number of classes coefficient from the computation of the weights of the best model. This makes it suitable for regression tasks, such as survival analysis. We show the effectiveness of our approach by comparing it with state-of-the-art methods, specifically developed for survival analysis problems, on two common survival datasets.},
  keywords     = {ai, epi, fl, icsc},
  pubstate     = {published},
  tppubtype    = {misc},
}
Alberto Mulone, Doriana Medić, Marco Aldinucci
A Fault Tolerance mechanism for Hybrid Scientific Workflows Miscellaneous
1st workshop about High-Performance e-Science (HiPES), 2024.
Abstract | Links | BibTeX | Tags: eupex, icsc, streamflow
@misc{24:madrid:hipes:talk,
  author       = {Mulone, Alberto and Medić, Doriana and Aldinucci, Marco},
  title        = {A Fault Tolerance mechanism for Hybrid Scientific Workflows},
  howpublished = {1st workshop about High-Performance e-Science (HiPES)},
  address      = {Madrid, Spain},
  year         = {2024},
  date         = {2024-08-01},
  url          = {https://datacloud.di.unito.it/index.php/s/9Ddj6fGgmDbLDXj},
  abstract     = {In large distributed systems, failures are a daily event occurring frequently, especially with growing numbers of computation tasks and locations on which they are deployed. The advantage of representing an application as a workflow is possibility to utilize the Workflow Management Systems which are reliable systems guaranteeing the correct execution of the application and providing the features such as portability, scalability, and fault tolerance. Over recent years, the emergence of hybrid workflows has posed new and intriguing challenges by increasing the possibility of distributing computations involving heterogeneous and independent environments. As a consequence, the number of possible points of failure in the execution augmented, creating different important challenges interesting to study.},
  keywords     = {eupex, icsc, streamflow},
  pubstate     = {published},
  tppubtype    = {misc},
}
Iacopo Colonnelli
Scientific Workflows in the Continuum Era Miscellaneous
2024, (Keynote Talk).
Abstract | Links | BibTeX | Tags: icsc
@misc{24:icolonne:wscc,
  author    = {Colonnelli, Iacopo},
  title     = {Scientific Workflows in the Continuum Era},
  address   = {Madrid, Spain},
  year      = {2024},
  date      = {2024-08-01},
  url       = {https://datacloud.di.unito.it/index.php/s/PkqYA3p38XLKgrt},
  abstract  = {Thanks to their generality, workflow models represent a powerful abstraction for designing complex applications and executing them on large-scale distributed architectures. However, several additional challenges appear when transitioning from cloud/HPC environments to the entire compute continuum. Continuum execution environments are fully distributed and modular, and modules can be heterogeneous and independent of each other. In addition, continuum workflows often rely on multiple intercommunicating agents that form complex micro-services architectures. Different agents deal with different communication and parallelization paradigms: network-based stream processing at the edge and file-based batch processing on HPC facilities. Finally, support for efficient interactive workflows in the continuum remains an open research problem. This talk explores these challenges and provides insights on how to deal with them. A ready-to-use software library accompanies each proposed solution to facilitate the reproducibility and reusability of the presented concepts.},
  note      = {Keynote Talk},
  keywords  = {icsc},
  pubstate  = {published},
  tppubtype = {misc},
}
Giulio Malenza
Preliminary analysis of model parallelism applications on a 64-core RV64 Server CPU Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: eupilot, icsc
@misc{24:gmalenza:hlpp:MPRISC-v,
  author    = {Malenza, Giulio},
  title     = {Preliminary analysis of model parallelism applications on a 64-core RV64 Server CPU},
  address   = {Pisa, Italy},
  year      = {2024},
  date      = {2024-07-01},
  url       = {https://datacloud.di.unito.it/index.php/s/JrWwKALeaFEJSQo},
  abstract  = {Massive Data Parallel workloads, driven by inference on large ML models, are pushing hardware vendors to develop efficient and cost-effective multi-core server CPUs. The RISC-V architecture plays a prominent role due to its open, extensible and energy-friendly ISA. Despite significant progress in recent years, finding efficient methods to run parallel applications on new architectures to harness their maximum performance fully remains a challenge. In this study, we benchmark the inference of machine learning models on the SOPHON SG2042 SoC, the first server-grade CPU based on the RV64 ISA, composed of 64 cores arranged in a grid of 16 groups of 4 cores. Specifically, we aim to enhance performance via better cache hit ratios stemming from model parallelism to split and assign parts of the model to specific (groups of) cores using a pipeline execution. We orchestrate execution using FastFlow, a low-level programming framework designed for multithreaded streaming applications. By comparing the results against the standard multi-core inference and analyzing the effects of different submodel-to-core mapping strategies, we aim to provide a comprehensive understanding of how the model parallel approach can maximize efficiency and utilization of hardware resources.},
  keywords  = {eupilot, icsc},
  pubstate  = {published},
  tppubtype = {misc},
}
Marco Edoardo Santimaria
CAPIO-CL: Cross Application Programmable IO - Coordination Language Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: across, admire, capio, eupex, icsc
@misc{24:santimaria:hlpp:capiocl,
  author    = {Santimaria, Marco Edoardo},
  title     = {CAPIO-CL: Cross Application Programmable IO - Coordination Language},
  address   = {Pisa, Italy},
  year      = {2024},
  date      = {2024-07-01},
  url       = {https://datacloud.di.unito.it/index.php/s/zsKY3PWzX5NFCiX},
  abstract  = {The performance bottleneck in file-based workflows remains a pressing issue in the realm of I/O-based workflows. To address this challenge, a novel annotation language has been developed. CAPIO-CL is positioned as an innovative I/O coordination language, enabling users to annotate data dependencies within file-based workflows with synchronization semantics pertinent to the involved files and directories. Through the information provided by the language, optimization opportunities arise in streaming and preemptive data movement. This paper serves to illustrate the semantics and syntax enabling CAPIO-CL to enhance the performance of in situ workflows without necessitating the rewriting or modification of the original workflow application steps. Finally, an analysis of CAPIO-CL is provided, taking into consideration both language expressiveness and application performance enhancement.},
  keywords  = {across, admire, capio, eupex, icsc},
  pubstate  = {published},
  tppubtype = {misc},
}
Lorenzo Brescia
Towards Secure WMS with TEEs Miscellaneous
Mentoring session: HPC Summer School, University of Trento, 2024.
Links | BibTeX | Tags: confidential, icsc
@misc{24:brescia:hpc:school:talk,
  author       = {Brescia, Lorenzo},
  title        = {Towards Secure WMS with TEEs},
  howpublished = {Mentoring session: HPC Summer School, University of Trento},
  address      = {Trento, Italy},
  year         = {2024},
  date         = {2024-06-01},
  url          = {https://datacloud.di.unito.it/index.php/s/eAxEgqiTsGSRQz4},
  keywords     = {confidential, icsc},
  pubstate     = {published},
  tppubtype    = {misc},
}
Iacopo Colonnelli, Robert Birke, Giulio Malenza, Gianluca Mittone, Alberto Mulone, Marco Aldinucci
Cross-Facility Federated Learning - Part II Miscellaneous
2024, (Invited talk).
Links | BibTeX | Tags: eupex, icsc, space
@misc{24:ic:elise:xffl,
  author    = {Colonnelli, Iacopo and Birke, Robert and Malenza, Giulio and Mittone, Gianluca and Mulone, Alberto and Aldinucci, Marco},
  title     = {Cross-Facility Federated Learning - Part II},
  address   = {Helsinki, Finland},
  year      = {2024},
  date      = {2024-06-01},
  url       = {https://datacloud.di.unito.it/index.php/s/7HonBpcWPxotXLX},
  note      = {Invited talk},
  keywords  = {eupex, icsc, space},
  pubstate  = {published},
  tppubtype = {misc},
}
Marco Aldinucci
From HPC4AI to Software & Integration living lab to innovation Miscellaneous
HPC as an enabling platform for AI event, 2024.
Abstract | Links | BibTeX | Tags: ai, HPC, icsc
@misc{24:ma:hpcai:talk,
  author       = {Aldinucci, Marco},
  title        = {From HPC4AI to Software \& Integration living lab to innovation},
  howpublished = {HPC as an enabling platform for AI event},
  address      = {Torino, Italy},
  year         = {2024},
  date         = {2024-06-01},
  url          = {https://datacloud.di.unito.it/index.php/s/3SS3Xa9XorN6D9o},
  abstract     = {The talk presents the motivation and the activity of the "Software and Integration" lab at UNITO.},
  keywords     = {ai, HPC, icsc},
  pubstate     = {published},
  tppubtype    = {misc},
}
Marco Aldinucci
Software & Integration lab of FutureHPC spoke Miscellaneous
HPC as an enabling platform for AI event, 2024.
Abstract | Links | BibTeX | Tags: ai, HPC, icsc
@misc{24:ma:swi:lab,
  author       = {Aldinucci, Marco},
  title        = {Software \& Integration lab of FutureHPC spoke},
  howpublished = {HPC as an enabling platform for AI event},
  address      = {Torino, Italy},
  year         = {2024},
  date         = {2024-06-01},
  url          = {https://datacloud.di.unito.it/index.php/s/WDjyXCGyYFJDQSd},
  abstract     = {The presentation describes the main activity of the "Software and Integration" lab at UNITO across its main flagship codes.},
  keywords     = {ai, HPC, icsc},
  pubstate     = {published},
  tppubtype    = {misc},
}
Iacopo Colonnelli
Dynamic hybrid workflows for Deep Learning on HPC infrastructure Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: icsc, jupyter-workflow, streamflow
@misc{24:icolonne:ictp,
  author    = {Colonnelli, Iacopo},
  title     = {Dynamic hybrid workflows for Deep Learning on HPC infrastructure},
  address   = {Trieste, Italy},
  year      = {2024},
  date      = {2024-05-01},
  url       = {https://datacloud.di.unito.it/index.php/s/EaFHJEKNbW5oXeq},
  abstract  = {Hybrid workflow abstractions allow users to quickly design and orchestrate cross-facility workloads, decoupling tasks from environment-specific technical details to reduce complexity and increase reusability. Plus, workflow descriptions help ensure the reproducibility of scientific experiments through prospective and retrospective provenance collection. This module has been designed to provide a hands-on exploration of scientific workflows from various angles, from the initial design phase to their orchestration at extreme scales. We will use the practical example of the Common Workflow Language (CWL) open standard to demonstrate how workflows can be written, and the StreamFlow workflow system to execute them seamlessly on the CINECA HPC facility. We will also delve into the integration between scientific workflows and Jupyter Notebooks, which aims to give data scientists a familiar interface to scientific workflows. In this module, students will gain a comprehensive understanding of scientific workflows. They will learn how to use these workflows to model and orchestrate Machine Learning and Deep Learning pipelines. Additionally, they will explore how modern workflow management systems can efficiently scale data-oriented workloads from a researcher’s laptop to an entire HPC facility.},
  keywords  = {icsc, jupyter-workflow, streamflow},
  pubstate  = {published},
  tppubtype = {misc},
}
Gianluca Mittone
Into to Federated Learning Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: fl, icsc
@misc{24:mittone:ictp,
  author       = {Gianluca Mittone},
  title        = {Intro to Federated Learning},
  address      = {Trieste, Italy},
  year         = {2024},
  date         = {2024-05-01},
  url          = {https://datacloud.di.unito.it/index.php/s/nSwwmedjqe2jbWJ},
  abstract     = {Machine Learning (ML) is the branch of Artificial Intelligence focused on developing algorithms capable of adapting and improving their predictive or generative performance by feeding on data. Adapting or improving the system’s behaviour based on the provided data is called learning since it is similar to the human learning process in many aspects. The same ML algorithm, usually referred to as a model, trained on different data will thus expose different capabilities and can, therefore, solve different tasks. FL is a relatively recent distributed ML methodology aiming to bridge the gap between the need to train ever bigger ML models on ever larger datasets and the individuals’ and companies’ will to protect and not share their private data. From another point of view, FL is also a way to distribute the training of an ML model even more than before. However, it should be considered that the learning performance of FL is usually lower than that of traditional centralised learning. This course will start from Kairouz and McMahan’s definition of FL: “Federated learning is a machine learning setting where multiple entities (clients) collaborate in solving a machine learning problem, under the coordination of a central server or service provider. Each client’s raw data is stored locally and not exchanged or transferred; instead, focused updates intended for immediate aggregation are used to achieve the learning objective.” From this starting point, the most significant aspects of FL will be exposed and discussed. This tutorial will particularly explore FL from both the learning and computational performance perspectives, investigating its pros and cons in a distributed ML setting. Since FL natively targets data privacy, some insights on how the FL process can be attacked and protected will also be discussed from a high-level perspective. Finally, a hands-on session will guide the participants in building a basic FL system, providing a better understanding of the major implementational difficulties of such a technique.},
  keywords     = {fl, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli
CWL Working Groups Miscellaneous
2024 CWL Conference, 2024.
Abstract | Links | BibTeX | Tags: icsc
@misc{24:icolonne:cwlcon2024,
  author       = {Iacopo Colonnelli},
  title        = {CWL Working Groups},
  howpublished = {2024 CWL Conference},
  address      = {Amsterdam, Netherlands},
  year         = {2024},
  date         = {2024-05-01},
  url          = {https://datacloud.di.unito.it/index.php/s/zZDKdL8deLd4jSi},
  abstract     = {This presentation introduces the new CWL Working Groups initiative, describing what a Working Group actually is, which Working Groups already exist in the CWL community, and how anybody can create a new officially recognized Working Group. Then, the presentation will explore the CWL4HPC Working Group, using it as an example of how a CWL Working Group can actually work.},
  keywords     = {icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Lorenzo Brescia
Secure Generic Remote Workflow Execution with TEEs Miscellaneous
Proceedings of the 2nd Workshop on Workflows in Distributed Environments, 2024.
Abstract | Links | BibTeX | Tags: confidential, icsc
@misc{23:brescia:wide:talk,
  author       = {Lorenzo Brescia},
  title        = {Secure Generic Remote Workflow Execution with TEEs},
  howpublished = {Proceedings of the 2nd Workshop on Workflows in Distributed Environments},
  address      = {Athens, Greece},
  year         = {2024},
  date         = {2024-04-01},
  url          = {https://datacloud.di.unito.it/index.php/s/Prxq6EWGbcN8sWx},
  abstract     = {In scientific environments, the frequent need to process substantial volumes of data poses a common challenge. Individuals tasked with executing these computations frequently encounter a deficit in local computational resources, leading them to opt for the facilities of a Cloud Service Provider (CSP) for data processing. However, the data subjected to these calculations may be subject to confidentiality constraints. This paper introduces a proof-of-concept framework that leverages Gramine LibOS and Intel SGX, enabling the protection of generic remote workflow computations through SGX enclaves as Trusted Execution Environments (TEEs). The framework entails the delineation of user and CSP behavior and has been implemented using Bash scripts. Furthermore, an infrastructure has been designed for the Data Center Attestation Primitives (DCAP) remote attestation mechanism, wherein the user gains trust in the proper instantiation of the enclave within the CSP. To assess the framework efficacy, it has been tested on two distinct workflows, one trivial and the other involving real-world bioinformatics applications for processing DNA data. The performance study revealed that the framework incurred an acceptable overhead, ranging from a factor of x1.4 to x1.8 compared to unsafe execution practice.},
  keywords     = {confidential, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Alberto Mulone
Workflows for future High-Performance Computing Miscellaneous
COMETE PhD Workshop, 2024.
@misc{24:amulone:comete,
  author       = {Alberto Mulone},
  title        = {Workflows for future High-Performance Computing},
  howpublished = {COMETE PhD Workshop},
  address      = {Torino, Italy},
  year         = {2024},
  date         = {2024-04-01},
  url          = {https://datacloud.di.unito.it/index.php/s/ZGG8fLMp5B7qRHS},
  keywords     = {icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli
CWL in the HPC Ecosystem Miscellaneous
Workshop on workflow languages for HEP analysis, 2024.
Links | BibTeX | Tags: across, eupex, icsc, space, streamflow
@misc{24:icolonne:cwl4hpccern,
  author       = {Iacopo Colonnelli},
  title        = {CWL in the HPC Ecosystem},
  howpublished = {Workshop on workflow languages for HEP analysis},
  address      = {CERN, Meyrin, Switzerland},
  year         = {2024},
  date         = {2024-04-01},
  url          = {https://datacloud.di.unito.it/index.php/s/PRmqdwWHt6P2PH7},
  keywords     = {across, eupex, icsc, space, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
Giulio Malenza, Marco Edoardo Santimaria
Benchmarking Parallelization Models through Karmarkar's algorithm Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: HPC, icsc
@misc{24:pdp:karmarkartalk,
  author       = {Giulio Malenza and Marco Edoardo Santimaria},
  title        = {Benchmarking Parallelization Models through Karmarkar's algorithm},
  address      = {Dublin, Ireland},
  year         = {2024},
  date         = {2024-03-01},
  url          = {https://datacloud.di.unito.it/index.php/s/JjKcAJpYS7ctX9r},
  abstract     = {Optimization problems are one of the main focuses of scientific research. Their computational-intensive nature makes them prone to be parallelized with consistent improvements in performance. This paper sheds light on different parallel models for accelerating Karmarkar’s Interior-point method. To do so, we assess parallelization strategies for individual operations within the aforementioned Karmarkar’s algorithm using OpenMP, GPU acceleration with CUDA, and the recent Parallel Standard C++ Linear Algebra library (PSTL) executing both on GPU and CPU. Our different implementations yield interesting benchmark results that show the optimal approach for parallelizing interior point algorithms for general Linear Programming (LP) problems. In addition, we propose a more theoretical perspective of the parallelization of this algorithm, with a detailed study of our OpenMP implementation, showing the limits of optimizing the single operations.},
  keywords     = {HPC, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Robert Birke
FLaaS: Federated Learning as a Service Miscellaneous
ICSC - Spoke 1 meeting, 2024.
Abstract | Links | BibTeX | Tags: ai, icsc
@misc{24:icsc:spoke1:ifab,
  author       = {Robert Birke},
  title        = {FLaaS: Federated Learning as a Service},
  howpublished = {ICSC - Spoke 1 meeting},
  address      = {Torino, Italy},
  year         = {2024},
  date         = {2024-02-01},
  url          = {https://datacloud.di.unito.it/index.php/s/yHXdTnC8xEqoJ6Y},
  abstract     = {Presentation about the Innovation Grant in collaboration with IFAB},
  keywords     = {ai, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Alberto Mulone
Cross-Platform Full Waveform Inversion Miscellaneous
ICSC - Spoke 1 meeting, 2024.
Abstract | Links | BibTeX | Tags: icsc, streamflow
@misc{24:icsc:spoke1:eni,
  author       = {Alberto Mulone},
  title        = {Cross-Platform Full Waveform Inversion},
  howpublished = {ICSC - Spoke 1 meeting},
  address      = {Torino, Italy},
  year         = {2024},
  date         = {2024-02-01},
  url          = {https://datacloud.di.unito.it/index.php/s/M3HkxA5wsBPS5ro},
  abstract     = {Presentation about the Innovation Grant in collaboration with ENI},
  keywords     = {icsc, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
Gianluca Mittone
RISC-V for AI Miscellaneous
High Performance, Edge And Cloud computing Conference 2024 (HiPEAC 2024), 2024.
Abstract | Links | BibTeX | Tags: eupilot, icsc
@misc{24:HiPEAC:riscv,
  author       = {Gianluca Mittone},
  title        = {RISC-V for AI},
  howpublished = {High Performance, Edge And Cloud computing Conference 2024 (HiPEAC 2024)},
  address      = {Garching bei München, München, Germany},
  year         = {2024},
  date         = {2024-01-01},
  url          = {https://datacloud.di.unito.it/index.php/s/rFtxT7zryoKNGbP},
  abstract     = {AI-focused RISC-V-based hardware accelerators},
  keywords     = {eupilot, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
2023
Lorenzo Brescia, Iacopo Colonnelli
Trusted Computing at Scale Miscellaneous
CN HPC Flagship 4 Working Day, 2023.
Links | BibTeX | Tags: confidential, icsc
@misc{23:brescia:trusted:workflow:fl4:talk,
  author       = {Lorenzo Brescia and Iacopo Colonnelli},
  title        = {Trusted Computing at Scale},
  howpublished = {CN HPC Flagship 4 Working Day},
  address      = {Turin, Italy},
  year         = {2023},
  date         = {2023-12-01},
  url          = {https://datacloud.di.unito.it/index.php/s/5ij6tLd5SAX4Nn4},
  keywords     = {confidential, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
Federated Learning: A Distributed System Viewpoint Miscellaneous
Bicocca University seminars, Milan, Italy, 2023, (Invited talk).
Abstract | Links | BibTeX | Tags: eupilot, icsc, textarossa
@misc{23:FL:bicocca,
  author       = {Marco Aldinucci},
  title        = {Federated Learning: A Distributed System Viewpoint},
  howpublished = {Bicocca University seminars, Milan, Italy},
  year         = {2023},
  date         = {2023-12-01},
  url          = {https://datacloud.di.unito.it/index.php/s/FfEzADQtC73GgLs},
  note         = {Invited talk},
  abstract     = {Decentralized machine learning (DML) enables collaborative machine learning without centralized input data. Federated learning (FL) and edge inference (EI) are examples of DML. Collaboration naturally happens at the edge of a distributed system with inherently distributed data. While tools for DML are starting to flourish, much needs to be done to get more flexible and portable tools to experiment with novel techniques, non-fully connected topologies, multiple data domains, and asynchronous collaboration schemes. We'll present recent advances in DML, aiming to improve usability in data centers and, at the edge, to widen the class of models extending FL to non-DNN paradigms, to improve the accuracy of models controlling normalization and frequency of communications, and to boost data privacy through generative adversarial networks.},
  keywords     = {eupilot, icsc, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Gianluca Mittone, Giulio Malenza, Marco Aldinucci, Robert Birke
Distributed Edge Inference: an Experimental Study on Multiview Detection Miscellaneous
The 16th IEEE/ACM International Conference on Utility and Cloud Computing (UCC 2023), 2023.
Abstract | Links | BibTeX | Tags: ai, eupilot, icsc
@misc{23:ucc:multiview,
  author       = {Gianluca Mittone and Giulio Malenza and Marco Aldinucci and Robert Birke},
  title        = {Distributed Edge Inference: an Experimental Study on Multiview Detection},
  howpublished = {The 16th IEEE/ACM International Conference on Utility and Cloud Computing (UCC 2023)},
  address      = {Taormina, Italy},
  year         = {2023},
  date         = {2023-12-01},
  url          = {https://datacloud.di.unito.it/index.php/s/XfjNZEPSNfSKPFr},
  abstract     = {Computing is evolving rapidly to cater to the increasing demand for sophisticated services, and Cloud computing lays a solid foundation for flexible on-demand provisioning. However, as the size of applications grows, the centralised client-server approach used by Cloud computing increasingly limits the applications' scalability. To achieve ultra-scalability, cloud/edge/fog computing converges into the compute continuum, completely decentralising the infrastructure to encompass universal, pervasive resources. The compute continuum makes devising applications benefitting from this complex environment a challenging research problem. We put the opportunities the compute continuum offers to the test through a real-world multi-view detection model (MvDet) implemented with the FastFL C/C++ high-performance edge inference framework. Computational performance is discussed considering many experimental scenarios, encompassing different edge computational capabilities and network bandwidths. We obtain up to 1.92x speedup in inference time over a centralised solution using the same devices.},
  keywords     = {ai, eupilot, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci, Elena Baralis, Valeria Cardellini, Iacopo Colonnelli, Marco Danelutto, Sergio Decherchi, Giuseppe Di Modica, Luca Ferrucci, Marco Gribaudo, Francesco Iannone, Marco Lapegna, Doriana Medić, Giuseppa Muscianisi, Francesca Righetti, Eva Sciacca, Nicola Tonellotto, Mauro Tortonesi, Paolo Trunfio, Tullio Vardanega
A Systematic Mapping Study of Italian Research on Workflows Miscellaneous
18th Workshop on Workflows in Support of Large-Scale Science (WORKS 2023), 2023.
Abstract | Links | BibTeX | Tags: icsc
@misc{23:sc:works,
  author       = {Marco Aldinucci and Elena Baralis and Valeria Cardellini and Iacopo Colonnelli and Marco Danelutto and Sergio Decherchi and Di Modica, Giuseppe and Luca Ferrucci and Marco Gribaudo and Francesco Iannone and Marco Lapegna and Doriana Medić and Giuseppa Muscianisi and Francesca Righetti and Eva Sciacca and Nicola Tonellotto and Mauro Tortonesi and Paolo Trunfio and Tullio Vardanega},
  title        = {A Systematic Mapping Study of Italian Research on Workflows},
  howpublished = {18th Workshop on Workflows in Support of Large-Scale Science (WORKS 2023)},
  address      = {Denver, CO, USA},
  year         = {2023},
  date         = {2023-11-01},
  url          = {https://datacloud.di.unito.it/index.php/s/2kgooG43pGCykji},
  abstract     = {An entire ecosystem of methodologies and tools revolves around scientific workflow management. They cover crucial non-functional requirements that standard workflow models fail to target, such as interactive execution, energy efficiency, performance portability, Big Data management, and intelligent orchestration in the Computing Continuum. Characterizing and monitoring this ecosystem is crucial to developing an informed view of current and future research directions. This work conducts a systematic mapping study of the Italian workflow research community, analyzing 25 tools and 10 applications from several scientific domains in the context of the ``National Research Centre for HPC, Big Data, and Quantum Computing'' (ICSC). The study aims to outline the main current research directions and determine how they address the critical needs of modern scientific applications. The findings highlight a variegated research ecosystem of tools, with a prominent interest in advanced workflow orchestration and still immature but promising efforts toward energy efficiency.},
  keywords     = {icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli, Doriana Medić, Barbara Cantalupo, Marco Aldinucci
Università degli Studi di Torino: Alpha parallel research group Miscellaneous
HaMMon Kick-Off meeting, 2023.
Links | BibTeX | Tags: icsc, streamflow
@misc{23:HaMMonProject,
  author       = {Iacopo Colonnelli and Doriana Medić and Barbara Cantalupo and Marco Aldinucci},
  title        = {Università degli Studi di Torino: Alpha parallel research group},
  howpublished = {HaMMon Kick-Off meeting},
  address      = {Bologna, Italy},
  year         = {2023},
  date         = {2023-10-01},
  url          = {https://datacloud.di.unito.it/index.php/s/cmgy9BZ3nwCR2QJ},
  keywords     = {icsc, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
Giulio Malenza, Valentina Cesare, Marco Aldinucci
Performance portability in HPC: the Gaia use-case Miscellaneous
2nd Italian Conference on Big Data and Data Science (ITADATA 2023), 2023.
@misc{23:GAIA:bigHPC,
  author       = {Giulio Malenza and Valentina Cesare and Marco Aldinucci},
  title        = {Performance portability in HPC: the Gaia use-case},
  howpublished = {2nd Italian Conference on Big Data and Data Science (ITADATA 2023)},
  address      = {Naples, Italy},
  year         = {2023},
  date         = {2023-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/RqcZpizFtC9toFq},
  keywords     = {icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Samuele Fonio
Benchmarking Federated Learning Frameworks for Medical Imaging Tasks Miscellaneous
Image Analysis and Processing - ICIAP 2023 - 22nd International Conference - FedMed, 2023.
Abstract | Links | BibTeX | Tags: ai, eupilot, fl, icsc
@misc{23:iciap:benchmed,
  author       = {Samuele Fonio},
  title        = {Benchmarking Federated Learning Frameworks for Medical Imaging Tasks},
  howpublished = {Image Analysis and Processing - ICIAP 2023 - 22nd International Conference - FedMed},
  address      = {Udine, Italy},
  year         = {2023},
  date         = {2023-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/sR7YeTGgfH4DtCR},
  abstract     = {This paper presents a comprehensive benchmarking study of various Federated Learning (FL) frameworks applied to the task of Medical Image Classification. The research specifically addresses the often neglected and complex aspects of scalability and usability in off-the-shelf FL frameworks. Through experimental validation using real case deployments, we provide empirical evidence of the performance and practical relevance of open source FL frameworks. Our findings contribute valuable insights for anyone interested in deploying a FL system, with a particular focus on the healthcare domain—an increasingly attractive field for FL applications.},
  keywords     = {ai, eupilot, fl, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Gianluca Mittone, Samuele Fonio
Benchmarking Federated Learning Scalability Miscellaneous
2nd Italian Conference on Big Data and Data Science (ITADATA 2023), 2023.
Abstract | Links | BibTeX | Tags: ai, eupilot, fl, icsc
@misc{23:itadata:fl_scaling,
  author       = {Gianluca Mittone and Samuele Fonio},
  title        = {Benchmarking Federated Learning Scalability},
  howpublished = {2nd Italian Conference on Big Data and Data Science (ITADATA 2023)},
  address      = {Naples, Italy},
  year         = {2023},
  date         = {2023-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/QZGxC4X3s5LG5oT},
  abstract     = {Federated Learning (FL) is a widespread Machine Learning paradigm handling distributed Big Data. In this work, we demonstrate that different FL frameworks expose different scaling performances despite adopting the same technologies, highlighting the need for a more comprehensive study on the topic.},
  keywords     = {ai, eupilot, fl, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Gianluca Mittone, Walter Riviera, Iacopo Colonnelli, Robert Birke, Marco Aldinucci
Model-Agnostic Federated Learning Miscellaneous
29th International European Conference on Parallel and Distributed Computing (Euro-Par '23), 2023.
Abstract | Links | BibTeX | Tags: ai, eupilot, icsc
@misc{23:europar:mafl,
  author       = {Gianluca Mittone and Walter Riviera and Iacopo Colonnelli and Robert Birke and Marco Aldinucci},
  title        = {Model-Agnostic Federated Learning},
  howpublished = {29th International European Conference on Parallel and Distributed Computing (Euro-Par '23)},
  address      = {Limassol, Cyprus},
  year         = {2023},
  date         = {2023-09-01},
  url          = {https://datacloud.di.unito.it/index.php/s/9T6G2tRreRomBAE},
  abstract     = {Since its debut in 2016, Federated Learning (FL) has been tied to the inner workings of Deep Neural Networks (DNNs); this allowed its development as DNNs proliferated but neglected those scenarios in which using DNNs is not possible or advantageous. The fact that most current FL frameworks only support DNNs reinforces this problem. To address the lack of non-DNN-based FL solutions, we propose MAFL (Model-Agnostic Federated Learning). MAFL merges a model-agnostic FL algorithm, AdaBoost.F, with an open industry-grade FL framework: Intel® OpenFL. MAFL is the first FL system not tied to any machine learning model, allowing exploration of FL beyond DNNs. We test MAFL from multiple points of view, assessing its correctness, flexibility, and scaling properties up to 64 nodes of an HPC cluster. We also show how we optimised OpenFL achieving a 5.5x speedup over a standard FL scenario. MAFL is compatible with x86-64, ARM-v8, Power and RISC-V.},
  keywords     = {ai, eupilot, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Gianluca Mittone, Robert Birke, Marco Aldinucci
Model-Agnostic Federated Learning Miscellaneous
29th International European Conference on Parallel and Distributed Computing (Euro-Par '23), 2023.
Abstract | Links | BibTeX | Tags: eupilot, icsc
@misc{23:europar:phdtalk,
  author       = {Gianluca Mittone and Robert Birke and Marco Aldinucci},
  title        = {Model-Agnostic Federated Learning},
  howpublished = {29th International European Conference on Parallel and Distributed Computing (Euro-Par '23)},
  address      = {Limassol, Cyprus},
  year         = {2023},
  date         = {2023-08-01},
  url          = {https://datacloud.di.unito.it/index.php/s/pT3qxkwzzsHR3nS},
  abstract     = {Since its debut in 2016, Federated Learning (FL) has been tied to the inner workings of Deep Neural Networks (DNNs); this allowed its development as DNNs proliferated but neglected those scenarios in which using DNNs is not possible or advantageous. The fact that most current FL frameworks only support DNNs reinforces this problem. To address the lack of non-DNN-based FL solutions, we propose MAFL (Model-Agnostic Federated Learning). Decentralised Machine Learning (DML) enables collaborative machine learning without centralised input data. Federated Learning (FL) and Edge Inference are examples of DML. While tools for DML (especially FL) are starting to flourish, many are not flexible and portable enough to experiment with novel processors (e.g., RISC-V), non-fully connected network topologies, and asynchronous collaboration schemes. We overcome these limitations via a domain-specific language allowing us to map DML schemes to an underlying middleware, i.e. the FastFlow parallel programming library.},
  keywords     = {eupilot, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Giulio Malenza
Building an accelerated OpenFOAM Proof-of-Concept application using Modern C++ Miscellaneous
18th OpenFOAM Workshop 2023, Genova, 2023.
@misc{23:OF:genova,
  author       = {Giulio Malenza},
  title        = {Building an accelerated OpenFOAM Proof-of-Concept application using Modern C++},
  howpublished = {18th OpenFOAM Workshop 2023, Genova},
  address      = {Genova, Italy},
  year         = {2023},
  date         = {2023-07-01},
  url          = {https://datacloud.di.unito.it/index.php/s/mB6omsDB8ERBkGW},
  keywords     = {icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Alberto Mulone, Sherine Awad, Davide Chiarugi, Marco Aldinucci
Porting the Variant Calling Pipeline for NGS data in cloud-HPC environment Miscellaneous
47th IEEE Annual Computers, Software, and Applications Conference, COMPSAC 2023, 2023.
Abstract | Links | BibTeX | Tags: across, icsc, streamflow
@misc{23:mulone:wide:talk,
  author       = {Alberto Mulone and Sherine Awad and Davide Chiarugi and Marco Aldinucci},
  title        = {Porting the Variant Calling Pipeline for NGS data in cloud-HPC environment},
  howpublished = {47th IEEE Annual Computers, Software, and Applications Conference, COMPSAC 2023},
  address      = {Torino, Italy},
  year         = {2023},
  date         = {2023-06-01},
  url          = {https://datacloud.di.unito.it/index.php/s/zNLj3LCZNsNxHwy},
  abstract     = {In recent years we have understood the importance of analyzing and sequencing human genetic variation. A relevant aspect that emerged from the Covid-19 pandemic was the need to obtain results very quickly; this involved using High-Performance Computing (HPC) environments to execute the Next Generation Sequencing (NGS) pipeline. However, HPC is not always the most suitable environment for the entire execution of a pipeline, especially when it involves many heterogeneous tools. The ability to execute parts of the pipeline on different environments can lead to higher performance but also cheaper executions. This work shows the design and optimization process that led us to a state-of-the-art Variant Calling hybrid workflow based on the StreamFlow Workflow Management System (WfMS). We also compare StreamFlow with Snakemake, an established WfMS targeting HPC facilities, observing comparable performance on single environments and satisfactory improvements with a hybrid cloud-HPC configuration.},
  keywords     = {across, icsc, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
Gianluca Mittone, Nicolò Tonci, Robert Birke, Iacopo Colonnelli, Doriana Medić, Andrea Bartolini, Roberto Esposito, Emanuele Parisi, Francesco Beneventi, Mirko Polato, Massimo Torquati, Luca Benini, Marco Aldinucci
Experimenting with Emerging RISC-V Systems for Decentralised Machine Learning Miscellaneous
20th ACM international conference on computing frontiers (CF '23), 2023, (Invited talk).
Abstract | Links | BibTeX | Tags: ai, eupilot, icsc
@misc{23:ACMCF,
  author       = {Gianluca Mittone and Nicolò Tonci and Robert Birke and Iacopo Colonnelli and Doriana Medić and Andrea Bartolini and Roberto Esposito and Emanuele Parisi and Francesco Beneventi and Mirko Polato and Massimo Torquati and Luca Benini and Marco Aldinucci},
  title        = {Experimenting with Emerging RISC-V Systems for Decentralised Machine Learning},
  howpublished = {20th ACM international conference on computing frontiers (CF '23)},
  year         = {2023},
  date         = {2023-05-01},
  url          = {https://datacloud.di.unito.it/index.php/s/BYyqZbHzzN4DL8Z},
  note         = {Invited talk},
  abstract     = {Decentralised Machine Learning (DML) enables collaborative machine learning without centralised input data. Federated Learning (FL) and Edge Inference are examples of DML. While tools for DML (especially FL) are starting to flourish, many are not flexible and portable enough to experiment with novel processors (e.g., RISC-V), non-fully connected network topologies, and asynchronous collaboration schemes. We overcome these limitations via a domain-specific language allowing us to map DML schemes to an underlying middleware, i.e. the FastFlow parallel programming library. We experiment with it by generating different working DML schemes on x86-64 and ARM platforms and an emerging RISC-V one. We characterise the performance and energy efficiency of the presented schemes and systems. As a byproduct, we introduce a RISC-V porting of the PyTorch framework, the first publicly available to our knowledge.},
  keywords     = {ai, eupilot, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Gianluca Mittone, Filip Svoboda, Marco Aldinucci, Nicholas D. Lane, Pietro Liò
A Federated Learning Benchmark for Drug-Target Interaction Miscellaneous
2023 ACM international Web Conference (WWW '23), 2023, (Invited talk).
Abstract | Links | BibTeX | Tags: eupilot, icsc
@misc{23:WWW,
  author       = {Gianluca Mittone and Filip Svoboda and Marco Aldinucci and Nicholas D. Lane and Pietro Liò},
  title        = {A Federated Learning Benchmark for Drug-Target Interaction},
  howpublished = {2023 ACM international Web Conference (WWW '23)},
  year         = {2023},
  date         = {2023-05-01},
  url          = {https://datacloud.di.unito.it/index.php/s/js7go3EorZxSLn9},
  note         = {Invited talk},
  abstract     = {Aggregating pharmaceutical data in the drug-target interaction (DTI) domain can potentially deliver life-saving breakthroughs. It is, however, notoriously difficult due to regulatory constraints and commercial interests. This work proposes the application of federated learning, which is reconcilable with the industry's constraints. It does not require sharing any information that would reveal the entities' data or any other high-level summary. When used on a representative GraphDTA model and the KIBA dataset, it achieves up to 15% improved performance relative to the best available non-privacy preserving alternative. Our extensive battery of experiments shows that, unlike in other domains, the non-IID data distribution in the DTI datasets does not deteriorate FL performance. Additionally, we identify a material trade-off between the benefits of adding new data and the cost of adding more clients.},
  keywords     = {eupilot, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Bruno Casella, Samuele Fonio
Architecture-Based FedAvg for Vertical Federated Learning Miscellaneous
2023.
Abstract | Links | BibTeX | Tags: ai, epi, fl, icsc
@misc{23:casella:architecturalfedavgtalk,
  author       = {Bruno Casella and Samuele Fonio},
  title        = {Architecture-Based FedAvg for Vertical Federated Learning},
  howpublished = {Proceedings of the 3rd Workshop on Distributed Machine Learning for the Intelligent Computing Continuum (DML-ICC), IEEE/ACM UCC 2023},
  address      = {Taormina, Italy},
  year         = {2023},
  date         = {2023-12-01},
  url          = {https://datacloud.di.unito.it/index.php/s/kJQxnqG4d2ZSicK},
  abstract     = {Federated Learning (FL) has emerged as a promising solution to address privacy concerns by collaboratively training Deep Learning (DL) models across distributed parties. This work proposes an architecture-based aggregation strategy in Vertical FL, where parties hold data with different attributes but shared instances. Our approach leverages the identical architectural parts, i.e. neural network layers, of different models to selectively aggregate weights, which is particularly relevant when collaborating with institutions holding different types of datasets, i.e., image, text, or tabular datasets. In a scenario where two entities train DL models, such as a Convolutional Neural Network (CNN) and a Multi-Layer Perceptron (MLP), our strategy computes the average only for architecturally identical segments. This preserves data-specific features learned from demographic and clinical data. We tested our approach on two clinical datasets, i.e., the COVID-CXR dataset and the ADNI study. Results show that our method achieves comparable results with the centralized scenario, in which all the data are collected in a single data lake, and benefits from FL generalizability. In particular, compared to the non-federated models, our proposed proof-of-concept model exhibits a slight performance loss on the COVID-CXR dataset (less than 8%), but outperforms ADNI models by up to 12%. Moreover, communication costs between training rounds are minimized by exchanging only the dense layer parameters.},
  keywords     = {ai, epi, fl, icsc},
  pubstate     = {published},
  tppubtype    = {misc}
}
Doriana Medić, Marco Aldinucci
Towards formal model for location aware workflows Miscellaneous
2023.
Abstract | Links | BibTeX | Tags: eupex, icsc
% Medić and Aldinucci (2023): investigates process algebra (distributed
% π-calculus) as a formal model for location aware workflow systems.
@misc{23:wide:medic,
  author     = {Medić, Doriana and Aldinucci, Marco},
  title      = {Towards formal model for location aware workflows},
  address    = {Torino, Italy},
  year       = {2023},
  date       = {2023-01-01},
  url        = {https://datacloud.di.unito.it/index.php/s/wpDd9HETzioixTW},
  abstract   = {Designing complex applications and executing them on large-scale topologies of heterogeneous architectures is becoming increasingly crucial in many scientific domains. As a result, diverse workflow modelling paradigms are developed, most of them with no formalisation provided. In these circumstances, comparing two different models or switching from one system to the other becomes a hard nut to crack. This paper investigates the capability of process algebra to model a location aware workflow system. Distributed π-calculus is considered as the base of the formal model due to its ability to describe the communicating components that change their structure as an outcome of the communication. Later, it is discussed how the base model could be extended or modified to capture different features of location aware workflow system. The intention of this paper is to highlight the fact that due to its flexibility, π-calculus, could be a good candidate to represent the behavioural perspective of the workflow system.},
  keywords   = {eupex, icsc},
  pubstate   = {published},
  tppubtype  = {misc}
}
2022
Marco Aldinucci
EuroHPC and the Italian HPC ecosystem Miscellaneous
Critical Infrastructure Protection Forum - EuroCC Romania, 2022, (Invited talk).
Abstract | Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, eupilot, icsc, textarossa
% Invited talk by Marco Aldinucci at the Critical Infrastructure Protection
% Forum - EuroCC Romania (Bucharest, 2022).
% Acronyms and proper nouns in the title are brace-protected so that
% bibliography styles applying sentence casing do not lowercase them.
@misc{22:cip:romania,
  title        = {{EuroHPC} and the {Italian} {HPC} ecosystem},
  author       = {Aldinucci, Marco},
  url          = {https://datacloud.di.unito.it/index.php/s/5dFFoNsZzwTzQkn},
  year         = {2022},
  date         = {2022-06-01},
  address      = {Bucharest, Romania},
  abstract     = {The talk presents the main investments currently ongoing in Italy in the HPC area as well as the activity of Italian stakeholders within EuroHPC. The novel Italian National Centre on HPC (ICSC) is introduced.},
  howpublished = {Critical Infrastructure Protection Forum - EuroCC Romania},
  note         = {Invited talk},
  keywords     = {across, admire, eumaster4hpc, eupex, eupilot, icsc, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
The Italian HPC ecosystem and the next generation of EuroHPC CoE Miscellaneous
EuroHPC EoCoE final summit, 2022, (Invited talk).
Abstract | Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, eupilot, icsc, textarossa
% Invited talk by Marco Aldinucci at the EuroHPC EoCoE final summit
% (Napoli, 2022).
% Acronyms and proper nouns in the title are brace-protected so that
% bibliography styles applying sentence casing do not lowercase them.
@misc{22:eocoe:summit,
  title        = {The {Italian} {HPC} ecosystem and the next generation of {EuroHPC} {CoE}},
  author       = {Aldinucci, Marco},
  url          = {https://datacloud.di.unito.it/index.php/s/AH5Ms3NekeoEooB},
  year         = {2022},
  date         = {2022-06-01},
  address      = {Napoli, Italy},
  abstract     = {The talk presents the main investments currently ongoing in Italy in the HPC area as well as the activity of Italian stakeholders within EuroHPC. The novel Italian National Centre on HPC (ICSC) is introduced.},
  howpublished = {EuroHPC EoCoE final summit},
  note         = {Invited talk},
  keywords     = {across, admire, eumaster4hpc, eupex, eupilot, icsc, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Marco Aldinucci
Da HPC4AI al living lab dello spoke FutureHPC del Centro Nazionale HPC Miscellaneous
Condivisioni, Conferenza GARR 2022, 2022, (Keynote talk).
Abstract | Links | BibTeX | Tags: across, admire, eumaster4hpc, eupex, hpc4ai, icsc, textarossa
% Keynote talk by Marco Aldinucci at "Condivisioni, Conferenza GARR 2022"
% (Palermo, 2022).
% Acronyms and proper nouns in the title are brace-protected so that
% bibliography styles applying sentence casing do not lowercase them.
% Abstract typos corrected: "PasS" -> "PaaS", "livinglab" -> "living lab",
% and the missing verb restored in "Among these are ...".
@misc{22:garr,
  title        = {Da {HPC4AI} al living lab dello spoke {FutureHPC} del {Centro Nazionale HPC}},
  author       = {Aldinucci, Marco},
  url          = {https://datacloud.di.unito.it/index.php/s/P3KSroSSmrRxZMc},
  year         = {2022},
  date         = {2022-05-01},
  address      = {Palermo, Italy},
  abstract     = {HPC4AI is an open-access laboratory of the University of Turin open to researchers, students and companies that manages a double pair of systems: a production cloud-HPC system and its twin dedicated to development. The cloud-HPC system is implemented thanks to an extended version of the GARR cloud (OpenStack) and the SLURM workload manager. HPC4AI is specifically designed to support system software development and cloud-HPC convergence tools. Among these are streamflow (WMS), jupyter-as-a-service (SaaS), portable-secure-tenant (PaaS). The experience gained in the design and management of HPC4AI forms the heart of the design of the living lab of the Turin "FutureHPC" spoke of the National Center "HPC, BigData and Quantum Computing" funded by the PNRR which should be operational from September 2022.},
  howpublished = {Condivisioni, Conferenza GARR 2022},
  note         = {Keynote talk},
  keywords     = {across, admire, eumaster4hpc, eupex, hpc4ai, icsc, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}