Papers | Parallel Computing
2024
Bruno Casella, Alessio Barbaro Chisari, Marco Aldinucci, Sebastiano Battiato, Mario Valerio Giuffrida
Federated Learning in a Semi-Supervised Environment for Earth Observation Data Proceedings Article
In: Proceedings of the 32nd European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning, ESANN, Bruges, Belgium, 2024.
Abstract | Links | BibTeX | Tags: ai, epi, icsc
% ESANN 2024 conference paper proposing FedRec (federated semi-supervised learning).
% The conference city is stored in `address`, following this file's convention.
@inproceedings{24:casella:fedrec,
  author    = {Bruno Casella and Alessio Barbaro Chisari and Marco Aldinucci and Sebastiano Battiato and Mario Valerio Giuffrida},
  title     = {Federated Learning in a Semi-Supervised Environment for Earth Observation Data},
  booktitle = {Proceedings of the 32nd European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning, ESANN},
  address   = {Bruges, Belgium},
  year      = {2024},
  date      = {2024-10-01},
  doi       = {10.14428/esann/2024.es2024-214},
  url       = {https://iris.unito.it/retrieve/a798d7b8-6b98-48c2-92f4-327d2aaa8788/ES2024-214.pdf},
  abstract  = {We propose FedRec, a federated learning workflow taking advantage of unlabelled data in a semi-supervised environment to assist in the training of a supervised aggregated model. In our proposed method, an encoder architecture extracting features from unlabelled data is aggregated with the feature extractor of a classification model via weight averaging. The fully connected layers of the supervised models are also averaged in a federated fashion. We show the effectiveness of our approach by comparing it with the state-of-the-art federated algorithm, an isolated and a centralised baseline, on novel cloud detection datasets.},
  keywords  = {ai, epi, icsc},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Bruno Casella, Matthias Jakobs, Marco Aldinucci, Sebastian Buschjäger
Federated Time Series Classification with ROCKET features Proceedings Article
In: Proceedings of the 32nd European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning, ESANN, Bruges, Belgium, 2024.
Abstract | Links | BibTeX | Tags: ai, epi, icsc
% ESANN 2024 conference paper proposing FROCKS (federated time series
% classification with ROCKET features).
% Author names are written "First Last" like the rest of this file; the second
% author is Matthias Jakobs (given name first, so BibTeX takes "Jakobs" as the
% surname) and the umlaut in Buschj{\"a}ger uses the brace-wrapped accent form
% so that sorting and labels work under classic BibTeX as well as Biber.
@inproceedings{24:casella:frocks,
  author    = {Bruno Casella and Matthias Jakobs and Marco Aldinucci and Sebastian Buschj{\"a}ger},
  title     = {Federated Time Series Classification with ROCKET features},
  booktitle = {Proceedings of the 32nd European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning, ESANN},
  address   = {Bruges, Belgium},
  year      = {2024},
  date      = {2024-10-01},
  doi       = {10.14428/esann/2024.es2024-61},
  url       = {https://iris.unito.it/retrieve/51b63fc1-3e22-4ad4-8926-84af69cde739/ES2024-61.pdf},
  abstract  = {This paper proposes FROCKS, a federated time series classification method using ROCKET features. Our approach dynamically adapts the models’ features by selecting and exchanging the best-performing ROCKET kernels from a federation of clients. Specifically, the server gathers the best-performing kernels of the clients together with the associated model parameters, and it performs a weighted average if a kernel is best-performing for more than one client. We compare the proposed method with state-of-the-art approaches on the UCR archive binary classification datasets and show superior performance on most datasets.},
  keywords  = {ai, epi, icsc},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Chi Hong, Robert Birke, Pin-Yu Chen, Lydia Chen
On Dark Knowledge for Distilling Generators Proceedings Article
In: Yang, De-Nian, Xie, Xing, Tseng, Vincent S., Pei, Jian, Huang, Jen-Wei, Lin, Jerry Chun-Wei (Eds.): Proceedings of the 28th Pacific-Asia Conference on Knowledge Discovery and Data Mining, pp. 235–247, Springer, Taipei, Taiwan, 2024.
Abstract | Links | BibTeX | Tags: ai, epi, icsc
% PAKDD 2024 conference paper (Springer LNCS volume 14646) on dark knowledge
% for distilling generative models (DKtill).
% `pages` uses the BibTeX range form `--`; the style turns it into an en dash.
@inproceedings{24:chen:llm,
  author    = {Chi Hong and Robert Birke and Pin-Yu Chen and Lydia Chen},
  title     = {On Dark Knowledge for Distilling Generators},
  editor    = {De-Nian Yang and Xing Xie and Vincent S. Tseng and Jian Pei and Jen-Wei Huang and Jerry Chun-Wei Lin},
  booktitle = {Proceedings of the 28th Pacific-Asia Conference on Knowledge Discovery and Data Mining},
  series    = {Lecture Notes in Computer Science},
  volume    = {14646},
  pages     = {235--247},
  publisher = {Springer},
  address   = {Taipei, Taiwan},
  year      = {2024},
  date      = {2024-05-01},
  doi       = {10.1007/978-981-97-2253-2_19},
  url       = {https://hdl.handle.net/2318/1976671},
  abstract  = {Knowledge distillation has been applied on generative models, such as Variational Autoencoder (VAE) and Generative Adversarial Networks (GANs). To distill the knowledge, the synthetic outputs of a teacher generator are used to train a student model. While the dark knowledge, i.e., the probabilistic output, is well explored in distilling classifiers, little is known about the existence of an equivalent dark knowledge for generative models and its extractability. In this paper, we derive the first kind of empirical risk bound for distilling generative models from a Bayesian perspective. Through our analysis, we show the existence of the dark knowledge for generative models, i.e., Bayes probability distribution of a synthetic output from a given input, which achieves lower empirical risk bound than merely using the synthetic output of the generators. Furthermore, we propose a Dark Knowledge based Distillation, DKtill, which trains the student generator based on the (approximate) dark knowledge. Our extensive evaluation on distilling VAE, conditional GANs, and translation GANs on Facades and CelebA datasets show that the FID of student generators trained by DKtill combining dark knowledge are lower than student generators trained only by the synthetic outputs by up to 42.66%, and 78.99%, respectively.},
  keywords  = {ai, epi, icsc},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Bruno Casella, Iacopo Colonnelli, Gianluca Mittone, Robert Birke, Walter Riviera, Antonio Sciarappa, Carlo Cavazzoni, Marco Aldinucci
A Performance Analysis for Confidential Federated Learning Proceedings Article
In: Proceedings of the 2024 Deep Learning Security and Privacy Workshop, IEEE Symposium on Security and Privacy 2024, San Francisco, CA, 2024.
Abstract | Links | BibTeX | Tags: ai, confidential, epi, icsc
% Workshop paper (Deep Learning Security and Privacy Workshop, IEEE S&P 2024)
% benchmarking end-to-end federated learning inside a TEE via a library OS.
% The abstract's "State-of-the-art" is written as one hyphenated word.
@inproceedings{24:casella:sgx,
  author    = {Bruno Casella and Iacopo Colonnelli and Gianluca Mittone and Robert Birke and Walter Riviera and Antonio Sciarappa and Carlo Cavazzoni and Marco Aldinucci},
  title     = {A Performance Analysis for Confidential Federated Learning},
  booktitle = {Proceedings of the 2024 Deep Learning Security and Privacy Workshop, IEEE Symposium on Security and Privacy 2024},
  address   = {San Francisco, CA},
  year      = {2024},
  date      = {2024-05-01},
  doi       = {10.1109/SPW63631.2024.00009},
  url       = {https://iris.unito.it/retrieve/b5877a97-2d8d-4e95-8791-0aa4a1b953b3/DLSP___CONFIDENTIAL_FL.pdf},
  abstract  = {Federated Learning (FL) has emerged as a solution to preserve data privacy by keeping the data locally on each participant's device. However, FL alone is still vulnerable to attacks that can cause privacy leaks. Therefore, it becomes necessary to take additional security measures at the cost of increasing runtimes. The Trusted Execution Environment (TEE) approach promises to offer the highest degree of security during execution. However, TEEs suffer from memory limits which prevent safe end-to-end FL training of modern deep models. State-of-the-art approaches limit secure training to selected layers, failing to avert the full spectrum of attacks or adopt layer-wise training affecting model performance. We benchmark the usage of a library OS (LibOS) to run the full, unmodified end-to-end FL training inside the TEE. We extensively evaluate and model the overhead of the different security mechanisms needed to protect the data and model during computation (TEE), communication (TLS), and storage (disk encryption). The obtained results across three datasets and two models demonstrate that LibOSes are a viable way to seamlessly inject security into FL with limited overhead (at most 2x), offering valuable guidance for researchers and developers aiming to apply FL in data-security-focused contexts.},
  keywords  = {ai, confidential, epi, icsc},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Oussama Harrak, Bruno Casella, Samuele Fonio, Piero Fariselli, Gianluca Mittone, Tiziana Sanavia, Marco Aldinucci
Federated AdaBoost for Survival Analysis Proceedings Article
In: Proceedings of the ECML-PKDD Workshop, 2nd workshop on advancements in Federated Learning, Vilnius, Lithuania, 2024.
Abstract | BibTeX | Tags: epi, icsc
% ECML-PKDD 2024 workshop paper proposing FedSurvBoost (federated AdaBoost for
% survival analysis). No `doi` or `url` field is recorded for this entry; the
% code repository link appears only inside the abstract text.
@inproceedings{harrak2024fedsurvboost,
  author    = {Oussama Harrak and Bruno Casella and Samuele Fonio and Piero Fariselli and Gianluca Mittone and Tiziana Sanavia and Marco Aldinucci},
  title     = {Federated AdaBoost for Survival Analysis},
  booktitle = {Proceedings of the ECML-PKDD Workshop, 2nd workshop on advancements in Federated Learning},
  address   = {Vilnius, Lithuania},
  year      = {2024},
  date      = {2024-01-01},
  abstract  = {This work proposes FedSurvBoost, a federated learning pipeline for survival analysis based on the AdaBoost.F algorithm, which iteratively aggregates the best local weak hypotheses. Our method extends AdaBoost.F by removing the dependence on the number of classes coefficient from the computation of the weights of the best model. This makes it suitable for regression tasks, such as survival analysis. We show the effectiveness of our approach by comparing it with state-of-the-art methods, specifically developed for survival analysis problems, on two common survival datasets. Our code is available at https://github.com/oussamaHarrak/FedSurvBoost.},
  keywords  = {epi, icsc},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Bruno Casella, Roberto Esposito, Antonio Sciarappa, Carlo Cavazzoni, Marco Aldinucci
Experimenting With Normalization Layers in Federated Learning on Non-IID Scenarios Journal Article
In: IEEE Access, vol. 12, pp. 47961-47971, 2024.
Links | BibTeX | Tags: epi, icsc
% Journal article (IEEE Access, volume 12, 2024) on normalization layers in
% federated learning under non-IID data.
% `pages` uses the BibTeX range form `--`; a single hyphen is typeset as a hyphen.
@article{24:casella:normalization,
  author    = {Bruno Casella and Roberto Esposito and Antonio Sciarappa and Carlo Cavazzoni and Marco Aldinucci},
  title     = {Experimenting With Normalization Layers in Federated Learning on Non-IID Scenarios},
  journal   = {IEEE Access},
  volume    = {12},
  pages     = {47961--47971},
  year      = {2024},
  date      = {2024-01-01},
  doi       = {10.1109/ACCESS.2024.3383783},
  keywords  = {epi, icsc},
  pubstate  = {published},
  tppubtype = {article}
}
Bruno Casella, Walter Riviera, Marco Aldinucci, Gloria Menegaz
Protocol for training MERGE: A federated multi-input neural network for COVID-19 prognosis Journal Article
In: STAR Protocols, 2024, (https://prod-shared-star-protocols.s3.amazonaws.com/protocols/3225.pdf).
Abstract | Links | BibTeX | Tags: epi, icsc
% Journal article (STAR Protocols, 2024): protocol for training MERGE.
% NOTE(review): `note` repeats the `url` value verbatim, and `institution` is
% not a standard field for an article entry - confirm both are intentional.
@article{24:casella:starprotocol,
  author      = {Bruno Casella and Walter Riviera and Marco Aldinucci and Gloria Menegaz},
  title       = {Protocol for training MERGE: A federated multi-input neural network for COVID-19 prognosis},
  journal     = {STAR Protocols},
  institution = {Computer Science Department, University of Torino},
  year        = {2024},
  date        = {2024-01-01},
  doi         = {10.1016/j.xpro.2023.102812},
  url         = {https://prod-shared-star-protocols.s3.amazonaws.com/protocols/3225.pdf},
  note        = {https://prod-shared-star-protocols.s3.amazonaws.com/protocols/3225.pdf},
  abstract    = {Federated learning is a cooperative learning approach that has emerged as an effective way to address privacy concerns. Here, we present a protocol for training MERGE: a federated multi-input neural network (NN) for COVID-19 prognosis. We describe steps for collecting and preprocessing datasets. We then detail the process of training a multi-input NN. This protocol can be adapted for use with datasets containing both image- and table-based input sources.},
  keywords    = {epi, icsc},
  pubstate    = {published},
  tppubtype   = {article}
}
2023
Bruno Casella, Roberto Esposito, Antonio Sciarappa, Carlo Cavazzoni, Marco Aldinucci
Experimenting with Normalization Layers in Federated Learning on non-IID scenarios Technical Report
Computer Science Department, University of Torino 2023.
Abstract | Links | BibTeX | Tags: confidential, epi, icsc
% Technical report (Computer Science Department, University of Torino, 2023);
% the `url` field links to the arXiv PDF 2303.10630.
@techreport{23:casella:normalization,
  author      = {Bruno Casella and Roberto Esposito and Antonio Sciarappa and Carlo Cavazzoni and Marco Aldinucci},
  title       = {Experimenting with Normalization Layers in Federated Learning on non-IID scenarios},
  institution = {Computer Science Department, University of Torino},
  year        = {2023},
  date        = {2023-01-01},
  url         = {https://arxiv.org/pdf/2303.10630.pdf},
  abstract    = {Training Deep Learning (DL) models require large, high-quality datasets, often assembled with data from different institutions. Federated Learning (FL) has been emerging as a method for privacy-preserving pooling of datasets employing collaborative training from different institutions by iteratively globally aggregating locally trained models. One critical performance challenge of FL is operating on datasets not independently and identically distributed (non-IID) among the federation participants. Even though this fragility cannot be eliminated, it can be debunked by a suitable optimization of two hyperparameters: layer normalization methods and collaboration frequency selection. In this work, we benchmark five different normalization layers for training Neural Networks (NNs), two families of non-IID data skew, and two datasets. Results show that Batch Normalization, widely employed for centralized DL, is not the best choice for FL, whereas Group and Layer Normalization consistently outperform Batch Normalization. Similarly, frequent model aggregation decreases convergence speed and mode quality.},
  keywords    = {confidential, epi, icsc},
  pubstate    = {published},
  tppubtype   = {techreport}
}
Bruno Casella, Lorenzo Paletto
Predicting Cryptocurrencies Market Phases through On-Chain Data Long-Term Forecasting Proceedings Article
In: Proceedings of the 2023 IEEE International Conference on Blockchain and Cryptocurrency (ICBC), 1-5 May 2023, Dubai, 2023, (https://ieeexplore.ieee.org/document/10174989).
Abstract | Links | BibTeX | Tags: epi, icsc
% IEEE ICBC 2023 conference paper on forecasting on-chain data to detect
% cryptocurrency market phases.
% `doi` holds the bare identifier; styles and tools add the resolver prefix
% themselves, so storing the full https://doi.org/ URL here doubles it.
@inproceedings{23:casella:onchain,
  author    = {Bruno Casella and Lorenzo Paletto},
  title     = {Predicting Cryptocurrencies Market Phases through On-Chain Data Long-Term Forecasting},
  booktitle = {Proceedings of the 2023 IEEE International Conference on Blockchain and Cryptocurrency (ICBC), 1-5 May 2023, Dubai},
  year      = {2023},
  date      = {2023-01-01},
  doi       = {10.1109/ICBC56567.2023.10174989},
  url       = {https://iris.unito.it/bitstream/2318/1902652/1/6.%20ICBC23%20-%20PREDICTING%20BTC.pdf},
  note      = {https://ieeexplore.ieee.org/document/10174989},
  abstract  = {Blockchain, the underlying technology of Bitcoin and several other cryptocurrencies, like Ethereum, produces a massive amount of open-access data that can be analyzed, providing important information about the network's activity and its respective token. The on-chain data have extensively been used as input to Machine Learning algorithms for predicting cryptocurrencies' future prices; however, there is a lack of study in predicting the future behaviour of on-chain data. This study aims to show how on-chain data can be used to detect cryptocurrency market regimes, like minimum and maximum, bear and bull market phases, and how forecasting these data can provide an optimal asset allocation for long-term investors.},
  keywords  = {epi, icsc},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Bruno Casella, Samuele Fonio
Architecture-Based FedAvg for Vertical Federated Learning Proceedings Article
In: Proceedings of the 3rd Workshop on Distributed Machine Learning for the Intelligent Computing Continuum (DML-ICC), IEEE/ACM UCC 2023, Taormina, Italy, 4 December 2023, 2023, (https://iris.unito.it/bitstream/2318/1949730/1/HALF_HVL_for_DML_ICC23___Taormina-2.pdf).
Abstract | Links | BibTeX | Tags: ai, epi, icsc
% Workshop paper (DML-ICC at IEEE/ACM UCC 2023) on architecture-based weight
% aggregation for vertical federated learning.
% NOTE(review): `note` and `url` point to different PDFs, and the `note` URL
% contains raw underscores - confirm which link is canonical.
@inproceedings{23:casella:architecturalfedavg,
  author    = {Bruno Casella and Samuele Fonio},
  title     = {Architecture-Based FedAvg for Vertical Federated Learning},
  booktitle = {Proceedings of the 3rd Workshop on Distributed Machine Learning for the Intelligent Computing Continuum (DML-ICC), IEEE/ACM UCC 2023, Taormina, Italy, 4 December 2023},
  year      = {2023},
  date      = {2023-01-01},
  doi       = {10.1145/3603166.3632559},
  url       = {https://iris.unito.it/retrieve/173d9960-8531-419d-9bd5-5acce6694c4e/Aggregation%20Based%20VFL.pdf},
  note      = {https://iris.unito.it/bitstream/2318/1949730/1/HALF_HVL_for_DML_ICC23___Taormina-2.pdf},
  abstract  = {Federated Learning (FL) has emerged as a promising solution to address privacy concerns by collaboratively training Deep Learning (DL) models across distributed parties. This work proposes an architecture-based aggregation strategy in Vertical FL, where parties hold data with different attributes but shared instances. Our approach leverages the identical architectural parts, i.e. neural network layers, of different models to selectively aggregate weights, which is particularly relevant when collaborating with institutions holding different types of datasets, i.e., image, text, or tabular datasets. In a scenario where two entities train DL models, such as a Convolutional Neural Network (CNN) and a Multi-Layer Perceptron (MLP), our strategy computes the average only for architecturally identical segments. This preserves data-specific features learned from demographic and clinical data. We tested our approach on two clinical datasets, i.e., the COVID-CXR dataset and the ADNI study. Results show that our method achieves comparable results with the centralized scenario, in which all the data are collected in a single data lake, and benefits from FL generalizability. In particular, compared to the non-federated models, our proposed proof-of-concept model exhibits a slight performance loss on the COVID-CXR dataset (less than 8%), but outperforms ADNI models by up to 12%. Moreover, communication costs between training rounds are minimized by exchanging only the dense layer parameters.},
  keywords  = {ai, epi, icsc},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Bruno Casella, Walter Riviera, Marco Aldinucci, Gloria Menegaz
MERGE: A model for multi-input biomedical federated learning Journal Article
In: Patterns, pp. 100856, 2023, ISSN: 2666-3899.
Abstract | Links | BibTeX | Tags: ai, epi, icsc
% Journal article (Patterns, 2023) presenting MERGE, a federated multi-input
% architecture combining images and tabular data.
% NOTE(review): `pages` holds the single value 100856 and no volume/number is
% recorded - confirm against the publisher record.
@article{23:fl:patterns,
  author    = {Bruno Casella and Walter Riviera and Marco Aldinucci and Gloria Menegaz},
  title     = {MERGE: A model for multi-input biomedical federated learning},
  journal   = {Patterns},
  pages     = {100856},
  year      = {2023},
  date      = {2023-01-01},
  doi       = {10.1016/j.patter.2023.100856},
  issn      = {2666-3899},
  url       = {https://www.sciencedirect.com/science/article/pii/S2666389923002404},
  abstract  = {Driven by the deep learning (DL) revolution, artificial intelligence (AI) has become a fundamental tool for many biomedical tasks, including analyzing and classifying diagnostic images. Imaging, however, is not the only source of information. Tabular data, such as personal and genomic data and blood test results, are routinely collected but rarely considered in DL pipelines. Nevertheless, DL requires large datasets that often must be pooled from different institutions, raising non-trivial privacy concerns. Federated learning (FL) is a cooperative learning paradigm that aims to address these issues by moving models instead of data across different institutions. Here, we present a federated multi-input architecture using images and tabular data as a methodology to enhance model performance while preserving data privacy. We evaluated it on two showcases: the prognosis of COVID-19 and patients' stratification in Alzheimer's disease, providing evidence of enhanced accuracy and F1 scores against single-input models and improved generalizability against non-federated models.},
  keywords  = {ai, epi, icsc},
  pubstate  = {published},
  tppubtype = {article}
}