Talks | Parallel Computing
2024
Iacopo Colonnelli
Dynamic hybrid workflows for Deep Learning on HPC infrastructure Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: icsc, jupyter-workflow, streamflow
@misc{24:icolonne:ictp,
  title      = {Dynamic hybrid workflows for Deep Learning on {HPC} infrastructure},
  author     = {Colonnelli, Iacopo},
  url        = {https://datacloud.di.unito.it/index.php/s/EaFHJEKNbW5oXeq},
  year       = {2024},
  date       = {2024-05-01},
  address    = {Trieste, Italy},
  abstract   = {Hybrid workflow abstractions allow users to quickly design and orchestrate cross-facility workloads, decoupling tasks from environment-specific technical details to reduce complexity and increase reusability. Plus, workflow descriptions help ensure the reproducibility of scientific experiments through prospective and retrospective provenance collection. This module has been designed to provide a hands-on exploration of scientific workflows from various angles, from the initial design phase to their orchestration at extreme scales. We will use the practical example of the Common Workflow Language (CWL) open standard to demonstrate how workflows can be written, and the StreamFlow workflow system to execute them seamlessly on the CINECA HPC facility. We will also delve into the integration between scientific workflows and Jupyter Notebooks, which aims to give data scientists a familiar interface to scientific workflows. In this module, students will gain a comprehensive understanding of scientific workflows. They will learn how to use these workflows to model and orchestrate Machine Learning and Deep Learning pipelines. Additionally, they will explore how modern workflow management systems can efficiently scale data-oriented workloads from a researcher’s laptop to an entire HPC facility.},
  keywords   = {icsc, jupyter-workflow, streamflow},
  pubstate   = {published},
  tppubtype  = {misc}
}
2023
Iacopo Colonnelli
Workflow models for heterogeneous distributed systems Miscellaneous
2nd Italian Conference on Big Data and Data Science (ITADATA 2023), 2023, (Best PhD Thesis Award).
Links | BibTeX | Tags: jupyter-workflow, streamflow
@misc{23:ITADATABestPhDThesis,
  title        = {Workflow models for heterogeneous distributed systems},
  author       = {Colonnelli, Iacopo},
  url          = {https://datacloud.di.unito.it/index.php/s/6RqcaJ4djqFNDC8},
  year         = {2023},
  date         = {2023-09-01},
  address      = {Napoli, Italy},
  howpublished = {2nd Italian Conference on Big Data and Data Science (ITADATA 2023)},
  note         = {Best PhD Thesis Award},
  keywords     = {jupyter-workflow, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli
UNITO tools presentation Miscellaneous
CN HPC Flagship 3 Working Day, 2023.
Links | BibTeX | Tags: jupyter-workflow, streamflow
@misc{23:FL3WorkingDay,
  title        = {{UNITO} tools presentation},
  author       = {Colonnelli, Iacopo},
  url          = {https://datacloud.di.unito.it/index.php/s/fgHbnLDQSFtcwLd},
  year         = {2023},
  date         = {2023-05-01},
  address      = {Bologna, Italy},
  howpublished = {CN HPC Flagship 3 Working Day},
  keywords     = {jupyter-workflow, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
2022
Iacopo Colonnelli, Marco Aldinucci
Hybrid Workflows For Large-Scale Scientific Applications Miscellaneous
6th EAGE High Performance Computing Workshop, 2022.
Abstract | Links | BibTeX | Tags: across, eupex, jupyter-workflow, textarossa
@misc{22:eage,
  title        = {Hybrid Workflows For Large-Scale Scientific Applications},
  author       = {Colonnelli, Iacopo and Aldinucci, Marco},
  url          = {https://datacloud.di.unito.it/index.php/s/GScPS5LCPdt6Yoo},
  year         = {2022},
  date         = {2022-09-01},
  address      = {Milano, Italy},
  abstract     = {Large-scale scientific applications are facing an irreversible transition from monolithic, high-performance oriented codes to modular and polyglot deployments of specialised (micro-)services. The reasons behind this transition are many: coupling of standard solvers with Deep Learning techniques, offloading of data analysis and visualisation to Cloud, and the advent of specialised hardware accelerators. Topology-aware Workflow Management Systems (WMSs) play a crucial role. In particular, topology-awareness allows an explicit mapping of workflow steps onto heterogeneous locations, allowing automated executions on top of hybrid architectures (e.g., cloud+HPC or classical+quantum). Plus, topology-aware WMSs can offer non-functional requirements OOTB, e.g. components’ life-cycle orchestration, secure and efficient data transfers, fault tolerance, and cross-cluster execution of urgent workloads. Augmenting interactive Jupyter Notebooks with distributed workflow capabilities allows domain experts to prototype and scale applications using the same technological stack, while relying on a feature-rich and user-friendly web interface. This abstract will showcase how these general methodologies can be applied to a typical geoscience simulation pipeline based on the Full Waveform Inversion (FWI) technique. In particular, a prototypical Jupyter Notebook will be executed interactively on Cloud. Preliminary data analyses and post-processing will be executed locally, while the computationally demanding optimisation loop will be scheduled on a remote HPC cluster.},
  howpublished = {6th EAGE High Performance Computing Workshop},
  keywords     = {across, eupex, jupyter-workflow, textarossa},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli, Dario Tranchitella
Dossier: multi-tenant distributed Jupyter Notebooks Miscellaneous
DoK Talks 141, 2022, (Invited talk).
Abstract | Links | BibTeX | Tags: across, deephealth, hpc4ai, jupyter-workflow
@misc{22:data-on-kubernetes,
  title        = {Dossier: multi-tenant distributed {Jupyter} Notebooks},
  author       = {Colonnelli, Iacopo and Tranchitella, Dario},
  url          = {https://datacloud.di.unito.it/index.php/s/RNqTGmTqWS66qHT},
  year         = {2022},
  date         = {2022-07-01},
  address      = {Virtual event},
  abstract     = {When providing data analysis as a service, one must tackle several problems. Data privacy and protection by design are crucial when working on sensitive data. Performance and scalability are fundamental for compute-intensive workloads, e.g. training Deep Neural Networks. User-friendly interfaces and fast prototyping tools are essential to allow domain experts to experiment with new techniques. Portability and reproducibility are necessary to assess the actual value of results. Kubernetes is the best platform to provide reliable, elastic, and maintainable services. However, Kubernetes alone is not enough to achieve large-scale multi-tenant reproducible data analysis. OOTB support for multi-tenancy is too rough, with only two levels of segregation (i.e. the single namespace or the entire cluster). Offloading computation to off-cluster resources is non-trivial and requires the user's manual configuration. Also, Jupyter Notebooks per se cannot provide much scalability (they execute locally and sequentially) and reproducibility (users can run cells in any order and any number of times). The Dossier platform allows system administrators to manage multi-tenant distributed Jupyter Notebooks at the cluster level in the Kubernetes way, i.e. through CRDs. Namespaces are aggregated in Tenants, and all security and accountability aspects are managed at that level. Each Notebook spawns into a user-dedicated namespace, subject to all Tenant-level constraints. Users can rely on provisioned resources, either in-cluster worker nodes or external resources like HPC facilities. Plus, they can plug their computing nodes in a BYOD fashion. Notebooks are interpreted as distributed workflows, where each cell is a task that one can offload to a different location in charge of its execution.},
  howpublished = {DoK Talks 141},
  note         = {Invited talk},
  keywords     = {across, deephealth, hpc4ai, jupyter-workflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli, Dario Tranchitella
OpenDeepHealth: Crafting a Deep Learning Platform as a Service with Kubernetes Miscellaneous
J on The Beach 2022, 2022.
Links | BibTeX | Tags: across, deephealth, hpc4ai, jupyter-workflow, streamflow
@misc{22:jotb22,
  title        = {{OpenDeepHealth}: Crafting a Deep Learning Platform as a Service with {Kubernetes}},
  author       = {Colonnelli, Iacopo and Tranchitella, Dario},
  url          = {https://datacloud.di.unito.it/index.php/s/n6J7STNnwdyqtET},
  year         = {2022},
  date         = {2022-04-01},
  address      = {Malaga, Spain},
  howpublished = {J on The Beach 2022},
  keywords     = {across, deephealth, hpc4ai, jupyter-workflow, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
Iacopo Colonnelli
Distributed workflows with Jupyter Miscellaneous
J on The Beach 2022, 2022, (Workshop).
Links | BibTeX | Tags: across, deephealth, jupyter-workflow, streamflow
@misc{22:jotb22-workshop,
  title        = {Distributed workflows with {Jupyter}},
  author       = {Colonnelli, Iacopo},
  url          = {https://datacloud.di.unito.it/index.php/s/om89q55S6ePf2Ji},
  year         = {2022},
  date         = {2022-04-01},
  address      = {Malaga, Spain},
  howpublished = {J on The Beach 2022},
  note         = {Workshop},
  keywords     = {across, deephealth, jupyter-workflow, streamflow},
  pubstate     = {published},
  tppubtype    = {misc}
}
2020
Iacopo Colonnelli, Sergio Rabellino
JupyterFlow: Jupyter Notebooks su larga scala Miscellaneous
Workshop GARR 2020, 2020.
Abstract | Links | BibTeX | Tags: deephealth, hpc4ai, jupyter-workflow
@misc{20:GarrWorkshop,
  title        = {{JupyterFlow}: {Jupyter} Notebooks su larga scala},
  author       = {Colonnelli, Iacopo and Rabellino, Sergio},
  url          = {https://datacloud.di.unito.it/index.php/s/ASPEmyXAj5QscgC},
  year         = {2020},
  date         = {2020-11-01},
  address      = {Virtual event},
  abstract     = {I Jupyter Notebook sono largamente utilizzati sia in ambito industriale che accademico come strumento di didattica, prototipazione e analisi esplorative. Purtroppo il sistema runtime standard di Jupyter non è abbastanza potente per sostenere carichi di lavoro reali e spesso l'unica soluzione è quella di riscrivere il codice da zero in una tecnologia con supporto HPC. Integrando lo stack Jupyter con StreamFlow (https://streamflow.di.unito.it/) è possibile creare i Notebook tramite un'interfaccia web su cloud ed eseguirli in maniera trasparente in remoto su una VM con GPU o su nodi HPC.},
  howpublished = {Workshop GARR 2020},
  keywords     = {deephealth, hpc4ai, jupyter-workflow},
  pubstate     = {published},
  tppubtype    = {misc}
}