In recent years, the data available from IoT devices have increased rapidly. Using a machine learning solution to detect faults in these devices requires the release of device data to a central server. However, these data typically contain sensitive information, leading to the need for privacy-preserving distributed machine learning solutions, such as federated learning, where a model is trained locally on the edge device, and only the trained model weights are shared with a central server. Device failure data are typically imbalanced, i.e., the number of failures is minimal compared to the number of normal samples. Therefore, re-balancing techniques are needed to improve the performance of a machine learning model. In this paper, we present FLY-SMOTE, a new approach to re-balance the data in different non-IID scenarios by generating synthetic data for the minority class in supervised learning tasks using a modified SMOTE method. Our approach takes $k$ samples from the minority class and generates $Y$ new synthetic samples based on one of the nearest neighbors of each $k$ sample. An experimental campaign on a real IoT dataset and three well-known public datasets show that the proposed solution improves the balance accuracy without compromising the model’s accuracy.
Description
FLY-SMOTE: Re-Balancing the Non-IID IoT Edge Devices Data in Federated Learning System | IEEE Journals & Magazine | IEEE Xplore
%0 Journal Article
%1 9800764
%A Younis, Raneen
%A Fisichella, Marco
%D 2022
%J IEEE Access
%K myown
%P 65092-65102
%R 10.1109/ACCESS.2022.3184309
%T FLY-SMOTE: Re-Balancing the Non-IID IoT Edge Devices Data in Federated Learning System
%U https://ieeexplore.ieee.org/document/9800764
%V 10
%X In recent years, the data available from IoT devices have increased rapidly. Using a machine learning solution to detect faults in these devices requires the release of device data to a central server. However, these data typically contain sensitive information, leading to the need for privacy-preserving distributed machine learning solutions, such as federated learning, where a model is trained locally on the edge device, and only the trained model weights are shared with a central server. Device failure data are typically imbalanced, i.e., the number of failures is minimal compared to the number of normal samples. Therefore, re-balancing techniques are needed to improve the performance of a machine learning model. In this paper, we present FLY-SMOTE, a new approach to re-balance the data in different non-IID scenarios by generating synthetic data for the minority class in supervised learning tasks using a modified SMOTE method. Our approach takes $k$ samples from the minority class and generates $Y$ new synthetic samples based on one of the nearest neighbors of each $k$ sample. An experimental campaign on a real IoT dataset and three well-known public datasets show that the proposed solution improves the balance accuracy without compromising the model’s accuracy.
@comment{IEEE Access journal article (BibSonomy export). Title acronyms are brace-protected against style recasing; the page range uses a double hyphen; the ampersand in description is escaped for LaTeX.}
@article{9800764,
  abstract    = {In recent years, the data available from IoT devices have increased rapidly. Using a machine learning solution to detect faults in these devices requires the release of device data to a central server. However, these data typically contain sensitive information, leading to the need for privacy-preserving distributed machine learning solutions, such as federated learning, where a model is trained locally on the edge device, and only the trained model weights are shared with a central server. Device failure data are typically imbalanced, i.e., the number of failures is minimal compared to the number of normal samples. Therefore, re-balancing techniques are needed to improve the performance of a machine learning model. In this paper, we present FLY-SMOTE, a new approach to re-balance the data in different non-IID scenarios by generating synthetic data for the minority class in supervised learning tasks using a modified SMOTE method. Our approach takes $k$ samples from the minority class and generates $Y$ new synthetic samples based on one of the nearest neighbors of each $k$ sample. An experimental campaign on a real IoT dataset and three well-known public datasets show that the proposed solution improves the balance accuracy without compromising the model's accuracy.},
  added-at    = {2022-11-07T10:30:28.000+0100},
  author      = {Younis, Raneen and Fisichella, Marco},
  biburl      = {https://www.bibsonomy.org/bibtex/24bd45af784f6e5e47a02539c676e5d92/mfisichella},
  description = {FLY-SMOTE: Re-Balancing the Non-IID IoT Edge Devices Data in Federated Learning System | IEEE Journals \& Magazine | IEEE Xplore},
  doi         = {10.1109/ACCESS.2022.3184309},
  interhash   = {706deeae4daf717857860b85f424e669},
  intrahash   = {4bd45af784f6e5e47a02539c676e5d92},
  issn        = {2169-3536},
  journal     = {IEEE Access},
  keywords    = {myown},
  pages       = {65092--65102},
  timestamp   = {2022-11-07T10:30:28.000+0100},
  title       = {{FLY-SMOTE}: Re-Balancing the {Non-IID} {IoT} Edge Devices Data in Federated Learning System},
  url         = {https://ieeexplore.ieee.org/document/9800764},
  volume      = {10},
  year        = {2022},
}