@article {Nakach2022446, title = {Deep Hybrid AdaBoost Ensembles for Histopathological Breast Cancer Classification}, journal = {Lecture Notes in Networks and Systems}, volume = {468 LNNS}, year = {2022}, note = {cited By 0}, pages = {446-455}, abstract = {Breast cancer (BC) is the most commonly diagnosed cancer type and one of the leading causes of death in women worldwide. The early diagnosis of this type of cancer is the main driver of a high survival rate. This paper aims to use homogeneous ensemble learning and transfer learning for binary classification of BC histological images over the four magnification factor (MF) values of the BreakHis dataset: 40X, 100X, 200X, and 400X. The proposed ensembles are implemented using a hybrid architecture (HA) that combines: (1) three of the most recent deep learning (DL) techniques as feature extractors (FE): DenseNet_201, Inception_V3, and MobileNet_V2, and (2) the boosting method AdaBoost with Decision Tree (DT) as a base learner. The study evaluated and compared: the ensembles designed with the same HA but with different numbers of trees (50, 100, 150 and 200); the single DT classifiers against the best AdaBoost ensembles; and the best AdaBoost ensembles of each FE over each MF. The empirical evaluations used: four classification performance criteria (accuracy, recall, precision and F1-score), 5-fold cross-validation, the Scott Knott (SK) statistical test to select the best cluster of the outperforming models, and the Borda Count voting system to rank the best performing ones. Results showed the potential of combining DL techniques for FE with the AdaBoost boosting method to classify BC into malignant and benign tumors; furthermore, the AdaBoost ensemble constructed using 200 trees, DenseNet_201 as FE and MF 200X achieved the best mean accuracy value of 90.36\%. {\textcopyright} 2022, The Author(s), under exclusive license to Springer Nature Switzerland AG.}, doi = {10.1007/978-3-031-04826-5_45}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85130279063\&doi=10.1007\%2f978-3-031-04826-5_45\&partnerID=40\&md5=59d8d87203f77eed9f4da56cae519226}, author = {Nakach, F.-Z. and Zerouaoui, H. and Idri, A.} } @article {Zerouaoui2022, title = {Deep hybrid architectures for binary classification of medical breast cancer images}, journal = {Biomedical Signal Processing and Control}, volume = {71}, year = {2022}, note = {cited By 16}, abstract = {The diagnosis of breast cancer in the early stages significantly decreases the mortality rate by allowing the choice of adequate treatment. This study developed and evaluated twenty-eight hybrid architectures combining seven recent deep learning techniques for feature extraction (DenseNet 201, Inception V3, Inception ResNet V2, MobileNet V2, ResNet 50, VGG16, and VGG19), and four classifiers (MLP, SVM, DT, and KNN) for a binary classification of breast pathological images over the BreakHis and FNAC datasets. The designed architectures were evaluated using: (1) four classification performance criteria (accuracy, precision, recall, and F1-score), (2) the Scott Knott (SK) statistical test to cluster the proposed architectures and identify the best cluster of the outperforming architectures, and (3) the Borda Count voting method to rank the best performing architectures. The results showed the potential of combining deep learning techniques for feature extraction and classical classifiers to classify breast cancer into malignant and benign tumors.
The hybrid architecture using the MLP classifier and DenseNet 201 for feature extraction (MDEN) was the top performing architecture, with accuracy values reaching 99\% over the FNAC dataset and 92.61\%, 92\%, 93.93\%, and 91.73\% over the four magnification factor values of the BreakHis dataset: 40X, 100X, 200X, and 400X, respectively. The results of this study recommend the use of hybrid architectures using DenseNet 201 for the feature extraction of breast cancer histological images, because it gave the best results for both the BreakHis and FNAC datasets, especially when combined with the MLP classifier. {\textcopyright} 2021 Elsevier Ltd}, keywords = {accuracy, algorithm, Article, augmentation index, Binary classification, biopsy technique, Breast Cancer, Breast Cancer Histopathological Image Classification, Classification (of information), Classification algorithm, classifier, Cluster computing, clustering algorithm, Clustering algorithms, colloid carcinoma, Computer aided diagnosis, Computer architecture, construct validity, contrast limited adaptive histogram equalization, Convolutional neural network, Convolutional neural networks, deep hybrid architecture, Deep learning, Deep neural networks, Diseases, ductal carcinoma, external validity, Extraction, F1 score, Feature extraction, Features extraction, feed forward neural network, fibroadenoma, fine needle aspiration biopsy, histogram, Histological images, histology, Hybrid architectures, Image classification, image processing, Images processing, internal validity, learning algorithm, Learning algorithms, Learning techniques, lobular carcinoma, Machine learning, measurement precision, Medical imaging, MLP classifiers, Mortality rate, Network architecture, papillary carcinoma, Pathological images, phyllodes tumor, recall, residual neural network, scoring system, Scott Knott, Support vector machines}, doi = {10.1016/j.bspc.2021.103226}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85125746862\&doi=10.1016\%2fj.bspc.2021.103226\&partnerID=40\&md5=b9b74d0dcb135861bc2e3d820f836efa}, author = {Zerouaoui, H. and Idri, A.} } @article {Lahmar2022, title = {Deep hybrid architectures for diabetic retinopathy classification}, journal = {Computer Methods in Biomechanics and Biomedical Engineering: Imaging and Visualization}, year = {2022}, note = {cited By 0}, abstract = {Diabetic retinopathy (DR) is the most severe ocular complication of diabetes. It leads to serious eye complications such as vision impairment and blindness. A computer-aided diagnosis may help in the early detection of this disease, which increases the chances of treating it efficiently. This paper carried out an empirical evaluation of the performances of 28 deep hybrid architectures for an automatic binary classification of referable diabetic retinopathy, and compared them to seven end-to-end deep learning (DL) architectures. For the hybrid architectures, we combined seven DL techniques for feature extraction (DenseNet201, VGG16, VGG19, MobileNet_V2, Inception_V3, Inception_ResNet_V2 and ResNet50) and four classifiers (SVM, MLP, DT and KNN). For the end-to-end DL architectures, we used the same techniques as those used for feature extraction in the hybrid architectures. The architectures were compared in terms of accuracy, sensitivity, precision and F1-score using the Scott Knott test and the Borda count voting method. All the empirical evaluations were performed over three datasets: APTOS, Kaggle DR and Messidor-2, using a k-fold cross-validation method.
The results showed the potential of combining deep learning techniques for feature extraction and classical machine learning techniques to classify referable diabetic retinopathy. The hybrid architecture using the SVM classifier and MobileNet_V2 for feature extraction was the top performing architecture, and it was classified with the best performing end-to-end architectures in the best clusters of the APTOS, Kaggle DR and Messidor-2 datasets, with accuracies of 88.80\%, 84.01\% and 84.05\%, respectively. Note that the two end-to-end architectures DenseNet201 and MobileNet_V2 outperformed all the hybrid architectures over the three datasets. However, we recommend the use of the hybrid architecture designed with SVM and MobileNet_V2, since it is promising, less time-consuming, and requires less parameter tuning compared to the end-to-end techniques. {\textcopyright} 2022 Informa UK Limited, trading as Taylor \& Francis Group.}, keywords = {Classification (of information), Computer aided diagnosis, Deep learning, Diabetic retinopathy, Empirical evaluations, End to end, Extraction, Eye protection, Feature extraction, Features extraction, Hybrid architectures, Image classification, Learning algorithms, Learning architectures, Learning techniques, Medical image, Medical imaging, Performance, Support vector machines, Vision impairments}, doi = {10.1080/21681163.2022.2060864}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85129262042\&doi=10.1080\%2f21681163.2022.2060864\&partnerID=40\&md5=35c1d7e5976fbdb00e23b4aa456eadc9}, author = {Lahmar, C. and Idri, A.} } @article {ElAlaoui2022435, title = {Deep Stacked Ensemble for Breast Cancer Diagnosis}, journal = {Lecture Notes in Networks and Systems}, volume = {468 LNNS}, year = {2022}, note = {cited By 1}, pages = {435-445}, abstract = {Breast cancer is considered one of the major public health issues and a leading cause of death among women in the world. Its early diagnosis can significantly increase the chances of survival. Therefore, this study proposes a deep stacking ensemble technique for binary classification of breast histopathological images over the BreakHis dataset. Initially, to form the base learners of the deep stacking ensemble, we trained seven deep learning (DL) techniques based on pre-trained VGG16, VGG19, ResNet50, Inception_V3, Inception_ResNet_V2, Xception, and MobileNet with a 5-fold cross-validation method. Then, a meta-model was built: a logistic regression algorithm that learns how best to combine the predictions of the base learners. Furthermore, to evaluate and compare the performance of the proposed technique, we used: (1) four classification performance criteria (accuracy, precision, recall, and F1-score), and (2) the Scott Knott (SK) statistical test to cluster and identify the outperforming models. Results showed the potential of the stacked deep learning techniques to classify breast cancer images into malignant or benign tumors. The proposed deep stacking ensemble reported an overall accuracy of 93.8\%, 93.0\%, 93.3\%, and 91.8\% over the four magnification factor (MF) values of the BreakHis dataset: 40X, 100X, 200X and 400X, respectively. {\textcopyright} 2022, The Author(s), under exclusive license to Springer Nature Switzerland AG.}, doi = {10.1007/978-3-031-04826-5_44}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85130273664\&doi=10.1007\%2f978-3-031-04826-5_44\&partnerID=40\&md5=4246b5750ce0b8a03e4a80001cdfc5e5}, author = {El Alaoui, O. and Zerouaoui, H.
and Idri, A.} } @article {Wadghiri2022, title = {Ensemble blood glucose prediction in diabetes mellitus: A review}, journal = {Computers in Biology and Medicine}, volume = {147}, year = {2022}, note = {cited By 0}, abstract = {Considering the complexity of blood glucose dynamics, the adoption of a single model to predict blood glucose levels does not always capture inter- and intra-patient context changes. Ensembles are a set of machine learning techniques combining multiple single learners to find a better variance/bias trade-off and hence improve prediction accuracy. The present paper aims to review the state of the art in predicting blood glucose using ensemble methods with regard to eight criteria: publication year and sources, datasets used to train/evaluate the models, types of ensembles used, single learners used to construct the ensembles, combination schemes used to aggregate the base learners, metrics and validation methods adopted to assess the performance of ensembles, reported overall performance of the predictors, and accuracy comparison of ensemble techniques with single models. A systematic literature review has been conducted in order to analyze and synthesize primary studies published between 2000 and 2020 in six digital libraries. A total of 32 primary papers were selected and reviewed with regard to eight review questions. The results show that ensembles have gained wider interest in recent years and, in general, improved performance compared with single models. However, multiple gaps have been identified concerning the ensemble construction process and the performance metrics used. Several recommendations have been made in this regard to design accurate ensembles for blood glucose level prediction. {\textcopyright} 2022 Elsevier Ltd}, keywords = {algorithm, Algorithms, Blood, Blood glucose, Blood glucose level, Data mining, Diabetes mellitus, Digital libraries, Economic and social effects, Ensemble methods, Forecasting, Glucose, glucose blood level, Glucose dynamics, human, Humans, Machine learning, Machine learning techniques, Machine-learning, Performance, Single models, Trade off}, doi = {10.1016/j.compbiomed.2022.105674}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85132341147\&doi=10.1016\%2fj.compbiomed.2022.105674\&partnerID=40\&md5=05bd59c3726654494a4be346e4e6a682}, author = {Wadghiri, M.Z. and Idri, A. and El Idrissi, T. and Hakkoum, H.} } @article {Nakach20221043, title = {Hybrid deep boosting ensembles for histopathological breast cancer classification}, journal = {Health and Technology}, volume = {12}, number = {6}, year = {2022}, note = {cited By 0}, pages = {1043-1060}, abstract = {Purpose: Breast cancer (BC) is the most commonly diagnosed cancer type and one of the leading causes of death in women worldwide. This paper aims to investigate ensemble learning and transfer learning for binary classification of BC histological images over the four magnification factor (MF) values of the BreakHis dataset: 40X, 100X, 200X, and 400X. Methods: The proposed homogeneous ensembles are implemented using a hybrid architecture that combines: (1) three of the most recent deep learning (DL) techniques for feature extraction: DenseNet_201, MobileNet_V2, and Inception_V3, and (2) four of the most popular boosting methods for classification: AdaBoost (ADB), Gradient Boosting Machine (GBM), LightGBM (LGBM) and XGBoost (XGB) with Decision Tree (DT) as a base learner.
The study evaluated and compared: (1) a set of boosting ensembles designed with the same hybrid architecture but with different numbers of trees (50, 100, 150 and 200); (2) the different boosting methods; and (3) the single DT classifier against the best boosting ensembles. The empirical evaluations used: four classification performance criteria (accuracy, recall, precision and F1-score), fivefold cross-validation, the Scott Knott statistical test to select the best cluster of the outperforming models, and the Borda Count voting system to rank the best performing ones. Results: The best boosting ensemble achieved an accuracy value of 92.52\%; it was constructed using XGB with 200 trees and Inception_V3 as feature extractor (FE). Conclusions: The results showed the potential of combining DL techniques for feature extraction with boosting ensembles to classify BC into malignant and benign tumors. {\textcopyright} 2022, The Author(s) under exclusive licence to International Union for Physical and Engineering Sciences in Medicine (IUPESM).}, doi = {10.1007/s12553-022-00709-z}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85141693839\&doi=10.1007\%2fs12553-022-00709-z\&partnerID=40\&md5=9e968402920eaed7fb6aaa79efc60f1a}, author = {Nakach, F.-Z. and Zerouaoui, H. and Idri, A.} } @article {Miloudi2022191, title = {The Impact of Instance Selection Algorithms on Maintenance Effort Estimation for Open-Source Software}, journal = {Lecture Notes in Networks and Systems}, volume = {470 LNNS}, year = {2022}, note = {cited By 0}, pages = {191-201}, abstract = {Open-source software is now widely used in industry, and estimating its maintenance effort has become an interesting research topic. In this context, researchers have conducted many open-source software maintenance effort estimation (O-MEE) studies based on statistical and machine learning (ML) techniques for better estimation. This study focuses on the impact of instance selection on the performance of ML techniques in O-MEE, mainly for bug resolution. An empirical study was conducted using three techniques: K-nearest neighbor (kNN), support vector machine (SVM), and multinomial na{\"\i}ve Bayes (MNB) with the All-kNN instance selection algorithm on three datasets: Eclipse JDT, Eclipse Platform, and Mozilla Thunderbird. This study reports on a set of 18 experiments and a comparison of the results. The results of this study show that instance selection helped improve the performance of the ML techniques. {\textcopyright} 2022, The Author(s), under exclusive license to Springer Nature Switzerland AG.}, doi = {10.1007/978-3-031-04829-6_17}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85130234489\&doi=10.1007\%2f978-3-031-04829-6_17\&partnerID=40\&md5=7ac0a2d5915327c057f3ec68815ba211}, author = {Miloudi, C. and Cheikhi, L. and Idri, A. and Abran, A.} } @article {Hakkoum2022, title = {Interpretability in the medical field: A systematic mapping and review study}, journal = {Applied Soft Computing}, volume = {117}, year = {2022}, note = {cited By 4}, abstract = {Context: Recently, the machine learning (ML) field has been growing rapidly, mainly owing to the availability of historical datasets and advanced computational power. This growth still faces a set of challenges, such as the interpretability of ML models. In particular, in the medical field, interpretability is a real bottleneck to the use of ML by physicians.
Therefore, numerous interpretability techniques have been proposed and evaluated to help ML gain the trust of its users. Methods: This review was carried out according to the well-known systematic map and review process to analyze the literature on interpretability techniques when applied in the medical field with regard to different aspects: publication venues and publication year, contribution and empirical types, medical and ML disciplines and objectives, ML black-box techniques interpreted, interpretability techniques investigated, their performance and the best performing techniques, and lastly, the datasets used when evaluating interpretability techniques. Results: A total of 179 articles (1994{\textendash}2020) were selected from six digital libraries: ScienceDirect, IEEE Xplore, ACM Digital Library, SpringerLink, Wiley, and Google Scholar. The results showed that the number of studies dealing with interpretability increased over the years, with a dominance of solution proposals and the experiment-based empirical type. Diagnosis, oncology, and classification were the most frequent medical task, discipline, and ML objective studied, respectively. Artificial neural networks were the ML black-box techniques most widely investigated for interpretability. Additionally, global interpretability techniques focusing on a specific black-box model, such as rules, were the dominant explanation types, and most of the metrics used to evaluate interpretability were accuracy, fidelity, and number of rules. Moreover, the variety of the techniques used by the selected papers did not allow categorization at the technique level, and the high total number of evaluations (671) across the articles raised a suspicion of subjectivity. Datasets that contained numerical and categorical attributes were the most frequently used in the selected studies. Conclusions: Further effort is needed in disciplines other than diagnosis and classification. Global techniques such as rules are the most used because of their comprehensibility to doctors, but new local techniques should be explored more in the medical field to gain more insights into the model{\textquoteright}s behavior. More experiments and comparisons against existing techniques are encouraged to determine the best performing techniques. Lastly, quantitative evaluation of interpretability and physicians{\textquoteright} involvement in the evaluation of interpretability techniques are highly recommended to assess how the techniques will perform in real-world scenarios. This can ensure the soundness of the techniques and help gain trust in black-box models in medical environments. {\textcopyright} 2022 Elsevier B.V.}, keywords = {Black box modelling, Black boxes, Computational power, Computer aided diagnosis, Digital libraries, Explainability, Historical dataset, Interpretability, Machine learning, Medical fields, Neural networks, Systematic mapping, Systematic Review, XAI}, doi = {10.1016/j.asoc.2021.108391}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85122624142\&doi=10.1016\%2fj.asoc.2021.108391\&partnerID=40\&md5=38db4d1f5c417a07d0a3204639e157a2}, author = {Hakkoum, H. and Abnane, I.
and Idri, A.} } @article {Benaida2022476, title = {Machine and~Deep Learning Predictive Techniques for~Blood Glucose Level}, journal = {Lecture Notes in Networks and Systems}, volume = {468 LNNS}, year = {2022}, note = {cited By 0}, pages = {476-485}, abstract = {Allowing diabetic patients to predict their blood glucose level (BGL) is an important task for the self-management of their metabolic disease, as it allows them to avoid hypo- or hyperglycaemia by taking appropriate actions. Currently, this is possible due to the development of machine and deep learning techniques, which are successfully used in many prediction tasks. This paper evaluates and compares the performances of six ML/DL techniques for BGL forecasting: four DL techniques (CNN, LSTM, GRU and DBN) and two ML/statistical techniques (SVR and AR). The performance of the six regressors was evaluated in terms of four criteria: RMSE, MAE, MMRE, and PRED. In addition, the Scott-Knott test was used to assess statistical significance and to rank the regressors. The results show that AR was the best for 5~min ahead forecasting, with a mean RMSE of 8.67~mg/dl. {\textcopyright} 2022, The Author(s), under exclusive license to Springer Nature Switzerland AG.}, doi = {10.1007/978-3-031-04826-5_48}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85130238876\&doi=10.1007\%2f978-3-031-04826-5_48\&partnerID=40\&md5=8889c0cf2c91459b8f32389a846059f5}, author = {Benaida, M. and Abnane, I. and Idri, A. and El Idrissi, T.} } @conference {Miloudi2022, title = {Maintenance Effort Estimation for Open Source Software: Current trends}, booktitle = {CEUR Workshop Proceedings}, volume = {3272}, year = {2022}, note = {cited By 0}, abstract = {Software maintenance of Open Source Software (OSS) has gained more attention in recent years, facilitated by the Internet. Since volunteers in OSS do not record the effort of their contributions to maintenance tasks, researchers have to estimate the maintenance effort of such software indirectly. A review of the published OSS-MEE models has been performed using a set of 65 selected studies in a Systematic Mapping Study (SMS). This study analyses and discusses the state of the art of OSS-MEE and identifies trends through five additional Mapping Questions (MQs). In summary, various maintenance effort estimation (MEE) models were developed for OSS or industrial software. Researchers have mostly expressed the maintenance effort in terms of bug fixing, bug resolution time and severity in conjunction with bug report attributes. Regression analysis and Bayesian networks were the most used estimation techniques, and recall, precision, R2 and F-measure the most used evaluation criteria, in addition to the k-fold cross-validation method. Most of the models were implemented using WEKA, R software and MATLAB. More than half of the selected studies lacked any validity analysis of their results. Trends are also discussed to identify a set of implications for researchers. {\textcopyright} 2020 Copyright for this paper by its authors.}, keywords = {Bayesian networks, Bug-fixing, Computer software maintenance, Effort Estimation, Effort estimation model, Industrial software, Maintenance efforts, Maintenance tasks, Mapping, MATLAB, Open source software, Open systems, Open-source softwares, Regression analysis, State of the art, Systematic mapping studies}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85142872497\&partnerID=40\&md5=d5e99ad4babcbb3a4fd5f50001fae51c}, author = {Miloudi, C. and Cheikhi, L.
and Abran, A. and Idri, A.} } @article {Zerouaoui20221, title = {A new approach for histological classification of breast cancer using deep hybrid heterogeneous ensemble}, journal = {Data Technologies and Applications}, year = {2022}, note = {cited By 0}, pages = {1-34}, abstract = {Purpose: Hundreds of thousands of deaths each year in the world are caused by breast cancer (BC). An early-stage diagnosis of this disease can positively reduce the morbidity and mortality rate by helping to select the most appropriate treatment options, especially by using histological BC images for the diagnosis. Design/methodology/approach: The present study proposes and evaluates a novel approach which consists of 24 deep hybrid heterogeneous ensembles that combine the strength of seven deep learning techniques (DenseNet 201, Inception V3, VGG16, VGG19, Inception-ResNet-V2, MobileNet V2 and ResNet 50) for feature extraction and four well-known classifiers (multi-layer perceptron, support vector machines, K-nearest neighbors and decision tree) by means of hard and weighted voting combination methods for the histological classification of BC medical images. Furthermore, the best deep hybrid heterogeneous ensembles were compared to the deep stacked ensembles to determine the best strategy for designing deep ensemble methods. The empirical evaluations used four classification performance criteria (accuracy, precision, recall and F1-score), fivefold cross-validation, the Scott{\textendash}Knott (SK) statistical test and the Borda count voting method, and were conducted over the public histological BreakHis dataset with four magnification factors (40{\texttimes}, 100{\texttimes}, 200{\texttimes} and 400{\texttimes}). The SK statistical test and Borda count were used to cluster the designed techniques and rank the techniques belonging to the best SK cluster, respectively. Findings: Results showed that the deep hybrid heterogeneous ensembles outperformed both their single components and the deep stacked ensembles, reaching accuracy values of 96.3, 95.6, 96.3 and 94 per cent across the four magnification factors 40{\texttimes}, 100{\texttimes}, 200{\texttimes} and 400{\texttimes}, respectively. Originality/value: The proposed deep hybrid heterogeneous ensembles can be applied to BC diagnosis to assist pathologists in reducing missed diagnoses and proposing adequate treatments for patients. {\textcopyright} 2022, Emerald Publishing Limited.}, keywords = {Borda Count, Breast Cancer, Computer aided diagnosis, Convolutional neural network, Convolutional neural networks, Decision trees, Deep convolutional neural network, Deep neural networks, Digital pathologies, Diseases, Empirical evaluations, Ensemble methods, F1 scores, Histological images, Image classification, Images processing, Medical imaging, Nearest neighbor search, Statistical tests, Support vector machines}, doi = {10.1108/DTA-05-2022-0210}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85140306559\&doi=10.1108\%2fDTA-05-2022-0210\&partnerID=40\&md5=52f113ae4cf2b14566d0b92c7140ffe1}, author = {Zerouaoui, H. and Idri, A.
and El Alaoui, O.} } @article {Ouifak2022456, title = {Performance-Interpretability Tradeoff of Mamdani Neuro-Fuzzy Classifiers for Medical Data}, journal = {Lecture Notes in Networks and Systems}, volume = {468 LNNS}, year = {2022}, note = {cited By 0}, pages = {456-465}, abstract = {Neuro-fuzzy systems are models that combine the learning ability and performance of Artificial Neural Networks (ANNs) with the interpretable reasoning of fuzzy inference systems (FISs). An ANN can learn patterns from data and achieve high accuracy, while a FIS uses linguistic and interpretable rules to match the inputs and outputs of the data. Two types of FIS are the most used in the literature: Takagi-Sugeno-Kang (TSK) and Mamdani. The main focus of this paper is on Mamdani neuro-fuzzy systems, notably the Hybrid Neuro-Fuzzy Inference System (HyFIS) and the Neuro-Fuzzy Classifier (NEFCLASS). It aims at evaluating and comparing the two classifiers over two medical datasets to study their performance-interpretability tradeoff. Results show that HyFIS is the best in terms of performance, while NEFCLASS is better in terms of interpretability. As for the performance-interpretability tradeoff, NEFCLASS has the best overall results; it achieves a good performance while being less complicated and more interpretable. {\textcopyright} 2022, The Author(s), under exclusive license to Springer Nature Switzerland AG.}, doi = {10.1007/978-3-031-04826-5_46}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85130272925\&doi=10.1007\%2f978-3-031-04826-5_46\&partnerID=40\&md5=65dc74043fe03a59608e34846f4fd7db}, author = {Ouifak, H. and Idri, A. and Benbriqa, H. and Abnane, I.} } @conference {ElFhel2022309, title = {Quality Evaluation of Mobile GIS for Data Collection}, booktitle = {International Conference on Evaluation of Novel Approaches to Software Engineering, ENASE - Proceedings}, year = {2022}, note = {cited By 0}, pages = {309-316}, abstract = {High-quality software has to fulfil stakeholders{\textquoteright} requirements identified in a requirements engineering process. This paper presents an overview of requirements for mobile Geographic Information Systems (GIS) for data collection, extracted from the literature, standards and existing apps on the market. The quality model ISO/IEC 25010 was explored using measures stated in the ISO/IEC 25023 standard. A checklist that marks the influence of each requirement on the quality characteristics and sub-characteristics has been established in order to calculate three degrees of the requirements{\textquoteright} influence on the external product quality of mobile GIS. The results obtained show that requirements related to online data access have the highest impact on the external quality characteristics, whereas functional suitability and usability are the characteristics most influenced by the requirements. Copyright {\textcopyright} 2022 by SCITEPRESS {\textendash} Science and Technology Publications, Lda.
All rights reserved.}, keywords = {Computer software selection and evaluation, Data acquisition, Data collection, Geographic information systems, High-quality software, Information systems, Information use, ISO Standards, ISO/IEC, ISO/IEC 25010, Mobile GIS, Mobile GIS for data collection, Quality characteristic, Quality control, Quality evaluation, Requirement Engineering, Requirements engineering, Software product quality}, doi = {10.5220/0011033900003176}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85141002348\&doi=10.5220\%2f0011033900003176\&partnerID=40\&md5=fdade3912dd3d890bfb0d23f2879e3f0}, author = {El Fhel, B. and Sardi, L. and Idri, A.} } @article {Nakach20223, title = {Random Forest Based Deep Hybrid Architecture for Histopathological Breast Cancer Images Classification}, journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)}, volume = {13376 LNCS}, year = {2022}, note = {cited By 0}, pages = {3-18}, abstract = {Breast cancer is the most common cancer in women worldwide. While early diagnosis and treatment can significantly reduce the mortality rate, accurately identifying cancerous cells and tissues is a challenging task for pathologists. Therefore, machine learning techniques are playing a significant role in assisting pathologists and improving diagnosis results. This paper proposes a hybrid architecture that combines: three of the most recent deep learning techniques for feature extraction (DenseNet_201, Inception_V3, and MobileNet_V2) and random forest to classify breast cancer histological images over the BreakHis dataset with its four magnification factors: 40X, 100X, 200X and 400X. The study evaluated and compared: (1) the developed random forest models with their base learners, (2) the designed random forest models with the same architecture but with different numbers of trees, (3) the decision tree classifiers with the best random forest models and (4) the best random forest models of each feature extractor. The empirical evaluations used: four classification performance criteria (accuracy, sensitivity, precision and F1-score), 5-fold cross-validation, the Scott Knott statistical test, and the Borda Count voting method. The best random forest model achieved a mean accuracy value of 85.88\% and was constructed using 9 trees, 200X as a magnification factor, and Inception_V3 as a feature extractor. The experimental results demonstrated that combining random forest with deep learning models is effective for the automatic classification of malignant and benign tumors using histopathological images of breast cancer. {\textcopyright} 2022, The Author(s), under exclusive license to Springer Nature Switzerland AG.}, keywords = {Breast Cancer, Classification (of information), Decision trees, Deep learning, Diagnosis, Diseases, early diagnosis, Feature extractor, hybrid, Hybrid architectures, Image classification, Images classification, Learning systems, Magnification factors, Medical imaging, Random forest modeling, Random forests, Transfer learning}, doi = {10.1007/978-3-031-10450-3_1}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85135002675\&doi=10.1007\%2f978-3-031-10450-3_1\&partnerID=40\&md5=892757314e050f81e9b0dba117f2d64a}, author = {Nakach, F.-Z. and Zerouaoui, H.
and Idri, A.} } @conference {Sardi2022435, title = {A Reusable Catalog of Requirements for Gamified Mobile Health Applications}, booktitle = {International Conference on Evaluation of Novel Approaches to Software Engineering, ENASE - Proceedings}, year = {2022}, note = {cited By 0}, pages = {435-442}, abstract = {The new era of mobile technologies has revolutionized all fundamental human pursuits in that many sectors have been reshaped and tangibly improved, such as education, finance, and healthcare. Referred to as mHealth, the use of mobile technologies to track and improve health outcomes is a rapidly expanding trend. A plethora of mobile health applications (mHealth apps) are constantly being developed thanks to the rapid adoption of mobile devices. From basic exercise trackers to powerful self-management tools, mHealth apps play several roles towards the provision of better healthcare. To induce playful experiences and boost engagement, gamification has proven to be extremely effective with mHealth apps. This paper presents a catalog of reusable requirements for gamified mHealth apps in terms of gamification, functional suitability, usability, and security. The catalog is intended to be continuously improved by adding further requirements drawn from other standards and information sources. Copyright {\textcopyright} 2022 by SCITEPRESS {\textendash} Science and Technology Publications, Lda. All rights reserved.}, keywords = {Biomedical engineering, Catalog, Gamification, Health outcomes, ISO/IEC, ISO/IEC 25010, Management tool, mHealth, Mobile health application, Mobile Technology, Requirement, Self management}, doi = {10.5220/0011071700003176}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85140984297\&doi=10.5220\%2f0011071700003176\&partnerID=40\&md5=dc18bd51fe840506f2a31af133df9fb2}, author = {Sardi, L. and Idri, A. and Redman, L.M. and Alami, H. and Fernandez-Aleman, J.L.} } @article {Kharbouch2022, title = {Reviewing the features and functionalities of contraception mPHRs}, journal = {Health Policy and Technology}, volume = {11}, number = {3}, year = {2022}, note = {cited By 0}, abstract = {Background and objective. Women{\textquoteright}s circumstances and preferences tend to change over time, meaning that access to and choice of a contraceptive method throughout their reproductive life is crucial. By preventing unintended pregnancies and reducing the need for abortion, effective contraception ensures women{\textquoteright}s well-being and autonomy, improves their social and economic role, and enables them to fully participate in society while supporting the health and development of their communities. The use of mobile applications (apps) for contraception is considered a promising approach by which to facilitate access to a reproductive health service of this nature and spread awareness about contraception. The aim of this paper is, therefore, to assess the features and functionalities of the contraception mPHRs (Mobile Personal Health Records) that are available on both the Android and iOS platforms. Methods. In order to select, extract data from and evaluate the features and functionalities of existing contraception apps, an analysis process based on the well-known Systematic Literature Review (SLR) protocol was conducted for the contraception apps available on both the Android and iOS platforms.
The analysis and assessment of the selected apps{\textquoteright} features were performed in accordance with a twenty-seven-item assessment questionnaire. The latter was developed on the basis of the scientific literature concerning contraception and a preliminary analysis of the contraception apps currently available. Results. A total of 54 contraception apps were selected, including 38 Android apps, 4 iOS apps, and 12 cross-platform apps. The results showed that only 11\% of the selected apps support the World Health Organization{\textquoteright}s (WHO) medical eligibility criteria for contraception (MEC), and that barely 43\% provide relevant information about contraceptive methods, their efficiency, risks, and contraindications. Conclusion. This study will help users, eHealth app providers, and developers. On the one hand, users will be able to better select the most appropriate contraceptive apps. On the other hand, both eHealth app providers and developers can thereby: (1) identify the relevant features and functionalities implemented by the top-scoring contraceptive apps, and (2) distinguish contraception-related aspects that require further effort to be properly addressed and improved. {\textcopyright} 2022}, keywords = {awareness, contraception, Data availability, female, human, information security, medical record, Monitoring, Preferred Reporting Items for Systematic Reviews and Meta-Analyses, questionnaire, recording, reminder system, reproductive health, Review, social support, Systematic Review}, doi = {10.1016/j.hlpt.2022.100633}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85132651990\&doi=10.1016\%2fj.hlpt.2022.100633\&partnerID=40\&md5=b101e386be125c92bddf8bf212de3a39}, author = {Kharbouch, M. and Idri, A. and Fernandez-Aleman, J.L. and Redman, L. and Toval, A. and Stelate, Y.} } @article {El-Ateif2022, title = {Single-modality and joint fusion deep learning for diabetic retinopathy diagnosis}, journal = {Scientific African}, volume = {17}, year = {2022}, note = {cited By 0}, abstract = {The current study evaluated and compared single-modality and joint fusion deep learning approaches for automatic binary classification of diabetic retinopathy (DR) using seven convolutional neural network models (VGG19, ResNet50V2, DenseNet121, InceptionV3, InceptionResNetV2, Xception, and MobileNetV2) over two datasets: APTOS 2019 blindness detection and Messidor-2. The empirical evaluations used (1) six performance metrics (accuracy, sensitivity, specificity, precision, F1-score, and area under the curve), (2) the Scott-Knott Effect Size Difference (SK ESD) statistical test to rank and cluster the models based on accuracy, and (3) the Borda count voting method to rank the best models figuring in the first SK ESD cluster, based on sensitivity, specificity, precision, F1-score, and area under the curve. Results showed that the single-modality DenseNet121 and InceptionV3 were the top-performing and less sensitive approaches, with accuracies of 90.63\% and 75.25\%, respectively. The joint fusion strategy outperformed single-modality techniques across the two datasets, regardless of the modality used, because of the additional information provided by the preprocessed modality to the Fundus. The Fundus modality was the most favorable modality for DR diagnosis using the seven models.
Furthermore, the joint fusion VGG19 model performed best, with accuracies of 97.49\% and 91.20\% over APTOS19 and Messidor-2, respectively, as the VGG19 model was fine-tuned, unlike the remaining six models. In comparison with the state-of-the-art Attention Fusion and Cascaded Framework models, joint fusion VGG19 ranks below the Attention Fusion network by 5.6\% and outperforms the Cascaded Framework by 8\% on the Messidor dataset. {\textcopyright} 2022 The Authors}, doi = {10.1016/j.sciaf.2022.e01280}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85134929351\&doi=10.1016\%2fj.sciaf.2022.e01280\&partnerID=40\&md5=9519f7afe80a2afb76cceae701940911}, author = {El-Ateif, S. and Idri, A.} } @article {Idlahcen2022466, title = {Systematic Map of Data Mining for Gynecologic Oncology}, journal = {Lecture Notes in Networks and Systems}, volume = {468 LNNS}, year = {2022}, note = {cited By 0}, pages = {466-475}, abstract = {Gynecologic cancers are a significant cause of morbidity and mortality among women in both developed and low- and middle-income countries. To alleviate the burden, the application of Data Mining (DM) in gynecologic oncology is needed in clinical environments. This study presents a systematic mapping to explore in detail the breadth of the available literature on the use of DM in gynecologic oncology. The mapping questions and the PICO framework served to determine the search string of this systematic map. The resulting search was conducted on five well-known databases (PubMed, IEEE Xplore, ScienceDirect, Springer Link, and Google Scholar) to retrieve relevant articles published between 2011 and mid-2021. Of the 2,807 potential records, 169 studies fulfilled the inclusion/exclusion criteria and were analyzed in depth. The findings revealed that DM efforts have increased considerably since 2019, particularly for cervical cancer screening and diagnosis. Further studies are needed to investigate a wider range of research questions, as gynecologic oncology is a very rich field comprising a collection of cancers with distinct features, which, in turn, offer Machine Learning (ML) opportunities. {\textcopyright} 2022, The Author(s), under exclusive license to Springer Nature Switzerland AG.}, doi = {10.1007/978-3-031-04826-5_47}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85130252805\&doi=10.1007\%2f978-3-031-04826-5_47\&partnerID=40\&md5=62c013d4962d32086190ef98c3ae36e7}, author = {Idlahcen, F. and Idri, A.} } @article {Zizaan2022425, title = {Systematic Map of Machine Learning Based Breast Cancer Screening}, journal = {Lecture Notes in Networks and Systems}, volume = {468 LNNS}, year = {2022}, note = {cited By 0}, pages = {425-434}, abstract = {Although Breast Cancer (BC) deaths have decreased over time, BC is still the second largest cause of cancer death among women. With the technical revolution of Artificial Intelligence (AI) and big healthcare data becoming more of a reality, many researchers have attempted to employ Machine Learning (ML) techniques to gain a better understanding of this disease. The present paper is a systematic mapping study of the application of ML techniques in Breast Cancer Screening (BCS) between 2011 and early 2021. Out of the 129 candidate papers we retrieved from six digital libraries, a total of 66 papers were selected according to five criteria: year and publication venue, paper type, BCS modality, and empirical type.
The results show that classification was the most used ML objective, and that mammography was the most frequently used BCS modality. {\textcopyright} 2022, The Author(s), under exclusive license to Springer Nature Switzerland AG.}, doi = {10.1007/978-3-031-04826-5_43}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85130248734\&doi=10.1007\%2f978-3-031-04826-5_43\&partnerID=40\&md5=4017a93b47d63e929eb5d2e71e97f5f0}, author = {Zizaan, A. and Idri, A.} } @article {L{\'o}pez-Jim{\'e}nez2022, title = {Taking the pulse of a classroom with a gamified audience response system}, journal = {Computer Methods and Programs in Biomedicine}, volume = {213}, year = {2022}, note = {cited By 2}, abstract = {Background and objective: This paper presents an empirical study of a gamified mobile-based assessment approach that can be used to engage students and improve their educational performance. Method: A gamified audience response system called G-SIDRA was employed. Three gamification elements were used to motivate students in classroom activities: badges for achievements to increase engagement, points to indicate progression and performance in the subject, and a ranking to promote competitiveness. A total of 90 medical students in a General and Descriptive Anatomy of the Locomotor System course were taught using G-SIDRA in the academic year 2019/2020. Smart bracelets were configured to collect heart rate measurements from 30 students with the aim of evaluating the impact of the gamification elements. The control group consisted of a sample of 110 students enrolled on the same course in the academic year 2016/2017 using non-gamified SIDRA. Results: Statistically significant differences were found between the multiple choice question (MCQ) scores obtained using SIDRA and G-SIDRA in the four experiments (U = 1621.50, p < 0.01 for Exp1; U = 1950.00, p < 0.01 for Exp2; U = 955.00, p < 0.01 for Exp3; U = 2335.00, p < 0.01 for Exp4). In the students{\textquoteright} final exam grades, statistically significant differences were found between students who used G-SIDRA and those who used SIDRA (T(157) = 3.992; p = 0.044). Concerning gamification elements, statistically significant differences were found when comparing the pulse increases before and after the badge event in the four experiments (U = 2484.00}, keywords = {academic achievement, Article, Audience response systems, controlled study, E - learning, e-learning, Empirical studies, empiricism, Gamification, Gamified audience response system, Heart, heart rate, heart rate measurement, Heart rate variations, Heart-rate, human, Human anatomy, human experiment, Humans, Learning systems, Likert scale, major clinical study, medical student, motivation, multiple choice test, Musculoskeletal system, Performance, satisfaction, Statistically significant difference, Students, theoretical study}, doi = {10.1016/j.cmpb.2021.106459}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85118787418\&doi=10.1016\%2fj.cmpb.2021.106459\&partnerID=40\&md5=6338f4b58ed89528e2663ca12be4c986}, author = {L{\'o}pez-Jim{\'e}nez, J.J. and Fernandez-Aleman, J.L. and Gonz{\'a}lez, L.L. and Sequeros, O.G. and Valle, B.M. and Garc{\'\i}a-Bern{\'a}, J.A. and Idri, A.
and Toval, A.} } @article {Elmidaoui2022202, title = {Towards a Taxonomy of Software Maintainability Predictors: A Detailed View}, journal = {Lecture Notes in Networks and Systems}, volume = {470 LNNS}, year = {2022}, note = {cited By 0}, pages = {202-210}, abstract = {To help practitioners and researchers choose the most suitable predictors when selecting from existing Software Product Maintainability Prediction (SPMP) models or designing new ones, a literature review of empirical studies on SPMP identified a large number of metrics or factors used as predictors of maintainability. However, there is redundancy and ambiguity in both the naming and the meaning of these predictors. To address this terminology issue, a one-level taxonomy of the SPMP predictors identified in the literature review has been proposed. This paper now proposes a more detailed two-level taxonomy in which the first level refers to four categories (software design, software size, quality attributes (or factors), and software process) and the second to sub-categories of the predictors inventoried from empirical studies on SPMP. {\textcopyright} 2022, The Author(s), under exclusive license to Springer Nature Switzerland AG.}, doi = {10.1007/978-3-031-04829-6_18}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85130308935\&doi=10.1007\%2f978-3-031-04829-6_18\&partnerID=40\&md5=e8e2d1411e399bbdb041c3d9d26ccad6}, author = {Elmidaoui, S. and Cheikhi, L. and Idri, A. and Abran, A.} } @article {Benhar2022791, title = {Univariate and Multivariate Filter Feature Selection for Heart Disease Classification}, journal = {Journal of Information Science and Engineering}, volume = {38}, number = {4}, year = {2022}, note = {cited By 0}, pages = {791-803}, abstract = {Feature selection (FS) is a data preprocessing task that can be applied before the classification phase, and aims at improving the performance and interpretability of classifiers by finding only a few highly informative features. The present study aims at evaluating and comparing the performances of six univariate and two multivariate filter FS techniques for heart disease classification. The FS techniques were evaluated with two white-box and two black-box classification techniques using five heart disease datasets. Furthermore, this study deals with setting the hyperparameter values of the four classifiers, evaluating 600 classifier variants in total. Results show that white-box classification techniques such as K-Nearest Neighbors and Decision Trees can be very competitive with black-box ones when hyperparameter optimization and feature selection are applied. {\textcopyright} 2022 Institute of Information Science. All rights reserved.}, keywords = {Black boxes, Cardiology, Classification (of information), Data preprocessing, Decision trees, disease classification, Diseases, feature selection, Features selection, Filter, Heart, heart disease, Nearest neighbor search, Performance, Selection techniques, Univariate, White box}, doi = {10.6688/JISE.202207_38(4).0006}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85137170374\&doi=10.6688\%2fJISE.202207_38\%284\%29.0006\&partnerID=40\&md5=f75bca8ff78e6782d4c11e88d338784f}, author = {Benhar, H. and Hosni, M.
and Idri, A.} } @article {Lahmar202289, title = {On the value of deep learning for diagnosing diabetic retinopathy}, journal = {Health and Technology}, volume = {12}, number = {1}, year = {2022}, note = {cited By 3}, pages = {89-105}, abstract = {Diabetic retinopathy (DR) is one of the main causes of vision loss around the world. The early diagnosis of this disease can help in treating it efficiently. Deep learning (DL) is rapidly becoming the state of the art, leading to enhanced performance in various medical applications such as diabetic retinopathy and breast cancer. In this paper, we conduct an empirical evaluation of seven convolutional neural network (CNN) architectures for an automatic binary classification of referable diabetic retinopathy; the DL architectures (Inception_ResNet_V2, Inception_V3, ResNet50, VGG16, VGG19, MobileNet_V2 and DenseNet201) were evaluated and compared in terms of accuracy, sensitivity, specificity, precision and F1-score using the Scott Knott test and the Borda count voting method. All the empirical evaluations were performed over three datasets: APTOS, Kaggle DR and Messidor-2, using a k-fold cross-validation method. Experiments showed the importance of using deep learning in the classification of DR, since the seven models gave high accuracy values. Furthermore, DenseNet201 and MobileNet_V2 were the top two performing techniques, respectively. DenseNet201 provided the best performance for the Kaggle and Messidor-2 datasets, with accuracies of 84.74\% and 85.79\%, respectively. MobileNet_V2 provided the best performance on the APTOS dataset, with an accuracy of 93.09\%. As for ResNet50, Inception_V3 and Inception_ResNet_V2, they performed worst among the DL techniques. Therefore, we recommend the use of DenseNet201 and MobileNet_V2 for the detection of referable DR, since they provided the best performances on the three datasets. {\textcopyright} 2021, IUPESM and Springer-Verlag GmbH Germany, part of Springer Nature.}, keywords = {Article, Binary classification, comparative effectiveness, controlled study, Convolutional neural network, Deep learning, densenet201, Diabetic retinopathy, diagnostic accuracy, diagnostic test accuracy study, diagnostic value, human, inception resnet v2, inception v3, k fold cross validation, mobilenet v2, residual neural network, resnet50, sensitivity and specificity, vgg16, vgg19}, doi = {10.1007/s12553-021-00606-x}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85116986540\&doi=10.1007\%2fs12553-021-00606-x\&partnerID=40\&md5=0a388e86c6b6b49bc1f026a6defbfa2d}, author = {Lahmar, C. and Idri, A.} } @article {Hakkoum2021587, title = {Assessing and Comparing Interpretability Techniques for Artificial Neural Networks Breast Cancer Classification}, journal = {Computer Methods in Biomechanics and Biomedical Engineering: Imaging and Visualization}, volume = {9}, number = {6}, year = {2021}, note = {cited By 11}, pages = {587-599}, abstract = {Breast cancer (BC) is the most common type of cancer among women. Thankfully, early detection and treatment improvements have helped decrease the number of deaths. Data Mining techniques have long assisted BC tasks, whether screening, diagnosis, prognosis, treatment, monitoring, and/or management. Nowadays, the use of Data Mining is witnessing a new era. In fact, the main objective is no longer to replace humans but to enhance their capabilities, which is why Artificial Intelligence is now referred to as Intelligence Augmentation.
In this context, interpretability is used to help domain experts learn new patterns and machine learning experts debug their models. This paper aims to investigate three black-box interpretation techniques (Feature Importance, Partial Dependence Plot, and LIME) when applied to two types of feed-forward Artificial Neural Networks (Multilayer Perceptrons and Radial Basis Function Networks) trained on the Wisconsin Original dataset for breast cancer diagnosis. Results showed that local LIME explanations were instance-level interpretations that were in line with the global interpretations of the other two techniques. Global/local interpretability techniques can thus be combined to define the trustworthiness of a black-box model. {\textcopyright} 2021 Informa UK Limited, trading as Taylor \& Francis Group.}, keywords = {Article, Artificial intelligence, artificial neural network, Breast Cancer, Breast cancer classifications, cancer diagnosis, Computer aided diagnosis, cross validation, Data mining, Data-mining techniques, Diseases, Domain experts, early diagnosis, entropy, Explainability, Feature importance, Interpretability, Learn+, learning, learning algorithm, Lime, Machine learning, Multilayer neural networks, nerve cell, nonhuman, Partial dependence plot, perceptron, prediction, prognosis, Radial basis function networks, Treatment monitoring}, doi = {10.1080/21681163.2021.1901784}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85103249025\&doi=10.1080\%2f21681163.2021.1901784\&partnerID=40\&md5=78e1e57a62692bab2b39984182af7904}, author = {Hakkoum, H. and Idri, A. and Abnane, I.} } @article {Zerouaoui2021186, title = {Breast Fine Needle Cytological Classification Using Deep Hybrid Architectures}, journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)}, volume = {12950 LNCS}, year = {2021}, note = {cited By 6}, pages = {186-202}, abstract = {Diagnosing breast cancer in its early stages significantly decreases the mortality rate by allowing the choice of adequate treatment. This paper develops and evaluates twenty-eight hybrid architectures combining seven recent deep learning techniques for feature extraction (DenseNet 201, Inception V3, Inception ResNet V2, MobileNet V2, ResNet 50, VGG16 and VGG19), and four classifiers (MLP, SVM, DT and KNN) for binary classification of breast cytological images over the FNAC dataset. To evaluate the designed architectures, we used: (1) four classification performance criteria (accuracy, precision, recall and F1-score), (2) the Scott Knott (SK) statistical test to cluster the developed architectures and identify the best cluster of the outperforming architectures, and (3) the Borda Count voting method to rank the best performing architectures. Results showed the potential of combining deep learning techniques for feature extraction and classical classifiers to classify breast cancer into malignant and benign tumors. The hybrid architectures using the MLP classifier and DenseNet 201 for feature extraction were the top performing architectures, with an accuracy value reaching 99\% over the FNAC dataset. The findings of this study therefore recommend the use of hybrid architectures using DenseNet 201 for the feature extraction of breast cancer cytological images, since it gave the best results for the FNAC images, especially when combined with the MLP classifier.
{\textcopyright} 2021, Springer Nature Switzerland AG.}, keywords = {Binary classification, Breast Cancer, Classification (of information), Classification performance, Cluster computing, Clustering algorithms, Computer aided diagnosis, Computer architecture, Convolutional neural networks, Deep neural networks, Diseases, Extraction, Feature extraction, Features extraction, Histological images, Hybrid architectures, Image classification, Images processing, Learning algorithms, Learning techniques, Medical imaging, MLP classifiers, Mortality rate, Network architecture, Support vector machines}, doi = {10.1007/978-3-030-86960-1_14}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85125258661\&doi=10.1007\%2f978-3-030-86960-1_14\&partnerID=40\&md5=a4535504f03d3143ae1648f7edeb152c}, author = {Zerouaoui, H. and Idri, A. and Nakach, F.Z. and Hadri, R.E.} } @article {ElOuassif202150, title = {Classification techniques in breast cancer diagnosis: A systematic literature review}, journal = {Computer Methods in Biomechanics and Biomedical Engineering: Imaging and Visualization}, volume = {9}, number = {1}, year = {2021}, note = {cited By 12}, pages = {50-77}, abstract = {Data mining (DM) consists in analysing a set of observations to find unsuspected relationships and then summarising the data in new ways that are both understandable and useful. It has become widely used in various medical fields including breast cancer (BC), which is the most common cancer and the leading cause of death among women worldwide. BC diagnosis is a challenging medical task and many studies have attempted to apply classification techniques to it. The objective of the present study is to identify studies on classification techniques in BC diagnosis and to analyse them from three perspectives: classification techniques used, accuracy of the classifiers, and comparison of performance. We performed a systematic literature review (SLR) of 176 selected studies published between January 2000 and November 2018. The results show that, of the nine classification techniques investigated, artificial neural networks, support vector machines and decision trees were the most frequently used. Moreover, artificial neural networks, support vector machines and ensemble classifiers performed better than the other techniques, with median accuracy values of 95\%, 95\% and 96\% respectively. Most of the selected studies (57.4\%) used datasets containing different types of images such as mammographic, ultrasound, and microarray images. {\textcopyright} 2021 Informa UK Limited, trading as Taylor \& Francis Group.}, keywords = {Article, Artificial intelligence, artificial neural network, Breast Cancer, Breast cancer diagnosis, cancer diagnosis, cause of death, Causes of death, Classification (of information), Classification technique, Comparison of performance, Computer aided diagnosis, data extraction, Data mining, data synthesis, decision tree, Decision trees, Diseases, human, k nearest neighbor, Machine learning, Medical fields, Medical informatics, Network support, Neural networks, publication, qualitative research, Quality control, support vector machine, Support vector machine classifiers, Support vector machines, Support vectors machine, Systematic literature review, Systematic Review, validity}, doi = {10.1080/21681163.2020.1811159}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85098842973\&doi=10.1080\%2f21681163.2020.1811159\&partnerID=40\&md5=9a48998b1c44d263863efcfb25f9920f}, author = {ElOuassif, B. and Idri, A.
and Hosni, M. and Abran, A.} } @article {Benbriqa2021588, title = {Deep and Ensemble Learning Based Land Use and Land Cover Classification}, journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)}, volume = {12951 LNCS}, year = {2021}, note = {cited By 1}, pages = {588-604}, abstract = {Monitoring land use and land cover (LULC) changes is a highly burdensome task for humans. Therefore, machine learning based classification systems can help to deal with this challenge. In this context, this study evaluates and compares the performance of two Single Learning (SL) techniques and one Ensemble Learning (EL) technique. All the empirical evaluations were over the open source LULC dataset proposed by the German Center for Artificial Intelligence (EuroSAT), and used the following performance criteria: accuracy, precision, recall, F1-score and, for the EL classifiers, change in accuracy. We first evaluate the performance of SL techniques: building and optimizing a Convolutional Neural Network architecture, implementing transfer learning, and training machine learning algorithms on visual features extracted by deep feature extractors. Second, we assess EL techniques and compare them with SL classifiers. Finally, we compare the capability of EL and hyperparameter tuning to improve the performance of the deep learning models we built. These experiments showed that transfer learning is the SL technique that achieves the highest accuracy and that EL can indeed outperform the SL classifiers. {\textcopyright} 2021, Springer Nature Switzerland AG.}, keywords = {Classification (of information), Convolutional neural networks, Deep feature extraction, Deep learning, Ensemble learning, Features extraction, Hyper-parameter optimizations, Land cover, Land use, Learning algorithms, Learning classifiers, Learning techniques, Network architecture, Performance, Transfer learning}, doi = {10.1007/978-3-030-86970-0_41}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85115689890\&doi=10.1007\%2f978-3-030-86970-0_41\&partnerID=40\&md5=910871e0f58b4f00460e5e2509929a23}, author = {Benbriqa, H. and Abnane, I. and Idri, A. and Tabiti, K.} } @article {L{\'o}pez-Jim{\'e}nez2021, title = {Effects of gamification on the benefits of student response systems in learning of human anatomy: Three experimental studies}, journal = {International Journal of Environmental Research and Public Health}, volume = {18}, number = {24}, year = {2021}, note = {cited By 2}, abstract = {This paper presents three experiments to assess the impact of gamifying an audience response system on the perceptions and educational performance of students. An audience response system called SIDRA (Immediate Audience Response System in Spanish) and two audience response systems with gamification features, R-G-SIDRA (gamified SIDRA with ranking) and RB-G-SIDRA (gamified SIDRA with ranking and badges), were used in a General and Descriptive Human Anatomy course. Students participated in an empirical study. In the academic year 2019{\textendash}2020, a total of 90 students used RB-G-SIDRA, 90 students employed R-G-SIDRA in the academic year 2018{\textendash}2019, and 92 students used SIDRA in the academic year 2017{\textendash}2018.
Statistically significant differences were found between the final exam grades obtained using RB-G-SIDRA and SIDRA.}, keywords = {academic achievement, academic performance, anatomy, experiment, Gamification, human, Humans, learning, motivation, perception, student, Students}, doi = {10.3390/ijerph182413210}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85121118668\&doi=10.3390\%2fijerph182413210\&partnerID=40\&md5=29b9263d7b8db439936394c1f2826a10}, author = {L{\'o}pez-Jim{\'e}nez, J.J. and Fernandez-Aleman, J.L. and Garc{\'\i}a-Bern{\'a}, J.A. and Gonz{\'a}lez, L.L. and Sequeros, O.G. and Ros, J.N. and Carrillo de Gea, J.M. and Idri, A. and Toval, A.} } @article {CHLIOUI20211039, title = {Ensemble case based reasoning imputation in breast cancer classification}, journal = {Journal of Information Science and Engineering}, volume = {37}, number = {5}, year = {2021}, note = {cited By 1}, pages = {1039-1051}, abstract = {Missing Data (MD) is a common drawback that affects breast cancer classification. Thus, handling missing data is essential before building any breast cancer classifier. This paper presents the impact of using ensemble Case-Based Reasoning (CBR) imputation on breast cancer classification. Thereafter, we evaluated the influence of CBR using parameter tuning and ensemble CBR (E-CBR) with three missingness mechanisms (MCAR: missing completely at random, MAR: missing at random and NMAR: not missing at random) and nine percentages (10\% to 90\%) on the accuracy rates of five classifiers: Decision trees, Random forest, K-nearest neighbor, Support vector machine and Multi-layer perceptron over two Wisconsin breast cancer datasets. All experiments were implemented using the Weka Java API 3.8; SPSS v20 was used for statistical tests. The findings confirmed that E-CBR yields better results than CBR for all five classifiers. The MD percentage negatively affects classifier performance: as the MD percentage increases, the accuracy rates of the classifiers decrease regardless of the MD mechanism and technique. RF with E-CBR outperformed all the other combinations (MD technique, classifier) with 89.72\% for MCAR, 87.08\% for MAR and 86.84\% for NMAR. {\textcopyright} 2021 Institute of Information Science. All rights reserved.}, keywords = {Accuracy rate, Breast Cancer, Breast cancer classifications, Cancer classifier, Case based reasoning, Case-based reasoning imputation, Casebased reasonings (CBR), Classification (of information), Data handling, Decision trees, Diseases, Ensemble, Missing at randoms, Missing data, Nearest neighbor search, Parameters tuning, Support vector machines}, doi = {10.6688/JISE.202109_37(5).0004}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85115966179\&doi=10.6688\%2fJISE.202109_37\%285\%29.0004\&partnerID=40\&md5=97c15046a8900f9df38ec3430801c844}, author = {Chlioui, I. and Idri, A. and Abnane, I. and EZZAT, M.} } @article {Wadghiri2021544, title = {Ensemble Regression for Blood Glucose Prediction}, journal = {Advances in Intelligent Systems and Computing}, volume = {1365 AIST}, year = {2021}, note = {cited By 1}, pages = {544-554}, abstract = {Background: Predicting blood glucose commonly presents many challenges when the designed models are tested in different contexts. Ensemble methods are a set of learning algorithms that have been successfully used in many medical fields to improve prediction accuracy. This paper aims to review the typology of ensembles used in the literature to predict blood glucose.
Methods: 32 papers published between 2000 and 2020 in 6 digital libraries were selected and reviewed with regard to: years and publication sources, integrated factors, data sources used to collect the data, and types of ensembles. Results: The review found that this research topic is still recent but has gained growing interest in recent years. Ensemble models often used blood glucose, insulin, diet and exercise as inputs to predict blood glucose. Moreover, both homogeneous and heterogeneous ensembles have been investigated. Conclusions: Increasing interest has been devoted to blood glucose prediction using ensemble methods during the last decade. Several gaps regarding the design of the reviewed ensembles and the data collection process have been identified, and recommendations have been formulated in this direction. {\textcopyright} 2021, The Author(s), under exclusive license to Springer Nature Switzerland AG.}, keywords = {Blood, Data acquisition, Data collection process, Designed models, Digital libraries, Ensemble methods, Ensemble models, Forecasting, Glucose, Heterogeneous ensembles, Information systems, Information use, Learning algorithms, Medical fields, Prediction accuracy, Research topics}, doi = {10.1007/978-3-030-72657-7_52}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85105963359\&doi=10.1007\%2f978-3-030-72657-7_52\&partnerID=40\&md5=4ba83ca8e7ba5bae5fdcd172b58fb8e9}, author = {Wadghiri, M.Z. and Idri, A. and Idrissi, T.E.} } @article {Amazal2021, title = {Estimating software development effort using fuzzy clustering-based analogy}, journal = {Journal of Software: Evolution and Process}, volume = {33}, number = {4}, year = {2021}, note = {cited By 3}, abstract = {During the past decades, many studies have been carried out in an attempt to build accurate software development effort estimation techniques. However, none of the techniques proposed has proven to be successful at predicting software effort in all circumstances. Among these techniques, analogy-based estimation has gained significant popularity within the software engineering community because of its outstanding performance and ability to mimic the human problem solving approach. One of the challenges facing analogy-based effort estimation is how to predict effort when software projects are described by a mixture of continuous and categorical features. To address this issue, the present study proposes an improvement of our former 2FA-kprototypes technique referred to as 2FA-cmeans. 2FA-cmeans uses a clustering technique, called general fuzzy c-means, which is a generalization of the fuzzy c-means clustering technique to cluster objects with mixed attributes. The performance of 2FA-cmeans was evaluated and compared with that of our former 2FA-kprototypes technique as well as classical analogy over six datasets that are quite diverse and have different sizes. Empirical results showed that 2FA-cmeans outperforms the two other analogy techniques using both all-in and jackknife evaluation methods. This was also confirmed by the win{\textendash}tie{\textendash}loss statistics and the Scott{\textendash}Knott test.
{\textcopyright} 2020 John Wiley \& Sons, Ltd.}, keywords = {Analogy based estimation, Categorical features, Cluster analysis, Clustering techniques, Engineering community, Estimating software, Fuzzy C means clustering, Fuzzy clustering, Fuzzy systems, Human problem solving, Software design, Software development effort}, doi = {10.1002/smr.2324}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85092446413\&doi=10.1002\%2fsmr.2324\&partnerID=40\&md5=76da5a67bd24c945c3aa985d13dad134}, author = {Amazal, F.A. and Idri, A.} } @article {ElIdrissi2021347, title = {Evaluating and Comparing Deep Learning Architectures for Blood Glucose Prediction}, journal = {Communications in Computer and Information Science}, volume = {1400 CCIS}, year = {2021}, note = {cited By 0}, pages = {347-365}, abstract = {To manage their disease, diabetic patients need to control the blood glucose level (BGL) by monitoring it and predicting its future values. This allows avoiding high or low BGL by taking recommended actions in advance. In this paper, we conduct a comparative study of two emerging deep learning techniques: Long Short-Term Memory (LSTM) and Convolutional Neural Networks (CNN) for one-step and multi-step-ahead forecasting of the BGL based on Continuous Glucose Monitoring (CGM) data. The objectives are twofold: 1) determining the best strategies of multi-step-ahead forecasting (MSF) to fit the CNN and LSTM models respectively, and 2) comparing the performances of the CNN and LSTM models for one-step and multi-step prediction. Toward these objectives, we first conducted a series of experiments on a CNN model through parameter selection to determine its best configuration. The LSTM model we used in the present study was developed and evaluated in an earlier work. Thereafter, five MSF strategies were developed and evaluated for the CNN and LSTM models using the Root-Mean-Square Error (RMSE) with a horizon of 30 min. To statistically assess the differences between the performances of the CNN and LSTM models, we used the Wilcoxon statistical test. The results showed that: 1) no MSF strategy outperformed the others for both CNN and LSTM models, and 2) the proposed CNN model significantly outperformed the LSTM model for both one-step and multi-step prediction. {\textcopyright} 2021, Springer Nature Switzerland AG.}, keywords = {Biomedical engineering, Blood, Blood glucose level, Comparative studies, Continuous glucosemonitoring (CGM), Convolutional neural networks, Deep learning, Diabetic patient, Disease control, Forecasting, Glucose, Learning architectures, Learning techniques, Long short-term memory, Mean square error, Parameters selection, Root mean square errors}, doi = {10.1007/978-3-030-72379-8_17}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85107268637\&doi=10.1007\%2f978-3-030-72379-8_17\&partnerID=40\&md5=2b36d3e75ce8d317c8b80f3272f4d956}, author = {El Idrissi, T. and Idri, A.} } @article {ElHafidy2021, title = {Gamified mobile applications for improving driving behavior: A systematic mapping study}, journal = {Mobile Information Systems}, volume = {2021}, year = {2021}, note = {cited By 4}, abstract = {Many research works and official reports confirm that irresponsible driving behavior on the road is the main cause of accidents. Consequently, responsible driving behavior can significantly reduce the number and severity of accidents.
Therefore, in both research and industry, mobile technologies are widely exploited to assist drivers in reducing accident rates and preventing accidents. For instance, several mobile apps are provided to assist drivers in improving their driving behavior. Recently, thanks to mobile cloud computing, smartphones can benefit from the computing power of servers in the cloud for executing machine learning algorithms. Therefore, many driving assistance and control mobile applications are based on machine learning techniques that automatically adjust their functioning to driver history, context, and profile. Additionally, gamification is a key element in the design of these mobile applications that allows drivers to develop their engagement and motivation to improve their driving behavior. To obtain an overview of existing mobile apps that improve driving behavior, we conducted a systematic mapping study of driving behavior mobile apps available in the most common mobile app repositories or published as research works in digital libraries. In particular, we explore their functionalities, the kinds of data collected, the gamification elements used, and the machine learning techniques and algorithms employed. We have successfully identified 220 mobile apps that help to improve driving behavior. In this work, we extract all the data that seem useful for the classification and analysis of the functionalities offered by these applications. {\textcopyright} 2021 Abderrahim El hafidy et al.}, keywords = {Accidents, Computing power, Digital libraries, Driving assistance, Driving behavior, Gamification, Industrial area, Industrial research, Learning algorithms, Machine learning, Machine learning techniques, Mapping, Mobile applications, Mobile cloud computing, Mobile computing, Mobile Technology, Systematic mapping studies}, doi = {10.1155/2021/6677075}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85114117887\&doi=10.1155\%2f2021\%2f6677075\&partnerID=40\&md5=6ad1b624deabb49917039716eb133f13}, author = {El Hafidy, A. and Rachad, T. and Idri, A. and Zellou, A.} } @conference {Abnane20211, title = {Heterogeneous ensemble imputation for software development effort estimation}, booktitle = {PROMISE 2021 - Proceedings of the 17th International Conference on Predictive Models and Data Analytics in Software Engineering, co-located with ESEC/FSE 2021}, year = {2021}, note = {cited By 1}, pages = {1-10}, abstract = {Choosing the appropriate Missing Data (MD) imputation technique for a given Software development effort estimation (SDEE) technique is not a trivial task. In fact, the impact of MD imputation on the estimation output depends on the dataset and the SDEE technique used, and there is no best imputation technique for all contexts. Thus, an attractive solution is to use more than one single imputation technique and combine their results for a final imputation outcome. This concept is called ensemble imputation and can help to significantly improve the estimation accuracy. This paper develops and evaluates a heterogeneous ensemble imputation whose members were the four single imputation techniques: K-Nearest Neighbors (KNN), Expectation Maximization (EM), Support Vector Regression (SVR), and Decision Trees (DT).
The impact of the ensemble imputation was evaluated and compared with those of the four single imputation techniques on the accuracy, measured in terms of the standardized accuracy criterion, of four SDEE techniques: Case Based Reasoning (CBR), Multi-Layer Perceptron (MLP), Support Vector Regression (SVR) and Reduced Error Pruning Tree (REPTree). The Wilcoxon statistical test was also performed in order to assess whether the results are significant. All the empirical evaluations were carried out over six datasets, namely ISBSG, China, COCOMO81, Desharnais, Kemerer, and Miyazaki. Results show that the use of heterogeneous ensemble-based imputation instead of single imputation significantly improved the accuracy of the four SDEE techniques. Indeed, the ensemble imputation technique was ranked either first or second in all contexts. {\textcopyright} 2021 ACM.}, keywords = {Case based reasoning, Casebased reasonings (CBR), Decision trees, Empirical evaluations, Expectation Maximization, Forestry, Heterogeneous ensembles, Imputation techniques, K nearest neighbor (KNN), Maximum principle, Multilayer neural networks, Nearest neighbor search, Predictive analytics, Software design, Software development effort, Support vector regression, Support vector regression (SVR)}, doi = {10.1145/3475960.3475984}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85113586361\&doi=10.1145\%2f3475960.3475984\&partnerID=40\&md5=7bd6bbfb801a84cd7694e3713d2d0081}, author = {Abnane, I. and Idri, A. and Hosni, M. and Abran, A.} } @conference {ElOuassif2021352, title = {Homogeneous ensemble based support vector machine in breast cancer diagnosis}, booktitle = {HEALTHINF 2021 - 14th International Conference on Health Informatics; Part of the 14th International Joint Conference on Biomedical Engineering Systems and Technologies, BIOSTEC 2021}, year = {2021}, note = {cited By 5}, pages = {352-360}, abstract = {Breast Cancer (BC) is one of the most common forms of cancer and one of the leading causes of mortality among women. Hence, detecting and accurately diagnosing BC at an early stage remains a major factor in women{\textquoteright}s long-term survival. To this aim, numerous single techniques have been proposed and evaluated for BC classification. However, none of them proved to be suitable in all situations. Currently, ensemble methods are widely investigated to help diagnose BC; they consist in generating one classification model by combining more than one single technique by means of a combination rule. This paper evaluates homogeneous ensembles whose members are four variants of the Support Vector Machine (SVM) classifier. The four SVM variants used four different kernels: Linear Kernel, Normalized Polynomial Kernel, Radial Basis Function Kernel, and Pearson VII function based Universal Kernel. A Multilayer Perceptron (MLP) classifier is used for combining the outputs of the base classifiers to produce a final decision. Four well-known available BC datasets are used from online repositories. The findings of this study suggest that: (1) ensembles provided a very promising performance compared to their base classifiers, and (2) there is no SVM ensemble with a combination of kernels that performs best in all datasets. Copyright {\textcopyright} 2021 by SCITEPRESS {\textendash} Science and Technology Publications, Lda.
All rights reserved}, keywords = {Base classifiers, Biomedical engineering, Breast cancer diagnosis, Classification models, Combination rules, Computer aided diagnosis, Diseases, Medical informatics, Multilayer neural networks, Multilayer Perceptron (MLP) classifier, Online repositories, Polynomial kernels, Radial basis function kernels, Support vector machines}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85103824221\&partnerID=40\&md5=2340dec93a42872a9ece0b6f6b9fccbc}, author = {El Ouassif, B. and Idri, A. and Hosni, M.} } @article {ElOuassif2021263, title = {Investigating Accuracy and Diversity in Heterogeneous Ensembles for Breast Cancer Classification}, journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)}, volume = {12950 LNCS}, year = {2021}, note = {cited By 2}, pages = {263-281}, abstract = {Breast Cancer (BC) is one of the most common forms of cancer among women. Detecting and accurately diagnosing breast cancer at an early phase increases the chances of women{\textquoteright}s survival. For this purpose, various single classification techniques have been investigated to diagnose BC. Nevertheless, none of them proved to be accurate in all circumstances. Recently, a promising approach called ensemble classifiers has been widely used to assist physicians in accurately diagnosing BC. Ensemble classifiers consist in combining a set of single classifiers by means of an aggregation layer. The literature in general shows that ensemble techniques outperform single ones when ensemble members are accurate (i.e. have the lowest percentage error) and diverse (i.e. the single classifiers make uncorrelated errors on new instances). Hence, selecting ensemble members is often a crucial task, since it can lead to the opposite: single techniques outperforming their ensemble. This paper evaluates and compares ensemble members{\textquoteright} selection based on accuracy and diversity with ensemble members{\textquoteright} selection based on accuracy only. A comparison with ensembles without member selection was also performed. Ensemble performance was assessed in terms of accuracy and F1-score. The Q-statistics diversity measure was used to calculate the classifiers{\textquoteright} diversity. The experiments were carried out on three well-known BC datasets available from online repositories. Seven single classifiers were used in our experiments. The Scott Knott test and the Borda Count voting system were used to assess the significance of the performance differences and rank ensembles according to their performances. The findings of this study suggest that: (1) investigating both accuracy and diversity to select ensemble members often led to better performance, and (2) in general, selecting ensemble members using accuracy and/or diversity led to better ensemble performance than constructing ensembles without member selection. {\textcopyright} 2021, Springer Nature Switzerland AG.}, keywords = {Breast Cancer, Combining classifiers, Computer aided diagnosis, Diseases, Diversity measure, Ensemble members, Ensemble-classifier, Heterogeneous ensembles, Member selection, Performance, Selection based, Voting, Voting machines}, doi = {10.1007/978-3-030-86960-1_19}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85125278232\&doi=10.1007\%2f978-3-030-86960-1_19\&partnerID=40\&md5=58285ac3fe1b29564700bef136a94771}, author = {El Ouassif, B. and Idri, A.
and Hosni, M.} } @article {Kharbouch2021, title = {Mobile Technology for Improved Contraceptive Care in Morocco}, journal = {Journal of Medical Systems}, volume = {45}, number = {2}, year = {2021}, note = {cited By 0}, abstract = {The fulfillment of unmet needs for contraception can help women reach their reproductive goals. Therefore, there is a growing concern worldwide about contraception and women{\textquoteright}s knowledge to make an informed choice about it. In this respect, a growing number of apps are now available that provide information concerning contraception, whether natural or modern. However, many of these apps contain inaccurate sexual health facts and non-evidence-based information concerning contraception. On these bases, and with respect to: (1) the needs of women to effectively prevent unintended pregnancies while leading a stress-free healthy lifestyle, (2) the recommendations of the World Health Organization (WHO) Medical Eligibility Criteria (MEC) for contraception, and (3) the results/recommendations of a field study conducted in the reproductive health center {\textquoteleft}Les Orangers{\textquoteright} in Rabat to collect the app{\textquoteright}s requirements, we developed an evidence-based patient-centered contraceptive app referred to as {\textquoteleft}MyContraception{\textquoteright}. Thereafter, we conducted a set of functional tests to ensure that the MyContraception solution performs as expected and conforms to the software functional requirements previously set, before moving to non-functional requirements evaluation. Since customers{\textquoteright} feedback is valuable for non-functional testing, we chose to evaluate potential users{\textquoteright} feedback. Moreover, given that mobile app testing is a complex process involving different skill sets, we elaborated a rigorous experimental design to conduct an empirical evaluation of the MyContraception solution, which will exhaustively assess the overall quality of this solution and examine its effects on improving the quality of patient-centered contraception care. {\textcopyright} 2021, Springer Science+Business Media, LLC, part of Springer Nature.}, keywords = {adult, Article, awareness, contraception, contraceptive agent, Contraceptive Agents, contraceptive behavior, eligibility criteria, evidence based practice center, experimental design, female, female fertility, field study, health care quality, healthy lifestyle, human, Humans, injectable contraceptive agent, long-acting reversible contraception, luteinizing hormone, menstrual cycle, Methodology, mobile application, Mobile applications, Morocco, oral contraceptive agent, patient care, pill, pregnancy, prescription, Privacy, questionnaire, reliability, reproductive health, risk factor, sexual health, technology, unplanned pregnancy, women{\textquoteright}s health, World Health Organization}, doi = {10.1007/s10916-020-01684-6}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85099086862\&doi=10.1007\%2fs10916-020-01684-6\&partnerID=40\&md5=91a540c873cdd78725ec3f6987f5b90f}, author = {Kharbouch, M. and Idri, A. and Rachad, T. and Alami, H. and Redman, L.
and Stelate, Y.} } @conference {Boujida2021102, title = {Neural networks based software development effort estimation: A systematic mapping study}, booktitle = {Proceedings of the 16th International Conference on Software Technologies, ICSOFT 2021}, year = {2021}, note = {cited By 0}, pages = {102-110}, abstract = {Developing an efficient model that accurately predicts the development effort of a software project is an important task in software project management. Artificial neural networks (ANNs) are promising for building predictive models given their ability to learn from previous data, adapt and produce more accurate results. In this paper, we conducted a systematic mapping study of papers dealing with the estimation of software development effort based on artificial neural networks. In total, 80 relevant studies published between 1993 and 2020 were identified and classified with respect to five criteria: publication source, research approach, contribution type, techniques used in combination with ANN models, and type of neural network used. The results showed that most ANN-based software development effort estimation (SDEE) studies applied the history-based evaluation (HE) and solution proposal (SP) approaches. Moreover, the feedforward neural network was the most frequently used ANN type among SDEE researchers. To improve the performance of ANN models, most papers employed optimization methods such as Genetic Algorithms (GA) and Particle Swarm Optimization (PSO) in combination with ANN models. Copyright {\textcopyright} 2021 by SCITEPRESS {\textendash} Science and Technology Publications, Lda. All rights reserved}, keywords = {Ann models, Feedforward neural networks, Genetic algorithms, Mapping, Optimization method, Particle swarm optimization (PSO), Predictive analytics, Predictive models, Project management, Research approach, Software design, Software development effort, Software project, Software project management, Systematic mapping studies}, doi = {10.5220/0010603701020110}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85111770639\&doi=10.5220\%2f0010603701020110\&partnerID=40\&md5=c4592c4b704daf3823eb5e9e3e1e5693}, author = {Boujida, F.E. and Amazal, F.A. and Idri, A.} } @article {ElFhel2021324, title = {A Requirements Catalog of Mobile Geographic Information System for Data Collection}, journal = {Advances in Intelligent Systems and Computing}, volume = {1366 AISC}, year = {2021}, note = {cited By 2}, pages = {324-336}, abstract = {Mobile Geographic Information Systems (mGIS) for data collection are designed to capture, analyze and manage geographical data. The aim of this paper is to define a requirements catalog for mGIS for data collection based on the main software engineering standards, GIS standards and the literature. The catalog contains functional requirements in addition to requirements about usability, internationalization (i18n), performance efficiency, reliability and sustainability. This catalog can be useful for stakeholders and developers to evaluate their apps and/or identify potential requirements for new mGIS apps for data collection.
{\textcopyright} 2021, The Author(s), under exclusive license to Springer Nature Switzerland AG.}, keywords = {Data acquisition, Data collection, Functional requirement, Geographic information systems, Geographical data, Information management, Information systems, Information use, Mobile geographic information systems, Performance efficiency, Software engineering, Software engineering standards}, doi = {10.1007/978-3-030-72651-5_32}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85107324425\&doi=10.1007\%2f978-3-030-72651-5_32\&partnerID=40\&md5=16bcf6a546df5b7bf9d750897b7ad643}, author = {El Fhel, B. and Sardi, L. and Idri, A.} } @article {Zerouaoui2021, title = {Reviewing Machine Learning and Image Processing Based Decision-Making Systems for Breast Cancer Imaging}, journal = {Journal of Medical Systems}, volume = {45}, number = {1}, year = {2021}, note = {cited By 19}, abstract = {Breast cancer (BC) is the leading cause of death among women worldwide. In general, it affects women older than 40 years. Medical image analysis is one of the most promising research areas since it provides facilities for diagnosis and decision-making for several diseases such as BC. This paper conducts a Structured Literature Review (SLR) of the use of Machine Learning (ML) and Image Processing (IP) techniques to deal with BC imaging. A set of 530 papers published between 2000 and August 2019 were selected and analyzed according to ten criteria: year and publication channel, empirical type, research type, medical task, machine learning techniques, datasets used, validation methods, performance measures, and image processing techniques, which include image pre-processing, segmentation, feature extraction and feature selection. Results showed that diagnosis was the most used medical task and that Deep Learning (DL) techniques were largely used to perform classification. Furthermore, we found that classification was the most investigated ML objective, followed by prediction and clustering. Most of the selected studies used mammograms as the imaging modality rather than ultrasound or magnetic resonance imaging, using public or private datasets, with MIAS being the most frequently investigated public dataset. As for image processing techniques, the majority of the selected studies pre-processed their input images by reducing the noise and normalizing the colors, and some of them used segmentation to extract the region of interest with the thresholding method. For feature extraction, we note that researchers extracted the relevant features using classical feature extraction techniques (e.g. texture features, shape features) or DL techniques (e.g. VGG16, VGG19, ResNet), and finally, few papers used feature selection techniques, in particular filter methods.
{\textcopyright} 2021, The Author(s), under exclusive licence to Springer Science+Business Media, LLC part of Springer Nature.}, keywords = {Article, breast, Breast Cancer, cancer diagnosis, classifier, Computer-Assisted, Convolutional neural network, Decision making, Deep learning, deep neural network, diagnostic accuracy, diagnostic imaging, echography, Feature extraction, feature selection, female, human, Humans, image processing, image segmentation, Machine learning, Magnetic Resonance Imaging, mammography, multilayer perceptron, nuclear magnetic resonance imaging}, doi = {10.1007/s10916-020-01689-1}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85100125842\&doi=10.1007\%2fs10916-020-01689-1\&partnerID=40\&md5=3074b3d443373f75f5a3c1c6134512ee}, author = {Zerouaoui, H. and Idri, A.} } @article {Hosni20212827, title = {A systematic mapping study for ensemble classification methods in cardiovascular disease}, journal = {Artificial Intelligence Review}, volume = {54}, number = {4}, year = {2021}, note = {cited By 5}, pages = {2827-2861}, abstract = {Ensemble methods overcome the limitations of single machine learning techniques by combining different techniques, and are employed in the quest to achieve a high level of accuracy. This approach has been investigated in various fields, one of them being that of bioinformatics. One of the most frequent applications of ensemble techniques involves research into cardiovascular diseases, which are considered the leading cause of death worldwide. The purpose of this research work is to identify the papers that investigate ensemble classification techniques applied to cardiology diseases, and to analyse them according to nine aspects: their publication venues, the medical tasks tackled, the empirical and research types adopted, the types of ensembles proposed, the single techniques used to construct the ensembles, the validation frameworks adopted to evaluate the proposed ensembles, the tools used to build the ensembles, and the optimization methods employed for the single techniques. This paper reports on the carrying out of a systematic mapping study. An extensive automatic search in four digital libraries: IEEE Xplore, ACM Digital Library, PubMed, and Scopus, followed by a study selection process, resulted in the identification of 351 papers that were used to address our mapping questions. This study found that the papers selected had been published in a large number of different venues. The medical task addressed most frequently by the selected studies was diagnosis. In addition, the experiment-based empirical type and evaluation-based research type were the most dominant approaches adopted by the selected studies. Homogeneous ensembles were the ensemble type developed most often in the literature, while decision trees, artificial neural networks and Bayesian classifiers were the single techniques used most frequently to develop ensemble classification methods. The weighted majority and majority voting rules were adopted to obtain the final decision of the ensembles developed. With regard to evaluation frameworks, the datasets obtained from the UCI and PhysioBank repositories were those used most often to evaluate the ensemble methods, while the k-fold cross-validation method was the most frequently employed validation technique. Several tools with which to build ensemble classifiers were identified, and the type of software adopted with the greatest frequency was open source.
Finally, only a few researchers took into account the optimization of the parameter settings of either single or meta ensemble classifiers. This mapping study attempts to provide a greater insight into the application of ensemble classification methods in cardiovascular diseases. The majority of the selected papers reported positive feedback as regards the ability of ensemble methods to perform better than single methods. Further analysis is required to aggregate the evidence reported in the literature. {\textcopyright} 2020, Springer Nature B.V.}, keywords = {Bayesian networks, Cardio-vascular disease, Cardiology, Decision trees, Diagnosis, Digital libraries, Diseases, Ensemble classification, Ensemble classifiers, Evaluation framework, K fold cross validations, Learning systems, Majority voting rules, Mapping, Open source software, Open systems, Optimization method, Systematic mapping studies}, doi = {10.1007/s10462-020-09914-6}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85091735819\&doi=10.1007\%2fs10462-020-09914-6\&partnerID=40\&md5=69ea4b02de420c3ec6a85e1f3c7dddaf}, author = {Hosni, M. and Carrillo de Gea, J.M. and Idri, A. and El Bajta, M. and Fern{\'a}ndez Alem{\'a}n, J.L. and Garc{\'\i}a-Mateos, G. and Abnane, I.} } @article {Hosni2021, title = {On the value of filter feature selection techniques in homogeneous ensembles effort estimation}, journal = {Journal of Software: Evolution and Process}, volume = {33}, number = {6}, year = {2021}, note = {cited By 1}, abstract = {Software development effort estimation (SDEE) remains the principal activity in software project management planning. Over the past four decades, several methods have been proposed to estimate the effort required to develop a software system, including, more recently, machine learning (ML) techniques. Because ML performance accuracy depends on the features that feed the ML technique, selecting the appropriate features in the data preprocessing step is important. This paper investigates three filter feature selection techniques to check the predictive capability of four single ML techniques: K-nearest neighbor, support vector regression, multilayer perceptron, and decision trees, and their homogeneous ensembles over six well-known datasets. Furthermore, the single and ensemble techniques were optimized using the grid search optimization method. The results suggest that the three filter feature selection techniques investigated improve the reasonability and the accuracy performance of the four single techniques. Moreover, the homogeneous ensembles are statistically more accurate than the single techniques. Finally, adopting a random process (i.e., the random subspace method) to select the input features for an ML technique is not always effective in generating an accurate homogeneous ensemble. {\textcopyright} 2021 John Wiley \& Sons, Ltd.}, keywords = {Decision trees, Effort Estimation, Feature extraction, K-nearest neighbors, Multilayer neural networks, Nearest neighbor search, Object oriented programming, Predictive capabilities, Project management, Random processes, Random subspace method, Selection techniques, Software design, Software development effort, Software project management, Software systems, Support vector regression}, doi = {10.1002/smr.2343}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85103402056\&doi=10.1002\%2fsmr.2343\&partnerID=40\&md5=0f4c04b547f3d628d6db8d65b74912e5}, author = {Hosni, M. and Idri, A.
and Abran, A.} } @article {Hakkoum202015, title = {Artificial Neural Networks Interpretation Using LIME for Breast Cancer Diagnosis}, journal = {Advances in Intelligent Systems and Computing}, volume = {1161 AISC}, year = {2020}, note = {cited By 6}, pages = {15-24}, abstract = {Breast Cancer (BC) is the most common type of cancer among women. Thankfully, early detection and treatment improvements helped decrease its number of deaths. Data Mining (DM) techniques, which discover hidden and potentially useful patterns from data, particularly for breast cancer diagnosis, are witnessing a new era in which the main objective is no longer replacing humans or just assisting them in their tasks, but enhancing and augmenting their capabilities; this is where interpretability comes into play. This paper aims to investigate the Local Interpretable Model-agnostic Explanations (LIME) technique to interpret a Multilayer Perceptron (MLP) trained on the Wisconsin Original dataset. The results show that LIME explanations are a sort of real-time interpretation that helps in understanding how the constructed neural network {\textquotedblleft}thinks{\textquotedblright} and can thus increase trust and help oncologists, as the domain experts, learn new patterns. {\textcopyright} 2020, The Editor(s) (if applicable) and The Author(s), under exclusive license to Springer Nature Switzerland AG.}, keywords = {Breast Cancer, Breast cancer diagnosis, Data mining, Diseases, Domain experts, Information systems, Information use, Interpretability, Lime, Multi layer perceptron, Neural networks, Real time, Useful patterns, WISCONSIN}, doi = {10.1007/978-3-030-45697-9_2}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85085506923\&doi=10.1007\%2f978-3-030-45697-9_2\&partnerID=40\&md5=cf809f0c6b9c95cd37d48038f1dfa79a}, author = {Hakkoum, H. and Idri, A. and Abnane, I.} } @article {Idri20201239, title = {Assessing the impact of parameters tuning in ensemble based breast Cancer classification}, journal = {Health and Technology}, volume = {10}, number = {5}, year = {2020}, note = {cited By 12}, pages = {1239-1255}, abstract = {Breast cancer is one of the major causes of death among women. Different decision support systems were proposed to assist oncologists in accurately diagnosing their patients. These decision support systems mainly used classification techniques to categorize the diagnosis into malignant or benign tumors. Given that no consensus has been reached on the classifier that can perform best in all circumstances, ensemble-based classification, which classifies patients by combining more than one single classification technique, has recently been investigated. In this paper, heterogeneous ensembles based on three well-known machine learning techniques (support vector machines, multilayer perceptron, and decision trees) were developed and evaluated by investigating the impact of parameter values of the ensemble members on classification performance. In particular, we investigate three parameter tuning techniques: Grid Search (GS), Particle Swarm Optimization (PSO) and the default parameters of the Weka tool, to evaluate whether tuning ensemble parameters permits more accurate breast cancer classification over four datasets obtained from the Machine Learning repository. The heterogeneous ensembles of this study were built using the majority voting technique as a combination rule.
The overall results obtained suggest that: (1) using GS or PSO for single techniques provides more accurate classification; (2) in general, ensembles generate more accurate classification than their single techniques regardless of the optimization technique used; (3) heterogeneous ensembles based on optimized single classifiers generate better results than the Uniform Configuration of Weka (UC-WEKA) ensembles; and (4) PSO and GS have roughly the same impact on the performances of ensembles. {\textcopyright} 2020, IUPESM and Springer-Verlag GmbH Germany, part of Springer Nature.}, keywords = {accuracy, Article, Breast Cancer, cancer classification, classifier, decision tree, experimental design, grid search, human, multilayer perceptron, particle swarm optimization, recall, support vector machine}, doi = {10.1007/s12553-020-00453-2}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85087361107\&doi=10.1007\%2fs12553-020-00453-2\&partnerID=40\&md5=7398903f3007d71e535b12c2ef9a90a6}, author = {Idri, A. and Bouchra, E.O. and Hosni, M. and Abnane, I.} } @article {Chlioui202061, title = {Comparing Statistical and Machine Learning Imputation Techniques in Breast Cancer Classification}, journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)}, volume = {12252 LNCS}, year = {2020}, note = {cited By 1}, pages = {61-76}, abstract = {Missing data imputation is an important task when dealing with crucial data that cannot be discarded, such as medical data. This study evaluates and compares the impacts of two statistical and two machine learning imputation techniques when classifying breast cancer patients, using several evaluation metrics. Mean, Expectation-Maximization (EM), Support Vector Regression (SVR) and K-Nearest Neighbor (KNN) were applied to impute 18\% of missing data, missing completely at random, in the two Wisconsin datasets. Thereafter, we empirically evaluated these four imputation techniques when using five classifiers: decision tree (C4.5), Case Based Reasoning (CBR), Random Forest (RF), Support Vector Machine (SVM) and Multi-Layer Perceptron (MLP). In total, 1380 experiments were conducted, and the findings confirmed that classification using machine learning based imputation outperformed classification using statistical imputation. Moreover, our experiments showed that SVR was the best imputation method for breast cancer classification. {\textcopyright} 2020, Springer Nature Switzerland AG.}, keywords = {Breast cancer classifications, Case based reasoning, Casebased reasonings (CBR), Decision trees, Diseases, Expectation Maximization, Imputation techniques, K nearest neighbor (KNN), Learning systems, Maximum principle, Missing data imputations, Multi layer perceptron, Multilayer neural networks, Nearest neighbor search, Support vector machines, Support vector regression, Support vector regression (SVR)}, doi = {10.1007/978-3-030-58811-3_5}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85092259110\&doi=10.1007\%2f978-3-030-58811-3_5\&partnerID=40\&md5=fd5730e8014d40306df07238106d32ff}, author = {Chlioui, I. and Abnane, I.
and Idri, A.} } @article {Bachiri202036, title = {A Complete Prenatal Solution for a Reproductive Health Unit in Morocco}, journal = {Advances in Intelligent Systems and Computing}, volume = {1161 AISC}, year = {2020}, note = {cited By 0}, pages = {36-43}, abstract = {A prenatal mobile Personal Health Record (mPHR), along with an Electronic Health Record (EHR), is exploited to permit both pregnant women and gynecologists or obstetricians to monitor the pregnancy progress in the best conditions. To this end, a complete solution consisting of a prenatal mPHR and an EHR was developed for the maternity {\textquotedblleft}Les Orangers{\textquotedblright} of the Avicenne University Hospital in Rabat. The complete solution provides the main functionalities of a prenatal service. Thereafter, the solution will be validated by conducting an experiment for quality and potential assessment. Hence, a recruitment process has been defined to identify the eligibility criteria for enrolling participants (pregnant women and gynecologists), in addition to planning the course of the experiment. {\textcopyright} 2020, The Editor(s) (if applicable) and The Author(s), under exclusive license to Springer Nature Switzerland AG.}, keywords = {Complete solutions, electronic health record, Eligibility criterion, Health, Information systems, Information use, Personal health record, Pregnant woman, Recruitment process, reproductive health}, doi = {10.1007/978-3-030-45697-9_4}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85085481969\&doi=10.1007\%2f978-3-030-45697-9_4\&partnerID=40\&md5=e3d4adffd953495adbfb14b0d483c067}, author = {Bachiri, M. and Idri, A. and Rachad, T. and Alami, H. and Redman, L.M.} } @article {Benhar2020, title = {Data preprocessing for heart disease classification: A systematic literature review}, journal = {Computer Methods and Programs in Biomedicine}, volume = {195}, year = {2020}, note = {cited By 25}, abstract = {Context: Early detection of heart disease is an important challenge since 17.3 million people yearly lose their lives due to heart diseases. Moreover, any error in the diagnosis of cardiac disease can be dangerous and risks an individual{\textquoteright}s life. Accurate diagnosis is therefore critical in cardiology. Data Mining (DM) classification techniques have been used to diagnose heart diseases but are still limited by data quality challenges such as inconsistencies, noise, missing data, outliers, high dimensionality and imbalanced data. Data preprocessing (DP) techniques were therefore used to prepare data with the goal of improving the performance of heart disease DM based prediction systems. Objective: The purpose of this study is to review and summarize the current evidence on the use of preprocessing techniques in heart disease classification as regards: (1) the DP tasks and techniques most frequently used, (2) the impact of DP tasks and techniques on the performance of classification in cardiology, (3) the overall performance of classifiers when using DP techniques, and (4) comparisons of different classifier-preprocessing combinations in terms of accuracy rate. Method: A systematic literature review is carried out by identifying and analyzing empirical studies on the application of data preprocessing in heart disease classification published in the period between January 2000 and June 2019. A total of 49 studies were therefore selected and analyzed according to the aforementioned criteria.
Results: The review shows that data reduction is the most used preprocessing task in cardiology, followed by data cleaning. In general, preprocessing either maintained or improved the performance of heart disease classifiers. Some combinations such as (ANN + PCA), (ANN + CHI) and (SVM + PCA) are promising in terms of accuracy. However, the deployment of these models in real-world diagnosis decision support systems is subject to several risks and limitations due to the lack of interpretability. {\textcopyright} 2020 Elsevier B.V.}, keywords = {Cardiology, Classification (of information), Classification technique, classifier, clinical practice, clinical research, Computer aided diagnosis, data classification, Data mining, Data preprocessing, data processing, Decision support systems, Deep learning, Diagnosis decision, diagnostic accuracy, disease classification, Diseases, empiricism, evidence based practice, feature selection, Heart, heart disease, Heart Diseases, High dimensionality, human, Humans, intermethod comparison, Machine learning, Performance of classifier, prediction, Prediction systems, Preprocessing techniques, publication, Review, Support vector machines, Systematic literature review, Systematic Review, task performance}, doi = {10.1016/j.cmpb.2020.105635}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85087500300\&doi=10.1016\%2fj.cmpb.2020.105635\&partnerID=40\&md5=cae53ce36903d5d8b817ec96deb39b45}, author = {Benhar, H. and Idri, A. and L Fern{\'a}ndez-Alem{\'a}n, J.} } @article {Chlioui2020547, title = {Data preprocessing in knowledge discovery in breast cancer: systematic mapping study}, journal = {Computer Methods in Biomechanics and Biomedical Engineering: Imaging and Visualization}, volume = {8}, number = {5}, year = {2020}, note = {cited By 5}, pages = {547-561}, abstract = {Data Mining (DM) is a set of techniques that allow analysing data from different perspectives and summarising it into useful information. Data mining has been increasingly used in medicine, especially in oncology. Data preprocessing is the most important step of the knowledge extraction process and allows improving the performance of DM models. Breast cancer (BC) has become the most common cancer among females worldwide and the leading cause of women{\textquoteright}s death. This paper aims to perform a systematic mapping study to analyse and synthesise studies on the application of preprocessing techniques for a DM task in breast cancer. Therefore, 66 relevant articles published between 2000 and October 2018 were selected and analysed according to five criteria: year/channel of publication, research type, medical task, empirical type and preprocessing task. The results show that conferences and journals are the most targeted publication sources, researchers were more interested in applying preprocessing techniques for the diagnosis of BC, historical-based evaluation was the most used empirical type in the evaluation of preprocessing techniques in BC, and data reduction was the most investigated preprocessing task in BC. However, few papers discussed treatment, which encourages researchers to devote more effort to this task.
{\textcopyright} 2020 Informa UK Limited, trading as Taylor \& Francis Group.}, keywords = {algorithm, Article, Breast Cancer, cancer classification, cancer prognosis, clinical assessment, clinical outcome, Data mining, Data mining models, Data mining tasks, Data preprocessing, Diagnosis, diagnostic accuracy, Diseases, Extraction process, health promotion, human, image analysis, knowledge, knowledge discovery, Knowledge extraction, Machine learning, Mapping, Medical informatics, nerve cell network, neural crest cell, Performance, Pre-processing techniques, processing, screening test, Systematic mapping studies, Systematic Review, validity}, doi = {10.1080/21681163.2020.1730974}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85080112312\&doi=10.1080\%2f21681163.2020.1730974\&partnerID=40\&md5=befb1bc3f31f676a8e95bbc5bff5ab6d}, author = {Chlioui, I. and Idri, A. and Abnane, I.} } @article {ElIdrissi2020379, title = {Deep Learning for Blood Glucose Prediction: CNN vs LSTM}, journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)}, volume = {12250 LNCS}, year = {2020}, note = {cited By 6}, pages = {379-393}, abstract = {To manage their disease, diabetic patients need to control the blood glucose level (BGL) by monitoring it and predicting its future values. This allows avoiding high or low BGL by taking recommended actions in advance. In this study, we propose a Convolutional Neural Network (CNN) for BGL prediction. This CNN is compared with a Long Short-Term Memory (LSTM) model for both one-step and multi-step prediction. The objectives of this work are: 1) determining the best configuration of the proposed CNN, 2) determining the best strategy of multi-step forecasting (MSF) using the obtained CNN for a prediction horizon of 30 min, and 3) comparing the CNN and LSTM models for one-step and multi-step prediction. Toward the first objective, we conducted a series of experiments through parameter selection. Then, five MSF strategies were developed for the CNN to reach the second objective. Finally, for the third objective, comparisons between the CNN and LSTM models were conducted and assessed by the Wilcoxon statistical test. All the experiments were conducted using 10 patients{\textquoteright} datasets and the performance was evaluated through the Root Mean Square Error. The results show that the proposed CNN significantly outperformed the LSTM model for both one-step and multi-step prediction, and no MSF strategy outperforms the others for the CNN. {\textcopyright} 2020, Springer Nature Switzerland AG.}, keywords = {Blood, Blood glucose, Blood glucose level, Convolutional neural networks, Deep learning, Diabetic patient, Disease control, Forecasting, Glucose, Long short-term memory, Mean square error, Multi-step, Parameter selection, Prediction horizon, Root mean square errors}, doi = {10.1007/978-3-030-58802-1_28}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85093089262\&doi=10.1007\%2f978-3-030-58802-1_28\&partnerID=40\&md5=f16d576a801dcdbbac35f8593e4a50ea}, author = {El Idrissi, T. and Idri, A.} } @article {Abnane2020, title = {Fuzzy case-based-reasoning-based imputation for incomplete data in software engineering repositories}, journal = {Journal of Software: Evolution and Process}, volume = {32}, number = {9}, year = {2020}, note = {cited By 6}, abstract = {Missing data is a serious issue in software engineering because it can lead to information loss and bias in data analysis.
Several imputation techniques have been proposed to deal with both numerical and categorical missing data. However, most of those techniques simply reuse methods originally designed for numerical data, which is a problem when the missing data are related to categorical attributes. This paper aims (a) to propose a new fuzzy case-based reasoning (CBR) imputation technique designed for both numerical and categorical data and (b) to evaluate and compare the performance of the proposed technique with the k-nearest neighbor (KNN) imputation technique in terms of error and accuracy under different missing data percentages and missingness mechanisms in four software engineering data sets. The results suggest that the proposed fuzzy CBR technique outperformed KNN in terms of imputation error and accuracy regardless of the missing data percentage, missingness mechanism, and data set used. Moreover, we found that the missingness mechanism has an important impact on the performance of both techniques. The results are encouraging in the sense that using an imputation technique designed for both categorical and numerical data is better than reusing methods originally designed for numerical data. {\textcopyright} 2020 John Wiley \& Sons, Ltd.}, keywords = {accuracy, Case based reasoning, Categorical data, Clustering algorithms, Empirical Software Engineering, Fuzzy analogy, imputation, Missing data, Nearest neighbor search, Numerical methods, Software engineering}, doi = {10.1002/smr.2260}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85081904769\&doi=10.1002\%2fsmr.2260\&partnerID=40\&md5=762cb4270c6a55d209feaa8eb6df5c5f}, author = {Abnane, I. and Idri, A. and Abran, A.} } @article {Sardi2020931, title = {Gamified e-Health Solution to Promote Postnatal Care in Morocco}, journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)}, volume = {12252 LNCS}, year = {2020}, note = {cited By 0}, pages = {931-946}, abstract = {The postnatal period is a critical phase in the lives of both the mothers and the newborns. Due to all the inherent changes that occur during this period, quality care is crucial to enhance the wellbeing of the mothers and the newborns. In Morocco, the neglect of postnatal care services is often associated with poor communication, financial difficulties and cultural barriers. Mobile technology therefore constitutes a promising approach to bridge this gap and promote postnatal care. In order to improve the effectiveness of mobile technology, gamification has become a powerful feature to boost motivation and induce fun and interactivity into the mobile solutions{\textquoteright} tasks. Based on a previous review of mobile applications for postnatal care available in app repositories, a set of requirements has been identified to build a comprehensive mobile solution that caters to the needs of both the mothers and the newborns. These requirements have then been enriched with real needs elicited at the maternity Les orangers, which belongs to the University Hospital Avicenne of Rabat. Along with the functional and non-functional requirements, gamification aspects have also been analyzed. After the analysis and design phases, a pilot version of the solution called {\textquoteleft}Mamma\&Baby{\textquoteright} has been implemented using the Android framework.
{\textquoteleft}Mamma\&Baby{\textquoteright} is a mobile solution dedicated to assisting new mothers during their postnatal period. As future work, it is expected to fully integrate the gamification elements into the solution and conduct an empirical evaluation of the overall quality and the potential of the solution with real puerperal women. {\textcopyright} 2020, Springer Nature Switzerland AG.}, keywords = {E-health solutions, Empirical evaluations, Financial difficulties, Gamification, Mobile applications, Mobile solutions, Mobile Technology, Non-functional requirements, Overall quality, Quality control, Telecommunication equipment}, doi = {10.1007/978-3-030-58811-3_66}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85092252943\&doi=10.1007\%2f978-3-030-58811-3_66\&partnerID=40\&md5=2a3490959b88f98611929a0ad0efd78b}, author = {Sardi, L. and Idri, A. and Rachad, T. and Redman, L. and Alami, H.} } @conference {ElBajta2020239, title = {Identifying software cost attributes of software project management in global software development: An integrative framework}, booktitle = {ACM International Conference Proceeding Series}, year = {2020}, note = {cited By 0}, pages = {239-244}, abstract = {The management of global and distributed software projects is a very difficult task, further complicated by the emergence of new challenges inherent in stakeholder dispersion. Software cost estimation plays a central role in facing challenges in the context of Global Software Development (GSD). The objective of this study is to identify software cost attributes related to the GSD context and to present an integrative framework encompassing these attributes. Thirty cost attributes were identified using a Systematic Literature Review (SLR) and later compiled into a framework inspired by the Software Engineering Institute (SEI) taxonomy. {\textcopyright} 2020 ACM.}, keywords = {Cost engineering, Cost estimating, Distributed software, global software development, Integrative framework, Intelligent systems, Project management, Software cost, Software cost estimations, Software design, Software engineering institutes, Software project management, Systematic literature review (SLR)}, doi = {10.1145/3419604.3419780}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85096416219\&doi=10.1145\%2f3419604.3419780\&partnerID=40\&md5=54b147fb2d591d8cef6a98304201c1e5}, author = {El Bajta, M. and Idri, A.} } @conference {Benhar2020391, title = {Impact of threshold values for filter-based univariate feature selection in heart disease classification}, booktitle = {HEALTHINF 2020 - 13th International Conference on Health Informatics, Proceedings; Part of 13th International Joint Conference on Biomedical Engineering Systems and Technologies, BIOSTEC 2020}, year = {2020}, note = {cited By 2}, pages = {391-398}, abstract = {In the last decade, feature selection (FS) was one of the most investigated preprocessing tasks for heart disease prediction. Determining the optimal features which contribute more towards the diagnosis of heart disease can reduce the number of clinical tests needed to be taken by a patient, decrease the model cost, reduce the storage requirements and improve the comprehensibility of the induced model. In this study, a comparison of three filter feature ranking methods was carried out. Feature ranking methods need to set a threshold (i.e. the percentage of the number of relevant features to be selected) in order to select the final subset of features.
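A minimal sketch of the idea in the entry in progress above: a filter-based univariate ranking (here chi-squared) with a percentage threshold deciding how many of the ranked features are kept. The dataset and the 50% threshold are placeholders, not the study's actual setup.

# Sketch of filter-based univariate feature selection with a percentage threshold.
# The dataset stands in for a heart disease dataset; the 50% threshold is arbitrary.
from sklearn.datasets import load_breast_cancer
from sklearn.feature_selection import SelectPercentile, chi2

X, y = load_breast_cancer(return_X_y=True)
X_reduced = SelectPercentile(chi2, percentile=50).fit_transform(X, y)
print(X.shape, "->", X_reduced.shape)   # half of the ranked features are kept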
Thus, the aim of this study is to investigate whether there is a threshold value that is an optimal choice for three different feature ranking methods and four classifiers used for heart disease classification in four heart disease datasets. The feature ranking methods and selection thresholds used resulted in optimal classification performance for one or more classifiers over small and large heart disease datasets. The size of the dataset plays an important role in the choice of the selection threshold. {\textcopyright} 2020 by SCITEPRESS - Science and Technology Publications, Lda. All rights reserved.}, keywords = {Biomedical engineering, Cardiology, Classification (of information), Clinical tests, Diagnosis, Diseases, Feature extraction, Feature ranking, Heart, heart disease, Large dataset, Medical informatics, Optimal choice, Optimal classification, Relevant features, Storage requirements, Threshold-value}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85083712586\&partnerID=40\&md5=4656d8b952f7c60387d4495c737c5a6d}, author = {Benhar, H. and Idri, A. and Hosni, M.} } @article {Rachad2020, title = {Intelligent Mobile Applications: A Systematic Mapping Study}, journal = {Mobile Information Systems}, volume = {2020}, year = {2020}, note = {cited By 3}, abstract = {Smart mobiles, as the most affordable and practical ubiquitous devices, participate heavily in the enhancement of our daily life through the use of many convenient applications. However, the significant number of mobile users, in addition to their heterogeneity (different profiles and contexts), obliges developers to enhance the quality of their apps by making them more intelligent and more flexible. This is realized mainly by analyzing mobile users{\textquoteright} data. Machine learning (ML) technology provides the methodology and techniques needed to extract knowledge from data to facilitate decision-making. Therefore, both developers and researchers affirm the benefits of combining ML techniques and mobile technology in several application fields such as e-health, e-learning, e-commerce, and e-coaching. Thus, the purpose of this paper is to give an overview of the use of ML techniques in the design and development of mobile applications. Therefore, we performed a systematic mapping study of papers published on this subject in the period between 1 January 2007 and 31 December 2019. A total number of 71 papers were selected, studied, and analyzed according to the following criteria: year, source and channel of publication, research type and methods, kind of collected data, and finally adopted ML models, tasks, and techniques. {\textcopyright} 2020 Taoufik Rachad and Ali Idri.}, keywords = {Application fields, Decision making, Design and Development, Mapping, Methodology and techniques, Mobile applications, Mobile commerce, Mobile computing, Mobile Technology, Mobile users, Systematic mapping studies, Ubiquitous devices}, doi = {10.1155/2020/6715363}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85083095554\&doi=10.1155\%2f2020\%2f6715363\&partnerID=40\&md5=d474ab3cece2179d34f54fa7087a4cfb}, author = {Rachad, T.
and Idri, A.} } @article {Zerouaoui202044, title = {Machine Learning and Image Processing for Breast Cancer: A Systematic Map}, journal = {Advances in Intelligent Systems and Computing}, volume = {1161 AISC}, year = {2020}, note = {cited By 3}, pages = {44-53}, abstract = {Machine Learning (ML) combined with Image Processing (IP) provides a powerful tool to help physicians, doctors and radiologists make more accurate decisions. Breast cancer (BC) is a very common disease among women worldwide; it is one of the medical sub-fields experiencing an emergence of the use of ML and IP techniques. This paper explores the use of ML and IP techniques for BC in the form of a systematic mapping study. 530 papers published between 2000 and August 2019 were selected and analyzed according to 6 criteria: year and publication channel, empirical type, research type, medical task, machine learning objectives and datasets used. The results show that classification was the most used ML objective. As for the datasets, most of the articles used private datasets belonging to hospitals, while papers using public data chose MIAS (Mammographic Image Analysis Society), making it the most used public dataset. {\textcopyright} 2020, The Editor(s) (if applicable) and The Author(s), under exclusive license to Springer Nature Switzerland AG.}, keywords = {Breast Cancer, Common disease, Diseases, Information systems, Information use, Machine learning, Mammographic image analysis, mammography, Medical imaging, Public data, Public dataset, Sub fields, Systematic mapping studies, Systematic maps}, doi = {10.1007/978-3-030-45697-9_5}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85085473168\&doi=10.1007\%2f978-3-030-45697-9_5\&partnerID=40\&md5=e980b38be1c8a8499c579b990b79b909}, author = {Zerouaoui, H. and Idri, A. and El Asnaoui, K.} } @article {Hosni20202177, title = {A mapping study of ensemble classification methods in lung cancer decision support systems}, journal = {Medical and Biological Engineering and Computing}, volume = {58}, number = {10}, year = {2020}, note = {cited By 8}, pages = {2177-2193}, abstract = {Achieving a high level of classification accuracy in medical datasets is a capital need for researchers to provide effective decision systems to assist doctors in their work. In many domains of artificial intelligence, ensemble classification methods are able to improve the performance of single classifiers. This paper reports the state of the art of ensemble classification methods in lung cancer detection. We have performed a systematic mapping study to identify the most relevant papers concerning this topic. A total of 65 papers published between 2000 and 2018 were selected after an automatic search in four digital libraries and a careful selection process. As a result, it was observed that diagnosis was the task most commonly studied; homogeneous ensembles and decision trees were the most frequently adopted for constructing ensembles; and the majority voting rule was the predominant combination rule. Few studies considered the parameter tuning of the techniques used. These findings open several perspectives for researchers to enhance lung cancer research by addressing the identified gaps, such as investigating different classification methods, proposing other heterogeneous ensemble methods, and using new combination rules.
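As an illustration of the most common design reported in the mapping study above, here is a minimal sketch of a homogeneous ensemble of decision trees whose predictions are aggregated across bootstrap-trained members (bagging in scikit-learn). The dataset is a stand-in; no public lung cancer dataset is assumed.

# Sketch of a homogeneous ensemble of decision trees; the 50 trees are trained on
# bootstrap samples and their predictions aggregated by voting. Data is a placeholder.
from sklearn.datasets import load_breast_cancer   # stand-in; the study targets lung cancer
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier

X, y = load_breast_cancer(return_X_y=True)
ensemble = BaggingClassifier(DecisionTreeClassifier(), n_estimators=50, random_state=0)
print("5-fold CV accuracy:", cross_val_score(ensemble, X, y, cv=5).mean())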
{\textcopyright} 2020, International Federation for Medical and Biological Engineering.}, keywords = {Artificial intelligence, Automatic searches, Biological organs, cancer classification, Classification (of information), Classification accuracy, Classification methods, classifier, decision support system, Decision support systems, decision tree, Decision trees, Diagnosis, Digital libraries, Diseases, Ensemble classification, Heterogeneous ensembles, human, lung cancer, Lung cancer detections, Majority voting rules, Mapping, priority journal, Review, Systematic mapping studies}, doi = {10.1007/s11517-020-02223-8}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85087501825\&doi=10.1007\%2fs11517-020-02223-8\&partnerID=40\&md5=bdb2bd3f923da33ea3b564a0bd6e739e}, author = {Hosni, M. and Garc{\'\i}a-Mateos, G. and Carrillo-De-Gea, J.M. and Idri, A. and Fernandez-Aleman, J.L.} } @article {Sardi2020, title = {Mobile health applications for postnatal care: Review and analysis of functionalities and technical features}, journal = {Computer Methods and Programs in Biomedicine}, volume = {184}, year = {2020}, note = {cited By 19}, abstract = {Background: Providing a continuum of care across the antenatal, childbirth and postnatal periods results in reduced maternal and neonatal morbidity and mortality. Timely, high-quality postnatal care is crucial for maximizing maternal and newborn health. In this vein, the use of postnatal mobile applications constitutes a promising strategy. Methods: A Systematic Literature Review (SLR) protocol was adopted to perform the selection, data extraction and functional evaluation of the available postnatal apps on the iOS and Android platforms. The analysis of the functionalities and technical features of the apps selected was performed according to a 37-item assessment questionnaire developed on the basis of the scientific literature on postnatal care and a preliminary analysis of available postnatal apps. Results: A total of 48 postnatal apps were retrieved from the app repositories of the iOS and Android platforms. The results of the functional content analysis show that the postnatal apps selected achieved relatively low scores owing to the complexity and the ramification of postnatal care. Conclusions: The present study helps in identifying areas related to postnatal care that require further endeavors to be properly addressed. It also provides directions for developers to leverage the advancement and innovation of mobile technology to build complete and well-suited postnatal apps. {\textcopyright} 2019}, keywords = {Android (operating system), Application programs, Article, childbirth, content analysis, data extraction, evaluation study, female, Functional evaluation, Functionality, human, Humans, Infant, iOS (operating system), mHealth, mobile application, Mobile applications, Mobile health application, newborn, newborn morbidity, perinatal period, Postnatal care, Postpartum Period, pregnancy, Preliminary analysis, procedures, puerperium, questionnaire, Scientific literature, Systematic literature review (SLR), Systematic Review, telemedicine}, doi = {10.1016/j.cmpb.2019.105114}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85073719026\&doi=10.1016\%2fj.cmpb.2019.105114\&partnerID=40\&md5=df137ca63d506f88c53e16c63f3e0c65}, author = {Sardi, L. and Idri, A. and Readman, L.M. and Alami, H. and Bezad, R.
and Fernandez-Aleman, J.L.} } @article {Kharbouch202086, title = {MyContraception: An Evidence-Based Contraception mPHR for Better Contraceptive Fit}, journal = {Advances in Intelligent Systems and Computing}, volume = {1161 AISC}, year = {2020}, note = {cited By 1}, pages = {86-94}, abstract = {The fulfillment of unmet needs for contraception can help women reach their reproductive goals. It was proven to have a significant impact on reducing the rates of unintended pregnancies, thereby cutting the morbidity and mortality resulting from these pregnancies and improving the lives of women and children in general. Therefore, there is a growing concern worldwide about contraception and women{\textquoteright}s knowledge of making an advised choice about it. In this respect, a growing number of apps are now available providing clinical resources, digital guides, or educational information concerning contraception, whether natural or modern contraception. However, many of these apps contain inaccurate sexual health facts and non-evidence-based information concerning contraception. On these bases, and with respect to the needs of women to effectively prevent unintended pregnancies while conducting a stress-free healthy lifestyle, the World Health Organization (WHO) Medical Eligibility Criteria (MEC) for contraception{\textquoteright}s recommendations, and the results and recommendations of a field study conducted in the reproductive health center Les Oranges in Rabat to collect the app{\textquoteright}s requirements, we developed an Android app named {\textquoteleft}MyContraception{\textquoteright}. Our solution is an evidence-based patient-centered contraceptive app that has been developed in an attempt to facilitate: (1) Seeking evidence-based information along with recommendations concerning the best contraceptive fit (according to one{\textquoteright}s medical characteristics, preferences and priorities), helping users make informed decisions about their contraceptive choices. (2) Monitoring one{\textquoteright}s own menstrual cycle, fertility window, contraceptive methods usage, and the correlation between these different elements and everyday symptoms in one app. (3) Keeping record of one{\textquoteright}s family medical history, medical appointments, analyses, diagnoses, procedures and notes within the same app. In future work, an empirical evaluation of the MyContraception solution is intended, to exhaustively examine the effects of this solution in improving the quality of patient-centered contraception care. {\textcopyright} 2020, The Editor(s) (if applicable) and The Author(s), under exclusive license to Springer Nature Switzerland AG.}, keywords = {Clinical resources, Diagnosis, Eligibility criterion, Empirical evaluations, Healthy lifestyles, Information concerning, Information systems, Information use, Informed decision, Obstetrics, reproductive health, World Health Organization}, doi = {10.1007/978-3-030-45697-9_9}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85085491736\&doi=10.1007\%2f978-3-030-45697-9_9\&partnerID=40\&md5=18f9987c1855619561fe9cc080957ade}, author = {Kharbouch, M. and Idri, A. and Rachad, T. and Alami, H. and Redman, L.
and Stelate, Y.} } @conference {Elmidaoui2020, title = {Predicting software maintainability using ensemble techniques and stacked generalization}, booktitle = {CEUR Workshop Proceedings}, volume = {2725}, year = {2020}, note = {cited By 1}, abstract = {The prediction of software maintainability has emerged as an important research topic to address industry expectations for reducing costs, in particular maintenance costs. In the last decades, many studies have used single techniques to predict software maintainability, but there is no agreement as to which technique can achieve the best prediction. Ensemble techniques, which combine two or more techniques, have been investigated in recent years. This study investigates ensemble techniques (homogeneous as well as heterogeneous) for predicting maintainability in terms of code line changes. To this end, well-known homogeneous ensembles such as Bagging, Boosting, Extra Trees, Gradient Boosting, and Random Forest are investigated first. Then the stacked generalization method is used to construct heterogeneous ensembles by combining the most accurate ones per dataset. The empirical results suggest that Gradient Boosting and Extra Trees are the best ensembles for all datasets, since they ranked first and second, respectively. Moreover, the findings of the evaluation of heterogeneous ensembles constructed using stacked generalization showed that they gave better prediction accuracy compared to all homogeneous ensembles. Copyright {\textcopyright} 2020 for this paper by its authors.}, keywords = {Decision trees, Ensemble techniques, Forecasting, Forestry, Gradient boosting, Heterogeneous ensembles, Maintainability, Maintenance cost, Prediction accuracy, Research topics, Software engineering, Software maintainability, Stacked generalization}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85098182236\&partnerID=40\&md5=32bab56e3a64ff6efa7e8717d9ee67c4}, author = {Elmidaoui, S. and Cheikhi, L. and Idri, A. and Abran, A.} } @conference {Miloudi2020251, title = {A Review of Open Source Software Maintenance Effort Estimation}, booktitle = {ACM International Conference Proceeding Series}, year = {2020}, note = {cited By 0}, pages = {251-256}, abstract = {Open Source Software (OSS) is gaining the interest of the software engineering community as well as practitioners from industry with the growth of the Internet. Studies on estimating the maintenance effort (MEE) of such software products have been published in the literature in order to provide better estimation. The aim of this study is to provide a review of studies related to maintenance effort estimation for open source software (OSSMEE). To this end, a set of 60 primary empirical studies were selected from six electronic databases and a discussion is provided according to eight research questions (RQs) related to: publication year, publication source, datasets (OSS projects), metrics (independent variables), techniques, maintenance effort (dependent variable), validation methods, and accuracy criteria used in the empirical validation. This study found that popular OSS projects have been used; that Linear Regression, Na{\"\i}ve Bayes and k-Nearest Neighbors were frequently used techniques; and that bug resolution was the most used dependent variable regarding the estimation of maintenance effort for future releases. A set of gaps is identified and recommendations for researchers are also provided.
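A minimal sketch of stacked generalization as described in the Elmidaoui et al. entry above: the two homogeneous ensembles the study found most accurate (Gradient Boosting and Extra Trees) are combined through a meta-learner trained on their predictions. The synthetic data and the Ridge meta-learner are assumptions for illustration, not the paper's configuration.

# Sketch of stacked generalization over Gradient Boosting and Extra Trees.
# Synthetic regression data stands in for a maintainability dataset.
from sklearn.datasets import make_regression
from sklearn.ensemble import (ExtraTreesRegressor, GradientBoostingRegressor,
                              StackingRegressor)
from sklearn.linear_model import Ridge

X, y = make_regression(n_samples=200, n_features=8, noise=10.0, random_state=0)
stack = StackingRegressor(
    estimators=[("gb", GradientBoostingRegressor(random_state=0)),
                ("et", ExtraTreesRegressor(random_state=0))],
    final_estimator=Ridge(),   # meta-learner combining the base ensembles' predictions
)
stack.fit(X, y)
print("R^2 on training data:", stack.score(X, y))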
{\textcopyright} 2020 ACM.}, keywords = {Computer software maintenance, Dependent variables, Electronic database, Empirical validation, Engineering community, Independent variables, Intelligent systems, K-nearest neighbors, Maintenance efforts, Nearest neighbor search, Open source software, Open systems, Research questions, Sodium compounds}, doi = {10.1145/3419604.3419809}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85096413214\&doi=10.1145\%2f3419604.3419809\&partnerID=40\&md5=7ad5b9eacb605d7875e2123cfec6ddeb}, author = {Miloudi, C. and Cheikhi, L. and Idri, A.} } @article {Ezzat202065, title = {Reviewing Data Analytics Techniques in Breast Cancer Treatment}, journal = {Advances in Intelligent Systems and Computing}, volume = {1161 AISC}, year = {2020}, note = {cited By 1}, pages = {65-75}, abstract = {Data mining (DM), or Data Analytics, is the process of extracting new valuable information from large quantities of data; it is reshaping many industries, including the medical one. Its contribution to medicine is very important, particularly in oncology. Breast cancer is the most common type of cancer in the world; it occurs almost entirely in women, but men can be affected too. Researchers around the world are trying every day to improve the prevention, detection and treatment of Breast Cancer (BC) in order to provide more effective treatments to patients. In this vein, the present paper carried out a systematic map of the use of data mining techniques in breast cancer treatment. The aim was to analyse and synthesise studies on DM applied to breast cancer treatment. In this regard, 44 relevant articles published between 1991 and 2019 were selected and classified according to three criteria: year and channel of publication, research type through DM contribution in BC treatment, and DM techniques. There are not many articles on treatment, because researchers have mostly been interested in diagnosis with the different classification techniques, perhaps owing to the importance of early diagnosis in avoiding danger. Results show that papers were published in different channels (especially journals or conferences); that researchers follow the DM pipeline to deal with BC treatment; that the challenge is to reduce the number of non-classified patients and assign them to the most appropriate group to follow the suitable treatment; and that classification was the most used task of DM applied to BC treatment. {\textcopyright} 2020, The Editor(s) (if applicable) and The Author(s), under exclusive license to Springer Nature Switzerland AG.}, keywords = {Breast Cancer, Breast cancer treatment, Classification technique, Computer aided diagnosis, Data Analytics, Data mining, Diseases, early diagnosis, Information systems, Information use, Medical computing, Patient treatment, Systematic maps}, doi = {10.1007/978-3-030-45697-9_7}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85085470786\&doi=10.1007\%2f978-3-030-45697-9_7\&partnerID=40\&md5=cc91fa4ca2e05ed50ac292abaf968494}, author = {EZZAT, M. and Idri, A.} } @article {Zakrani202072, title = {Software Effort Estimation Using an Optimal Trees Ensemble: An Empirical Comparative Study}, journal = {Smart Innovation, Systems and Technologies}, volume = {146}, year = {2020}, note = {cited By 0}, pages = {72-82}, abstract = {Since information systems have become the heartbeat of many organizations, the investment in software is growing rapidly, consuming a significant portion of the company budget.
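The Zakrani et al. entry beginning above builds an optimal trees ensemble (OTE): as described in the continuation of the abstract, top-ranked trees from a set of random forests are added one by one, each required to reduce the ensemble's unexplained variance. The sketch below is a crude simplification of that idea (rank the trees of one forest on a validation split by MAE and keep the best k); data, sizes and the selection criterion are invented.

# Rough OTE-style sketch: keep only the top-ranked trees of a random forest.
import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=300, n_features=10, noise=15.0, random_state=0)
X_tr, X_val, y_tr, y_val = train_test_split(X, y, random_state=0)

forest = RandomForestRegressor(n_estimators=100, random_state=0).fit(X_tr, y_tr)
errors = [mean_absolute_error(y_val, t.predict(X_val)) for t in forest.estimators_]
top = np.argsort(errors)[:20]   # keep the 20 top-ranked trees (arbitrary k)
pred = np.mean([forest.estimators_[i].predict(X_val) for i in top], axis=0)
print("OTE-style MAE:", mean_absolute_error(y_val, pred))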
In this context, both software engineering practitioners and researchers are more interested than ever in accurately estimating the effort and the quality of the software product under development. Accurate estimates are desirable, but no technique has been demonstrated to be successful at effectively and reliably estimating software development effort. In this paper, we propose the use of an optimal trees ensemble (OTE) to predict the software development effort. The ensemble employed is built by combining only the top ranked trees, one by one, from a set of random forests. Each included tree must decrease the unexplained variance of the ensemble for software development effort estimation (SDEE). The effectiveness of the OTE model is compared with that of other techniques such as regression trees, random forest, RBF neural networks, support vector regression and multiple linear regression in terms of the mean magnitude of relative error (MMRE), MdMRE and Pred(l) obtained on five well-known datasets, namely ISBSG R8, COCOMO, Tukutuku, Desharnais and Albrecht. According to the results obtained from the experiments, the proposed ensemble of optimal trees outperformed almost all the other techniques. Also, the OTE model statistically outperformed the other techniques in at least one dataset. {\textcopyright} 2020, Springer Nature Switzerland AG.}, keywords = {Accuracy evaluation, Budget control, Decision trees, Forestry, Linear regression, Multiple linear regressions, Optimal trees ensemble, Radial basis function networks, Random forests, RBF Neural Network, Regression trees, Software design, Software development effort, Support vector regression (SVR)}, doi = {10.1007/978-3-030-21005-2_7}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85069482179\&doi=10.1007\%2f978-3-030-21005-2_7\&partnerID=40\&md5=8d9d107d9b52c20f6cd93bb86477a6a0}, author = {Zakrani, A. and Idri, A. and Hain, M.} } @article {Kharbouch2020894, title = {Software Requirement Catalog on Acceptability, Usability, Internationalization and Sustainability for Contraception mPHRs}, journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)}, volume = {12252 LNCS}, year = {2020}, note = {cited By 0}, pages = {894-905}, abstract = {Contraception Mobile Personal Health Records (mPHRs) are efficient mobile health applications (apps) for increasing awareness about fertility and contraception and for allowing women to access, track, manage, and share their health data with healthcare providers. This paper aims to develop a requirements catalog according to standards, guidelines, and literature relevant to e-health technology and psychology. The requirements covered by this catalog are Acceptability, Usability, Sustainability, and Internationalization (i18n). This catalog can be very useful for developing, evaluating, and auditing contraceptive apps, as well as for helping stakeholders and developers identify potential requirements for their mPHRs to improve them. {\textcopyright} 2020, Springer Nature Switzerland AG.}, keywords = {Health, Health care providers, Health data, Mobile health application, Personal health record, Software requirements, Sustainable development}, doi = {10.1007/978-3-030-58811-3_63}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85092222585\&doi=10.1007\%2f978-3-030-58811-3_63\&partnerID=40\&md5=119f051c4582d8079abdb813399d89d1}, author = {Kharbouch, M. and Idri, A. and Redman, L. and Alami, H.
and Fernandez-Aleman, J.L. and Toval, A.} } @conference {ElIdrissi2020337, title = {Strategies of multi-step-ahead forecasting for blood glucose level using LSTM neural networks: A comparative study}, booktitle = {HEALTHINF 2020 - 13th International Conference on Health Informatics, Proceedings; Part of 13th International Joint Conference on Biomedical Engineering Systems and Technologies, BIOSTEC 2020}, year = {2020}, note = {cited By 3}, pages = {337-344}, abstract = {Predicting the blood glucose level (BGL) is crucial for the self-management of Diabetes. In general, a BGL prediction is done based on previous measurements of BGL, which can be taken either manually by using sticks or automatically by using continuous glucose monitoring (CGM) devices. To allow diabetic patients to take appropriate actions, the BGL predictions should be done ahead of time; thus a multi-step-ahead prediction is suitable. Therefore, many Multi-Step-ahead Forecasting (MSF) strategies have been developed and evaluated; they can be categorized into five types: Recursive, Direct, MIMO (for Multiple Input Multiple Output), DirMO (combining Direct and MIMO) and DirRec (combining Direct and Recursive). However, none of them is known to be the best strategy in all contexts. The present study aims at: 1) reviewing the MSF strategies, and 2) determining the best strategy to fit with an LSTM Neural Network model. Hence, we evaluated and compared the five MSF strategies using an LSTM Neural Network with a horizon of 30 minutes, in terms of two performance criteria: Root-Mean-Square Error (RMSE) and Mean Absolute Error (MAE). The results show that no strategy significantly outperformed the others when using the Wilcoxon statistical test. However, when using the Sum of Ranking Differences method, MIMO is the best strategy for both the RMSE and MAE criteria. {\textcopyright} 2020 by SCITEPRESS - Science and Technology Publications, Lda. All rights reserved.}, keywords = {Biomedical engineering, Blood, Blood glucose level, Comparative studies, Continuous glucosemonitoring (CGM), Forecasting, Glucose, Long short-term memory, Mean absolute error, Mean square error, Medical informatics, MIMO systems, Multi-step-ahead predictions, Neural network model, Performance criterion, Root mean square errors}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85083722214\&partnerID=40\&md5=bd9e45a8538559492b02432e5567d10e}, author = {El Idrissi, T. and Idri, A. and Kadi, I. and Bakkoury, Z.} } @article {ElKoutbi2019, title = {Empirical evaluation of an entropy-based approach to estimation variation of software development effort}, journal = {Journal of Software: Evolution and Process}, volume = {31}, number = {3}, year = {2019}, note = {cited By 0}, doi = {10.1002/smr.2149}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85063377815\&doi=10.1002\%2fsmr.2149\&partnerID=40\&md5=f8f11d4ac305f27600a3f8dec5af170c}, author = {El Koutbi, S. and Idri, A. and Abran, A.} } @article {Hosni2019, title = {Evaluating filter fuzzy analogy homogenous ensembles for software development effort estimation}, journal = {Journal of Software: Evolution and Process}, volume = {31}, number = {2}, year = {2019}, note = {cited By 0}, doi = {10.1002/smr.2117}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85061585023\&doi=10.1002\%2fsmr.2117\&partnerID=40\&md5=72983b70d540ccb14da59885e96cc4ab}, author = {Hosni, M. and Idri, A.
and Abran, A.} } @article {Nassif2019, title = {Software development effort estimation using regression fuzzy models}, journal = {Computational Intelligence and Neuroscience}, volume = {2019}, year = {2019}, note = {cited By 0}, doi = {10.1155/2019/8367214}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85062599935\&doi=10.1155\%2f2019\%2f8367214\&partnerID=40\&md5=efd8130615f3d9857ff368174c7a2db1}, author = {Nassif, A.B. and Azzeh, M. and Idri, A. and Abran, A.} } @article {Benhar2019, title = {A Systematic Mapping Study of Data Preparation in Heart Disease Knowledge Discovery}, journal = {Journal of Medical Systems}, volume = {43}, number = {1}, year = {2019}, note = {cited By 0}, doi = {10.1007/s10916-018-1134-z}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85058606352\&doi=10.1007\%2fs10916-018-1134-z\&partnerID=40\&md5=f92d6f6b431ad7798a78fb05c7a5624d}, author = {Benhar, H. and Idri, A. and Fernandez-Aleman, J.L.} } @article {Elmidaoui2019823, title = {Towards a Taxonomy of Software Maintainability Predictors}, journal = {Advances in Intelligent Systems and Computing}, volume = {930}, year = {2019}, note = {cited By 0}, pages = {823-832}, doi = {10.1007/978-3-030-16181-1_77}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85064883521\&doi=10.1007\%2f978-3-030-16181-1_77\&partnerID=40\&md5=0a2c124cf59cc02604b4d34b5ed98a5a}, author = {Elmidaoui, S. and Cheikhi, L. and Idri, A.} } @article {Elmidaoui201826, title = {Accuracy comparison of empirical studies on software product maintainability prediction}, journal = {Advances in Intelligent Systems and Computing}, volume = {746}, year = {2018}, pages = {26-35}, doi = {10.1007/978-3-319-77712-2_3}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85045339755\&doi=10.1007\%2f978-3-319-77712-2_3\&partnerID=40\&md5=7400e19626fc0aa4658b0ac497b89bda}, author = {Elmidaoui, S. and Cheikhi, L. and Idri, A.} } @article {ElIdrissi20181142, title = {Data mining techniques in diabetes self-management: A systematic map}, journal = {Advances in Intelligent Systems and Computing}, volume = {746}, year = {2018}, pages = {1142-1152}, doi = {10.1007/978-3-319-77712-2_109}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85045346296\&doi=10.1007\%2f978-3-319-77712-2_109\&partnerID=40\&md5=627778cf3f0df5fe749b6e457a0eef1f}, author = {El Idrissi, T. and Idri, A. and Bakkoury, Z.} } @article {Benhar20181208, title = {Data preprocessing for decision making in medical informatics: Potential and analysis}, journal = {Advances in Intelligent Systems and Computing}, volume = {746}, year = {2018}, pages = {1208-1218}, doi = {10.1007/978-3-319-77712-2_116}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85045323949\&doi=10.1007\%2f978-3-319-77712-2_116\&partnerID=40\&md5=86d2827399aedd6683b71d8893a2d49c}, author = {Benhar, H. and Idri, A. and Fernandez-Aleman, J.L.} } @article {Idri2018, title = {Evaluating Pred(p) and standardized accuracy criteria in software development effort estimation}, journal = {Journal of Software: Evolution and Process}, volume = {30}, number = {4}, year = {2018}, doi = {10.1002/smr.1925}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85045686530\&doi=10.1002\%2fsmr.1925\&partnerID=40\&md5=3a3d72a1a9b32d7a365db2168b5e3a45}, author = {Idri, A. and Abnane, I. 
and Abran, A.} } @article {Bachiri2018, title = {Evaluating the Privacy Policies of Mobile Personal Health Records for Pregnancy Monitoring}, journal = {Journal of Medical Systems}, volume = {42}, number = {8}, year = {2018}, doi = {10.1007/s10916-018-1002-x}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85049316852\&doi=10.1007\%2fs10916-018-1002-x\&partnerID=40\&md5=39f7d1b8a34c6a17937d6935246cf95b}, author = {Bachiri, M. and Idri, A. and Fernandez-Aleman, J.L. and Toval, A.} } @conference {Mimouni20181, title = {The impact of query precision on returned data precision criterion}, booktitle = {2017 International Conference on Electrical and Computing Technologies and Applications, ICECTA 2017}, volume = {2018-January}, year = {2018}, pages = {1-6}, doi = {10.1109/ICECTA.2017.8252061}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85045990243\&doi=10.1109\%2fICECTA.2017.8252061\&partnerID=40\&md5=553344ed3926f1d6f3b01b121907b491}, author = {Mimouni, L. and Zellou, A. and Idri, A.} } @conference {Abnane20181015, title = {Improved analogy-based effort estimation with incomplete mixed data}, booktitle = {Proceedings of the 2018 Federated Conference on Computer Science and Information Systems, FedCSIS 2018}, year = {2018}, pages = {1015-1024}, doi = {10.15439/2018F95}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85057257109\&doi=10.15439\%2f2018F95\&partnerID=40\&md5=04ce7873db15a5eed3fd50a320b73e4f}, author = {Abnane, I. and Idri, A.} } @conference {Mimouni2018319, title = {MDQM: Mediation data quality model aligned data quality model for mediation systems}, booktitle = {IC3K 2018 - Proceedings of the 10th International Joint Conference on Knowledge Discovery, Knowledge Engineering and Knowledge Management}, volume = {2}, year = {2018}, pages = {319-326}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85059040350\&partnerID=40\&md5=c41fc36d4914e0611e070779b607b7e1}, author = {Mimouni, L. and Zellou, A. and Idri, A.} } @article {Cheikhi201836, title = {Measurement based E-government portals{\textquoteright} benchmarking framework: Architectural and procedural views}, journal = {Advances in Intelligent Systems and Computing}, volume = {746}, year = {2018}, pages = {36-45}, doi = {10.1007/978-3-319-77712-2_4}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85045330135\&doi=10.1007\%2f978-3-319-77712-2_4\&partnerID=40\&md5=941334f001bfb6ab10e687f93e3f62a7}, author = {Cheikhi, L. and Fath-Allah, A. and Idri, A. and Al-Qutaish, R.E.} } @conference {Saissi2018111, title = {A new clustering approach to identify the values to query the deep web access forms}, booktitle = {2018 4th International Conference on Computer and Technology Applications, ICCTA 2018}, year = {2018}, pages = {111-116}, doi = {10.1109/CATA.2018.8398666}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85050215563\&doi=10.1109\%2fCATA.2018.8398666\&partnerID=40\&md5=9301955ef076b676312ef8b2d6fa454a}, author = {Saissi, Y. and Zellou, A. and Idri, A.} } @conference {Selma2018212, title = {A New Mapping Approach between XML Schemas in a P2P Environment}, booktitle = {Proceedings - International Conference on Developments in eSystems Engineering, DeSE}, year = {2018}, pages = {212-217}, doi = {10.1109/DeSE.2017.16}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85056190704\&doi=10.1109\%2fDeSE.2017.16\&partnerID=40\&md5=9968d6ce7b4ae7d6fb9f91cb21c0281a}, author = {Selma, E.Y.E.I. and Zellou, A. 
and Idri, A.} } @conference {Idri2018607, title = {Quality evaluation of gamified blood donation apps using ISO/IEC 25010 standard}, booktitle = {HEALTHINF 2018 - 11th International Conference on Health Informatics, Proceedings; Part of 11th International Joint Conference on Biomedical Engineering Systems and Technologies, BIOSTEC 2018}, volume = {5}, year = {2018}, pages = {607-614}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85051728605\&partnerID=40\&md5=e296eb3debca9cb6efd2a8d6fbe18690}, author = {Idri, A. and Sardi, L. and Fernandez-Aleman, J.L.} } @article {CruzZapata2018, title = {Reusable Software Usability Specifications for mHealth Applications}, journal = {Journal of Medical Systems}, volume = {42}, number = {3}, year = {2018}, note = {cited By 0}, abstract = {One of the key factors for the adoption of mobile technologies, and in particular of mobile health applications, is usability. A usable application will be easier to use and understand by users, and will improve the user{\textquoteright}s interaction with it. This paper proposes a software requirements catalog for usable mobile health applications, which can be used for the development of new applications, or the evaluation of existing ones. The catalog is based on the main identified sources in the literature on usability and mobile health applications. Our catalog was organized according to the ISO/IEC/IEEE 29148:2011 standard and follows the SIREN methodology to create reusable catalogs. The applicability of the catalog was verified by the creation of an audit method, which was used to perform the evaluation of a real app, S Health, an application created by Samsung Electronics Co. The usability requirements catalog, along with the audit method, identified several usability flaws in the evaluated app, which scored 83\%. Some flaws related to the navigation pattern were detected in the app. Further issues related to the startup experience, empty screens or writing style were also found. The way a user navigates through an application improves or deteriorates the user{\textquoteright}s experience with the application. We proposed a reusable usability catalog and an audit method. This proposal was used to evaluate a mobile health application. An audit report was created with the usability issues identified in the evaluated application. {\textcopyright} 2018, Springer Science+Business Media, LLC, part of Springer Nature.}, doi = {10.1007/s10916-018-0902-0}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85040984885\&doi=10.1007\%2fs10916-018-0902-0\&partnerID=40\&md5=32ddf999ef891e8986a8a19c7271249a}, author = {Cruz Zapata, B. and Fernandez-Aleman, J.L. and Toval, A. and Idri, A.} } @article {Bajta2018690, title = {Software project management approaches for global software development: A systematic mapping study}, journal = {Tsinghua Science and Technology}, volume = {23}, number = {6}, year = {2018}, pages = {690-714}, doi = {10.26599/TST.2018.9010029}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85055661732\&doi=10.26599\%2fTST.2018.9010029\&partnerID=40\&md5=7601769b8094dafca96fea06d11fd3db}, author = {Bajta, M.E. and Idri, A. and Ros, J.N. and Fernandez-Aleman, J.L. and de Gea, J.M.C. and Garc{\'\i}a, F.
and Toval, A.} } @article {Idri2018, title = {Support vector regression-based imputation in analogy-based software development effort estimation}, journal = {Journal of Software: Evolution and Process}, volume = {30}, number = {12}, year = {2018}, doi = {10.1002/smr.2114}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85058493764\&doi=10.1002\%2fsmr.2114\&partnerID=40\&md5=09059bd4e9087fda5b7225f1c8aeea9d}, author = {Idri, A. and Abnane, I. and Abran, A.} } @article {Chadli2018408, title = {A survey on the impact of risk factors and mitigation strategies in global software development}, journal = {Advances in Intelligent Systems and Computing}, volume = {746}, year = {2018}, pages = {408-417}, doi = {10.1007/978-3-319-77712-2_39}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85045296980\&doi=10.1007\%2f978-3-319-77712-2_39\&partnerID=40\&md5=9b40aafda48fa8eff4f79973caf81f38}, author = {Chadli, S.Y. and Idri, A.} } @article {Ouhbi2018, title = {Sustainability requirements for connected health applications}, journal = {Journal of Software: Evolution and Process}, volume = {30}, number = {7}, year = {2018}, doi = {10.1002/smr.1922}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85050354987\&doi=10.1002\%2fsmr.1922\&partnerID=40\&md5=70c90bb949fb18d52004aec275734df0}, author = {Ouhbi, S. and Fernandez-Aleman, J.L. and Toval, A. and Rivera~Pozo, J. and Idri, A.} } @conference {Idri2018, title = {A systematic map of data analytics in breast cancer}, booktitle = {ACM International Conference Proceeding Series}, year = {2018}, doi = {10.1145/3167918.3167930}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85044739616\&doi=10.1145\%2f3167918.3167930\&partnerID=40\&md5=4cfbf5c917bfa294988a996c72ff2234}, author = {Idri, A. and Chlioui, I. and El Ouassif, B.} } @article {Idri201869, title = {A systematic map of medical data preprocessing in knowledge discovery}, journal = {Computer Methods and Programs in Biomedicine}, volume = {162}, year = {2018}, pages = {69-85}, doi = {10.1016/j.cmpb.2018.05.007}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85046706780\&doi=10.1016\%2fj.cmpb.2018.05.007\&partnerID=40\&md5=30324a8f7b2702347d51c7c56b55b294}, author = {Idri, A. and Benhar, H. and Fernandez-Aleman, J.L. and Kadi, I.} } @article {Moumane201858, title = {A systematic map of mobile software usability evaluation}, journal = {Advances in Intelligent Systems and Computing}, volume = {746}, year = {2018}, pages = {58-67}, doi = {10.1007/978-3-319-77712-2_6}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85045321817\&doi=10.1007\%2f978-3-319-77712-2_6\&partnerID=40\&md5=c77c64a151faac6ec168b3914e6eb0dc}, author = {Moumane, K. and Idri, A.} } @article {Hosni20185977, title = {On the value of parameter tuning in heterogeneous ensembles effort estimation}, journal = {Soft Computing}, volume = {22}, number = {18}, year = {2018}, pages = {5977-6010}, doi = {10.1007/s00500-017-2945-4}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85035807309\&doi=10.1007\%2fs00500-017-2945-4\&partnerID=40\&md5=83d094f56b80a21295cf4230621f445d}, author = {Hosni, M. and Idri, A. and Abran, A. 
and Nassif, A.B.} } @conference {Fath-ALLAH2017326, title = {A Best Practice Based E-Government Portals{\textquoteright} Maturity Model-A Case Study}, booktitle = {IEEE CIT 2017 - 17th IEEE International Conference on Computer and Information Technology}, year = {2017}, note = {cited By 0}, pages = {326-331}, abstract = {An e-government portal{\textquoteright}s maturity model is a set of incremental stages that defines the maturity of e-government portals. In fact, these models can be used by e-government agencies to identify the maturity rank of their portal and at the same time provide recommendations and guidelines for agencies to improve their portals{\textquoteright} maturity. In previous research studies, we have built an e-government portals{\textquoteright} best practice model and a maturity model that is based on the former model. Moreover, we have evaluated the model with a set of experts in the e-government field. The model has proved its validity and reliability among the set of participants. The aim of this paper is to perform a case study of this maturity model on a real portal. Our findings show that the portal being measured is in the presence stage of maturity. {\textcopyright} 2017 IEEE.}, doi = {10.1109/CIT.2017.23}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85032359695\&doi=10.1109\%2fCIT.2017.23\&partnerID=40\&md5=765abc46de7761960104adbb2b22b013}, author = {Fath-Allah, A. and Cheikhi, L. and Idri, A. and Al-Qutaish, R.} } @conference {Idri2017245, title = {A Data Mining-Based Approach for Cardiovascular Dysautonomias Diagnosis and Treatment}, booktitle = {IEEE CIT 2017 - 17th IEEE International Conference on Computer and Information Technology}, year = {2017}, note = {cited By 0}, pages = {245-252}, abstract = {The autonomic nervous system (ANS) is a control system that acts largely unconsciously and regulates bodily functions. An autonomic malfunction can lead to serious problems related to blood pressure, the heart, swallowing, breathing and others. A set of dynamic tests is therefore adopted in ANS units to diagnose and treat patients with cardiovascular dysautonomias. These tests generate a big amount of data, which is very well suited to being processed using data mining techniques. The purpose of this study is to develop a cardiovascular dysautonomias prediction system to identify the appropriate diagnosis and treatment for patients with cardiovascular dysautonomias, using a dataset extracted from the ANS unit of the university hospital Avicenne in Morocco. Classification techniques and association rules were used for the diagnosis and treatment stages, respectively. In fact, K-nearest neighbors, the C4.5 decision tree algorithm, Random forest, Na{\"\i}ve Bayes and Support vector machine were applied to generate the diagnosis classification models, and the Apriori algorithm was used for generating the association rules. The results obtained for each classifier were analyzed and compared to identify the most efficient one. {\textcopyright} 2017 IEEE.}, doi = {10.1109/CIT.2017.28}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85032340028\&doi=10.1109\%2fCIT.2017.28\&partnerID=40\&md5=8c8a9ba277b01051d8429283af6bb318}, author = {Idri, A.
and Kadi, I.} } @article {Ouhbi201749, title = {E-health internationalization requirements for audit purposes}, journal = {Computer Methods and Programs in Biomedicine}, volume = {144}, year = {2017}, note = {cited By 1}, pages = {49-60}, abstract = {Background and objective: In the 21st century, e-health is proving to be one of the strongest drivers of the global transformation of the health care industry. Health information is currently truly ubiquitous and widespread, but in order to guarantee that everyone can appropriately access and understand this information, regardless of their origin, it is essential to bridge the international gap. The diversity of health information seekers{\textquoteright} languages and cultures signifies that e-health applications must be adapted to satisfy their needs. Methods: In order to achieve this objective, current and future e-health programs should take into account the internationalization aspects. This paper presents an internationalization requirements specification in the form of a reusable requirements catalog, obtained from the principal related standards, and describes the key methodological elements needed to perform an e-health software audit by using the internationalization knowledge previously gathered. Results: S Health, a relevant, well-known Android application that has more than 150 million users in over 130 countries, was selected as a target for the e-health internationalization audit method and requirements specification presented above. This application example helped us to put the proposal into practice and show that the procedure is realistic and effective. Conclusions: The approach presented in this study is subject to continuous improvement through the incorporation of new knowledge originating from additional information sources, such as other standards or stakeholders. The application example is useful for early evaluation and serves to assess the applicability of the internationalization catalog and audit methodology, and to improve them. It would be advisable to develop an automated tool with which to carry out the audit method. {\textcopyright} 2017 Elsevier B.V.}, doi = {10.1016/j.cmpb.2017.03.014}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85016069397\&doi=10.1016\%2fj.cmpb.2017.03.014\&partnerID=40\&md5=4cdb24852f0764dc64c9ed73f5614eda}, author = {Ouhbi, S. and Fernandez-Aleman, J.L. and Carrillo-De-Gea, J.M. and Toval, A. and Idri, A.} } @conference {Abnane20171302, title = {Empirical evaluation of fuzzy analogy for Software Development Effort Estimation}, booktitle = {Proceedings of the ACM Symposium on Applied Computing}, volume = {Part F128005}, year = {2017}, note = {cited By 0}, pages = {1302-1304}, abstract = {Software Development Effort Estimation (SDEE) plays a primary role in software project management. Among the several techniques suggested for estimating software development effort, analogy-based software effort estimation approaches stand out as promising techniques. In this paper, the performance of Fuzzy Analogy is compared with that of six other SDEE techniques (Linear Regression, Support Vector Regression, Multi-Layer Perceptron, M5P and Classical Analogy). The first step of the evaluation aimed to ensure that the SDEE techniques outperformed random guessing, by using the Standardized Accuracy (SA). Then, we used a set of reliable performance measures and the Borda count to rank them and identify which techniques are the most accurate.
The results suggest that Fuzzy Analogy statistically outperformed the other SDEE techniques regardless of the dataset used. {\textcopyright} 2017 ACM.}, doi = {10.1145/3019612.3019905}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85020898807\&doi=10.1145\%2f3019612.3019905\&partnerID=40\&md5=df809b8bee59594505af4961af714d5e}, author = {Abnane, I. and Idri, A. and Abran, A.} } @conference {Koutbi2017195, title = {Entropy-based framework dealing with error in software development effort estimation}, booktitle = {ENASE 2017 - Proceedings of the 12th International Conference on Evaluation of Novel Approaches to Software Engineering}, year = {2017}, note = {cited By 0}, pages = {195-202}, abstract = {The software engineering community often investigates the error concerning software development effort estimation as a part of, and sometimes as an improvement to, an effort estimation technique. The aim of this paper is to propose an approach dealing with both model and attribute measurement error sources, whatever the effort estimation technique used. To do so, we explore the concepts of entropy and fuzzy clustering to propose a new framework to cope with both error sources. The proposed framework has been evaluated with the COCOMO{\textquoteright}81 dataset and the Fuzzy Analogy effort estimation technique. The results are promising since the actual confidence interval percentages are close to those proposed by the framework. {\textcopyright} 2017 by SCITEPRESS - Science and Technology Publications, Lda. All Rights Reserved.}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85025448467\&partnerID=40\&md5=59f6479705cc067ea76a34a1d85422dd}, author = {Koutbi, S.E. and Idri, A.} } @conference {Abnane2017, title = {Evaluating Fuzzy Analogy on incomplete software projects data}, booktitle = {2016 IEEE Symposium Series on Computational Intelligence, SSCI 2016}, year = {2017}, note = {cited By 0}, abstract = {Missing Data (MD) is a widespread problem that can affect the ability to use data to construct effective software development effort prediction systems. This paper investigates the use of missing data (MD) techniques with Fuzzy Analogy. More specifically, this study analyzes the predictive performance of this analogy-based technique when using toleration, deletion or k-nearest neighbors (KNN) imputation techniques using the Pred(0.25) accuracy criterion, and thereafter compares the results with the findings when using the Standardized Accuracy (SA) measure. A total of 756 experiments were conducted involving seven data sets, three MD techniques (toleration, deletion and KNN imputation), three missingness mechanisms (MCAR: missing completely at random, MAR: missing at random, NIM: non-ignorable missing), and MD percentages from 10 percent to 90 percent. The results of accuracy measured in terms of Pred(0.25) confirm the findings of a study which used the SA measure. Moreover, we found that SA and Pred(0.25) measure different aspects of technique performance. Hence, SA is not sufficient to draw conclusions about a technique{\textquoteright}s accuracy, and it should be used with other metrics, especially Pred(0.25). {\textcopyright} 2016 IEEE.}, doi = {10.1109/SSCI.2016.7849922}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85016044171\&doi=10.1109\%2fSSCI.2016.7849922\&partnerID=40\&md5=9ece9d331d7bf0bbef07ed39c97ca2d1}, author = {Abnane, I.
and Idri, A.} } @conference {Idri2017114, title = {Fuzzy Analogy Based Effort Estimation: An Empirical Comparative Study}, booktitle = {IEEE CIT 2017 - 17th IEEE International Conference on Computer and Information Technology}, year = {2017}, note = {cited By 0}, pages = {114-121}, abstract = {Software Development Effort Estimation (SDEE) plays a primary role in software project management. Among several techniques suggested for estimating software development effort, analogy-based software effort estimation approaches stand out as promising techniques. In this paper, the performance of Fuzzy Analogy is compared with that of six other SDEE techniques (Linear Regression, Support Vector Regression, Multi-Layer Perceptron, M5P and Classical Analogy). The evaluation of the SDEE techniques was performed over seven datasets with two evaluation techniques (All-in and Jackknife). The first step of the evaluation aimed to ensure that the SDEE techniques outperformed random guessing by using the Standardized Accuracy (SA). Then, we used a set of reliable performance measures (Pred(0.25), MAE, MBRE, MIBRE and LSD) and Borda count to rank them and identify which techniques are the most accurate. The results suggest that when using All-in evaluation, Fuzzy Analogy statistically outperformed the other SDEE techniques regardless of the dataset used. However, when using Jackknife evaluation, the results obtained depended on the dataset and the SDEE technique used. The results suggest that Fuzzy Analogy is a promising technique for software development effort estimation. {\textcopyright} 2017 IEEE.}, doi = {10.1109/CIT.2017.29}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85032383384\&doi=10.1109\%2fCIT.2017.29\&partnerID=40\&md5=2d4f527b6750abd19f4a3787348a493a}, author = {Idri, A. and Abnane, I.} } @article {Sardi2017165, title = {Gamified mobile blood donation applications}, journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)}, volume = {10208 LNCS}, year = {2017}, note = {cited By 1}, pages = {165-176}, abstract = {Unpaid blood donation is a selfless act of citizenship, and the usage of gamification elements in blood donation apps can enhance the donors{\textquoteright} experience, especially among youth. This paper analyses the functionalities and explores the gamification elements of the existing blood donation apps in the mobile market. A search in Google Play, the Apple App Store, BlackBerry App World, and the Windows Mobile App store was performed to select 10 gamified blood donation (BD) apps, with three duplicates, out of 801 pinpointed. The results show that the majority of the blood donation apps selected do not support multiple languages and that the predominant authentication methods are traditional and social logins. Moreover, all the apps were intended for more than one purpose among helping users to find donors and blood centres, track their records and check their eligibility to donate. Most apps installed include notification features and built-in geolocation services to instantly inform users of donation needs in nearby locations. Badges and redeemable points were the most recurrent gamification elements in the blood donation apps selected. There is a need for better incentives in order not only to retain potential donors but also to recruit unwilling ones.
{\textcopyright} Springer International Publishing AG 2017.}, doi = {10.1007/978-3-319-56148-6_14}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85018706816\&doi=10.1007\%2f978-3-319-56148-6_14\&partnerID=40\&md5=0e71bd08afeb528b9c0f612b667abd3d}, author = {Sardi, L. and Idri, A. and Fernandez-Aleman, J.L.} } @conference {Hosni2017174, title = {Heterogeneous Ensembles for Software Development Effort Estimation}, booktitle = {Proceedings - 2016 3rd International Conference on Soft Computing and Machine Intelligence, ISCMI 2016}, year = {2017}, note = {cited By 0}, pages = {174-178}, abstract = {Software effort estimation influences almost all processes of software development, such as bidding, planning, and budgeting. Hence, delivering an accurate estimation in the early stages of the software life cycle may be the key to the success of any project. To this aim, many solo techniques have been proposed to predict the effort required to develop a software system. Nevertheless, none of them has proved to be suitable in all circumstances. Recently, Ensemble Effort Estimation (EEE), which generates a software effort estimate by combining more than one solo estimation technique by means of a combination rule, has been investigated. In this study, a heterogeneous EEE based on four machine learning techniques was investigated using three linear rules and two well-known datasets. The results of this study suggest that the proposed heterogeneous EEE yields a very promising performance and that there is no best combiner rule that can be recommended. {\textcopyright} 2016 IEEE.}, doi = {10.1109/ISCMI.2016.15}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85034656098\&doi=10.1109\%2fISCMI.2016.15\&partnerID=40\&md5=86bd61c4459af4a3491046df1925ab2c}, author = {Hosni, M. and Idri, A. and Nassif, A.B. and Abran, A.} } @conference {Chadli201712, title = {Identifying and mitigating risks of software project management in global software development}, booktitle = {ACM International Conference Proceeding Series}, volume = {Part F131936}, year = {2017}, note = {cited By 0}, pages = {12-22}, abstract = {Managing global software projects is a difficult task further complicated by the emergence of new risks inherent to the dispersion of stakeholders. Project managers of Global Software Development (GSD) projects deal with challenges related to geographical, temporal and socio-cultural distance. The aim of this paper is to identify mitigation strategies available in the literature that counter, partially or fully, the effects of risk factors related to the management of GSD projects, and to update the list of risk factors proposed in previous research. This study proposes a framework for the Software Risk Management (SRM) of GSD projects designed to help practitioners identify risk factors and alleviate their effects through a list of recommended mitigation strategies. Using a systematic literature review (SLR), 39 risk factors and 58 mitigation strategies were identified and classified using a framework inspired by Leavitt{\textquoteright}s model of organizational change. Results show that the mitigation strategies identified in this SLR target 38 out of 39 risk factors, indicating a high academic interest in resolving the challenges of managing GSD projects. Results also reveal that the list of risk factors submitted in this paper, compiled using a different set of selected studies, concurs with the list introduced in previous research.
{\textcopyright} 2017 Association for Computing Machinery.}, doi = {10.1145/3143434.3143453}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85038388343\&doi=10.1145\%2f3143434.3143453\&partnerID=40\&md5=4d1ca1bb448c86a4078da3a7727dc70c}, author = {Chadli, S.Y. and Idri, A.} } @conference {Idri2017262, title = {ISO/IEC 25010 Based Evaluation of Free Mobile Personal Health Records for Pregnancy Monitoring}, booktitle = {Proceedings - International Computer Software and Applications Conference}, volume = {1}, year = {2017}, note = {cited By 0}, pages = {262-267}, abstract = {Background: Mobile Personal Health Records (mPHRs) are mobile applications that allow organizing and saving patients{\textquoteright} health data in order to ease their management and access for both patients and doctors. Objective: This study aims to evaluate, based on the ISO/IEC 25010 quality standard, the software quality of mPHRs for pregnancy monitoring. Method: An External quality evaluation was carried out on 17 pregnancy monitoring mPHRs for iOS and Android. The evaluation consisted of performing a set of tasks and filling in a quality assessment questionnaire that covers the four External quality characteristics: Functional suitability, Operability, Performance efficiency and Reliability. Moreover, 14 mPHRs for pregnancy monitoring were used to study the relationship between the External quality and the Quality in-use by means of four classification techniques: Iterative Dichotomiser 3 (ID3), C4.5, K-nearest neighbors (K-NN) and Na{\"\i}ve Bayes. Results: Functional suitability and Reliability are the quality characteristics best covered by the 17 apps, compared to Operability and Performance efficiency. Furthermore, K-NN and C4.5 obtained the highest accuracy rates; evaluating the accuracy rate of each classification technique determines whether a classifier can be designed to predict the Quality in-use from the External quality for the apps used in this study. Conclusion: Studying the relationship between fulfilling the External quality and the Quality in-use of pregnancy monitoring mPHRs is of great importance for the developers of these apps for future improvements. {\textcopyright} 2017 IEEE.}, doi = {10.1109/COMPSAC.2017.159}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85031943834\&doi=10.1109\%2fCOMPSAC.2017.159\&partnerID=40\&md5=33e58a2940bb96d4a2c3c7a272a519e5}, author = {Idri, A. and Bachiri, M. and Fernandez-Aleman, J.L. and Toval, A.} } @article {Kadi201712, title = {Knowledge discovery in cardiology: A systematic literature review}, journal = {International Journal of Medical Informatics}, volume = {97}, year = {2017}, note = {cited By 6}, pages = {12-32}, abstract = {Context: Data mining (DM) provides the methodology and technology needed to transform huge amounts of data into useful information for decision making. It is a powerful process employed to extract knowledge and discover new patterns embedded in large data sets. Data mining has been increasingly used in medicine, particularly in cardiology. In fact, DM applications can greatly benefit all those involved in cardiology, such as patients, cardiologists and nurses.
Objective: The purpose of this paper is to review papers concerning the application of DM techniques in cardiology so as to summarize and analyze evidence regarding: (1) the DM techniques most frequently used in cardiology; (2) the performance of DM models in cardiology; (3) comparisons of the performance of different DM models in cardiology. Method: We performed a systematic literature review of empirical studies on the application of DM techniques in cardiology published in the period between 1 January 2000 and 31 December 2015. Results: A total of 149 articles published between 2000 and 2015 were selected, studied and analyzed according to the following criteria: DM techniques and performance of the approaches developed. The results obtained showed that a significant number of the studies selected used classification and prediction techniques when developing DM models. Neural networks, decision trees and support vector machines were identified as being the techniques most frequently employed when developing DM models in cardiology. Moreover, neural networks and support vector machines achieved the highest accuracy rates and were proved to be more efficient than other techniques. {\textcopyright} 2016 Elsevier Ireland Ltd}, doi = {10.1016/j.ijmedinf.2016.09.005}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84988689033\&doi=10.1016\%2fj.ijmedinf.2016.09.005\&partnerID=40\&md5=9bd4f89bacb9b8dea94dd1f7eefef431}, author = {Kadi, I. and Idri, A. and Fernandez-Aleman, J.L.} }
@conference {Kadi2017, title = {Quality evaluation of cardiac decision support systems using ISO 25010 standard}, booktitle = {Proceedings of IEEE/ACS International Conference on Computer Systems and Applications, AICCSA}, year = {2017}, note = {cited By 0}, abstract = {A clinical decision support system (CDSS) is a technology designed to assist physicians and medical staff with clinical decision-making tasks. It aims to improve the safety and quality of patient care, improve treatments and outcomes, decrease dependence on memory, lower error rates, and decrease response time. A CDSS is not intended to replace clinician judgment; rather, it supports users by detecting and preventing possible risks to patient safety and by encouraging the appropriate use of evidence-based practice and guidelines. The benefits of CDSSs have contributed to the increasing number of e-health applications in recent years. Cardiology healthcare institutions are adopting such systems in their departments in order to assist cardiologists when making their decisions. This study proposes a set of cardiac DSS requirements and measures the influence of these requirements on software product quality using the ISO/IEC 25010 software quality model. A checklist of cardiac DSS requirements is also presented. As a result, the Functional suitability, Reliability, Performance efficiency and Operability external characteristics were identified as the most influenced by cardiac DSS requirements. {\textcopyright} 2016 IEEE.}, doi = {10.1109/AICCSA.2016.7945657}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85021912425\&doi=10.1109\%2fAICCSA.2016.7945657\&partnerID=40\&md5=92b7b2292f1248fc9c7033b5d5bfe20e}, author = {Kadi, I. and Idri, A. and Ouhbi, S.} } @conference {Ouhbi2017557, title = {Requirements for a mobile personal health record to improve cardiovascular healthcare services}, booktitle = {Proceedings of the ACM Symposium on Applied Computing}, volume = {Part F128005}, year = {2017}, note = {cited By 1}, pages = {557-562}, abstract = {Analyzing the health records of patients suffering from chronic diseases such as cardiovascular disease can help in understanding their conditions and therefore in improving the healthcare services offered to them. To improve cardiology healthcare quality and efficiency in the Autonomic Nervous System centre of Avicenne Hospital in Rabat, Morocco, a mobile personal health record (mPHR) along with an electronic health record has been developed. This paper presents our mPHR{\textquoteright}s functional requirements used in the product development. It also presents some aspects of the quality evaluation of the mPHR. An experiment has been conducted to assess and improve the mPHR{\textquoteright}s quality using the ISO/IEC 25010 standard. Copyright 2017 ACM.}, doi = {10.1145/3019612.3019725}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85020871426\&doi=10.1145\%2f3019612.3019725\&partnerID=40\&md5=6cf797c1b1a37c7309de64935405f2e8}, author = {Ouhbi, S. and Idri, A. and Hakmi, R. and Benjelloun, H. and Fernandez-Aleman, J.L.
and Toval, A.} } @conference {Ouhbi2017285, title = {A reusable requirements catalog for internationalized and sustainable blood donation apps}, booktitle = {ENASE 2017 - Proceedings of the 12th International Conference on Evaluation of Novel Approaches to Software Engineering}, year = {2017}, note = {cited By 0}, pages = {285-292}, abstract = {Blood donation mobile applications are efficient tools for increasing awareness of the importance of blood donation acts and for attracting blood donors. The aim of this paper is to define a reusable requirements repository (catalog) for blood donation applications based on the main related software engineering standards, e-health technology standards and the literature. The catalog contains requirements regarding internationalization to bridge the cultural and language barriers among blood donors. It also includes requirements for sustainable blood donation applications, which cover the individual, social, environmental, and technical dimensions of sustainability. This catalog can be very useful to develop, evaluate and audit blood donation applications, and it can be adapted to other m-health applications. {\textcopyright} 2017 by SCITEPRESS - Science and Technology Publications, Lda. All Rights Reserved.}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85025445810\&partnerID=40\&md5=b3c2b8763e5f5a729522529e5ca3e148}, author = {Ouhbi, S. and Fernandez-Aleman, J.L. and Idri, A. and Toval, A. and Pozo, J.R. and Bajta, M.E.} } @conference {Hosni20171251, title = {Software effort estimation using classical analogy ensembles based on random subspace}, booktitle = {Proceedings of the ACM Symposium on Applied Computing}, volume = {Part F128005}, year = {2017}, note = {cited By 1}, pages = {1251-1258}, abstract = {Software effort estimation is one of the important and complex tasks in software project management. It influences almost all processes of software development, such as bidding, planning, and budgeting. Hence, estimating the software project effort in the early stages of the software life cycle is considered key to the success of any project. To this goal, many techniques have been proposed to predict the effort required to develop a software system. Unfortunately, there is no consensus about the single best technique. Recently, Ensemble Effort Estimation, which generates a software effort estimate by combining more than one solo estimation technique by means of a combination rule, has been investigated. In this paper, we developed different homogeneous ensembles based on the combination of the Random Subspace method and the Classical Analogy technique, using two linear rules over seven datasets. The results confirm that the Random Subspace ensembles outperform the solo Classical Analogy regardless of the dataset used, and that the median rule generates better estimates than the average one. {\textcopyright} 2017 ACM.}, doi = {10.1145/3019612.3019784}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85020917571\&doi=10.1145\%2f3019612.3019784\&partnerID=40\&md5=8b1b6f9ac3cb26187929f17a78dd5669}, author = {Hosni, M. and Idri, A.} } @conference {Idri2017, title = {A survey of secondary studies in software process improvement}, booktitle = {Proceedings of IEEE/ACS International Conference on Computer Systems and Applications, AICCSA}, year = {2017}, note = {cited By 0}, abstract = {Software Process Improvement (SPI) has become one of the main strategic objectives in the software industry.
Companies are investing more in implementing software quality standards and models that focus on process assessment to improve their performance and productivity. To achieve these goals, companies focus on improving their processes by means of improvement initiatives. To help practitioners find more innovative ways to manage and implement software process improvement initiatives efficiently, a significant number of studies related to this topic have emerged in recent years. Some of them, referred to as secondary studies, focus on the interpretation and synthesis of available published research, giving an up-to-date state of the art of SPI. This state of the art is provided in the form of literature surveys or in a methodological form using well-established approaches such as systematic reviews, systematic mappings or tertiary studies. The objective of this paper is to identify and present the current secondary studies on SPI. The purpose is to discuss the methods that these literature reviews of SPI use, their quality, and the specific subjects that they cover. A set of survey research questions was proposed and discussed through the investigation of 70 selected secondary studies collected from different digital libraries. The results show that success factors and issues related to the implementation of SPI initiatives are the most studied, and that there is a need to address in depth the measurement aspects of SPI. {\textcopyright} 2016 IEEE.}, doi = {10.1109/AICCSA.2016.7945655}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85021871351\&doi=10.1109\%2fAICCSA.2016.7945655\&partnerID=40\&md5=e6b8548942846520db1fe2f2d41c4ecc}, author = {Idri, A. and Cheikhi, L.} } @article {Sardi201731, title = {A systematic review of gamification in e-Health}, journal = {Journal of Biomedical Informatics}, volume = {71}, year = {2017}, note = {cited By 5}, pages = {31-48}, abstract = {Gamification is a relatively new trend that focuses on applying game mechanics to non-game contexts in order to engage audiences and to inject a little fun into mundane activities, besides generating motivational and cognitive benefits. While many fields such as Business, Marketing and e-Learning have taken advantage of the potential of gamification, the digital healthcare domain has also started to exploit this emerging trend. This paper aims to summarize the current knowledge regarding gamified e-Health applications. A systematic literature review was therefore conducted to explore the various gamification strategies employed in e-Health and to address the benefits and the pitfalls of this emerging discipline. A total of 46 studies from multiple sources were then considered and thoroughly investigated. The results show that the majority of the papers selected reported gamification and serious gaming in health and wellness contexts related specifically to chronic disease rehabilitation, physical activity and mental health. Although gamification in e-Health has attracted a great deal of attention during the last few years, there is still a dearth of valid empirical evidence in this field. Moreover, most of the e-Health applications and serious games investigated have been proven to yield solely short-term engagement through extrinsic rewards. For gamification to reach its full potential, it is therefore necessary to build e-Health solutions on well-founded theories that exploit the core experience and psychological effects of game mechanics.
{\textcopyright} 2017 Elsevier Inc.}, doi = {10.1016/j.jbi.2017.05.011}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85019895406\&doi=10.1016\%2fj.jbi.2017.05.011\&partnerID=40\&md5=9da7af1cff144bf7fab5532c2a4aea66}, author = {Sardi, L. and Idri, A. and Fernandez-Aleman, J.L.} } @conference {Saissi201794, title = {Towards XML schema extraction from deep web}, booktitle = {Colloquium in Information Science and Technology, CIST}, year = {2017}, note = {cited By 0}, pages = {94-99}, abstract = {Today, not all of the web is accessible to web search engines. There is a hidden and inaccessible part of the web called the deep web. Many methods exist in the literature to access and to integrate the huge amount of structured data contained in the deep web. In this paper, we propose our approach to extract the XML schema describing a selected deep web source. Our approach is based on the static and the dynamic analysis of the HTML forms giving access to the selected deep web source. Our approach uses two knowledge databases during its process: our proprietary identification tables and Wordnet. The XML schema extracted will be used to integrate the associated deep web source into a mediation system without extracting all its information. {\textcopyright} 2016 IEEE.}, doi = {10.1109/CIST.2016.7805022}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85010190133\&doi=10.1109\%2fCIST.2016.7805022\&partnerID=40\&md5=0c383caab22e43a339e6b52acca8947b}, author = {Saissi, Y. and Zellou, A. and Idri, A.} } @article {Hosni20171, title = {On the value of parameter tuning in heterogeneous ensembles effort estimation}, journal = {Soft Computing}, year = {2017}, note = {cited By 0; Article in Press}, pages = {1-34}, abstract = {Accurate software development effort estimation (SDEE) is fundamental for the efficient management of software development projects, as it assists software managers in efficiently managing their human resources. Over the last four decades, while software engineering researchers have used several effort estimation techniques, including those based on statistical and machine learning methods, no consensus has been reached on the technique that can perform best in all circumstances.
To tackle this challenge, Ensemble Effort Estimation, which predicts software development effort by combining more than one solo estimation technique, has recently been investigated. In this paper, heterogeneous ensembles based on four well-known machine learning techniques (K-nearest neighbor, support vector regression, multilayer perceptron and decision trees) were developed and evaluated by investigating the impact of the parameter values of the ensemble members on estimation accuracy. In particular, this paper evaluates whether setting ensemble parameters using two optimization techniques (grid search optimization and particle swarm optimization) permits more accurate estimates of SDEE. The heterogeneous ensembles of this study were built using three combination rules (mean, median and inverse ranked weighted mean) over seven datasets. The results obtained suggest that: (1) optimized single techniques using grid search or particle swarm optimization provide more accurate estimation; (2) in general, ensembles achieve higher accuracy than their single techniques whatever the optimization technique used, even though ensembles do not dominate all single techniques; (3) heterogeneous ensembles based on optimized single techniques provide more accurate estimation; and (4) generally, particle swarm optimization and grid search techniques generate ensembles with the same predictive capability. {\textcopyright} 2017 Springer-Verlag GmbH Germany, part of Springer Nature}, doi = {10.1007/s00500-017-2945-4}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85035807309\&doi=10.1007\%2fs00500-017-2945-4\&partnerID=40\&md5=83d094f56b80a21295cf4230621f445d}, author = {Hosni, M. and Idri, A. and Abran, A. and Nassif, A.B.} } @article {Kadi20161, title = {Cardiovascular dysautonomias diagnosis using crisp and fuzzy decision tree: A comparative study}, journal = {Studies in Health Technology and Informatics}, volume = {223}, year = {2016}, note = {cited By 1}, pages = {1-8}, abstract = {Decision trees (DTs) are one of the most popular techniques for learning classification systems, especially when it comes to learning from discrete examples. In the real world, much data occurs in a fuzzy form. Hence, a DT must be able to deal with such fuzzy data. In fact, integrating fuzzy logic when dealing with imprecise and uncertain data allows reducing uncertainty and provides the ability to model fine knowledge details. In this paper, a fuzzy decision tree (FDT) algorithm was applied on a dataset extracted from the ANS (Autonomic Nervous System) unit of the Moroccan university hospital Avicenne. This unit specializes in performing several dynamic tests to diagnose patients with autonomic disorders and suggest the appropriate treatment. A set of fuzzy classifiers was generated using FID 3.4. The error rates of the generated FDTs were calculated to measure their performance. Moreover, a comparison between the error rates obtained using crisp DTs and FDTs was carried out and showed that the results of FDTs were better than those obtained using crisp DTs. {\textcopyright} 2016 The authors and IOS Press.}, doi = {10.3233/978-1-61499-645-3-1}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84971622111\&doi=10.3233\%2f978-1-61499-645-3-1\&partnerID=40\&md5=cfdd88b310ce0f50679c1d487f776be3}, author = {Kadi, I.
and Idri, A.} } @article {Idri20161, title = {Evaluating a decision making system for cardiovascular dysautonomias diagnosis}, journal = {SpringerPlus}, volume = {5}, number = {1}, year = {2016}, note = {cited By 1}, pages = {1-17}, abstract = {The autonomic nervous system (ANS) is the part of the nervous system that is involved in the homeostasis of the whole body functions. A malfunction in this system can lead to cardiovascular dysautonomias. Hence, a set of dynamic tests is adopted in ANS units to diagnose and treat patients with cardiovascular dysautonomias. The purpose of this study is to develop and evaluate a decision tree-based cardiovascular dysautonomias prediction system on a dataset collected from the ANS unit of the Moroccan university hospital Avicenne. We collected a dataset of 263 records from the ANS unit of the Avicenne hospital. This dataset was split into three subsets: a training set (123 records), a test set (55 records) and a validation set (85 records). The C4.5 decision tree algorithm was used in this study to develop the prediction system. Moreover, the Java Enterprise Edition platform was used to implement a prototype of the developed system, which was deployed in the Avicenne ANS unit so as to be clinically validated. The performance of the decision tree-based prediction system was evaluated by means of the error rate criterion. The error rates were measured for each classifier and achieved average values of 1.46, 2.24 and 0.89~\% in the training, test and validation sets, respectively. The results obtained are encouraging, but further replicated studies still need to be performed in order to confirm the findings of this study. {\textcopyright} 2016, Idri and Kadi.}, doi = {10.1186/s40064-016-1730-7}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84955586605\&doi=10.1186\%2fs40064-016-1730-7\&partnerID=40\&md5=0c9525bf9e5c6b03de358df01c579172}, author = {Idri, A. and Kadi, I.} } @conference {Anter2016, title = {Nk-schemas: A novel algorithm for creating the view{\textquoteright} schemas to materialize in hybrid mediator}, booktitle = {Proceedings of IEEE/ACS International Conference on Computer Systems and Applications, AICCSA}, volume = {2016-July}, year = {2016}, note = {cited By 0}, abstract = {The explosion of information and telecommunications technologies has made the access to and production of information easy. Thus, a very large mass of information has been generated. This situation has made integration systems an immediate necessity. Among these systems, there is the hybrid mediator. The latter queries one part of the data on demand, as in the virtual approach, while loading, filtering and storing the second part, as views, in a local database. The creation of this second part is a critical task. We propose in this paper a new algorithm for creating the views{\textquoteright} schemas to materialize in the hybrid integration system. {\textcopyright} 2015 IEEE.}, doi = {10.1109/AICCSA.2015.7507200}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84980416133\&doi=10.1109\%2fAICCSA.2015.7507200\&partnerID=40\&md5=1412e0fe0867141a2f0ec5a2e0034ebb}, author = {Anter, S. and Zellou, A.
and Idri, A.} } @article {Anter20162128, title = {Retrieving and materializing data in hybrid mediators}, journal = {International Journal of Applied Engineering Research}, volume = {11}, number = {3}, year = {2016}, note = {cited By 0}, pages = {2128-2134}, abstract = {With the emergence of the new generation of information technologies and telecommunications, the mass of information produced by individuals and enterprises has increased considerably. Thus, in order to manage this diversity of information, integration systems were proposed. Among these, we find the hybrid information integration systems. They allow materializing part of the data in a local database, while integrating the other part virtually. As the materialized part is organized as views, it becomes necessary to propose algorithms for this purpose. Among the most interesting ones is k-schema, which organizes the attributes into a set of views while assigning each attribute to a single view. This choice causes some data that are highly requested by users not to be loaded while, at the same time, data that are rarely requested are loaded. In this paper, we propose a new algorithm to this end, in which the same attribute may be assigned to several views. We also propose new functions for calculating the dependencies between attributes. {\textcopyright} Research India Publications.}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84973470278\&partnerID=40\&md5=140f0f35ca53260f11d97b5958f6ee8c}, author = {Anter, S. and Zellou, A. and Idri, A.} } @conference {Elmidaoui2016, title = {A survey of empirical studies in software product maintainability prediction models}, booktitle = {SITA 2016 - 11th International Conference on Intelligent Systems: Theories and Applications}, year = {2016}, note = {cited By 0}, abstract = {Software product maintainability is critical to the achievement of software product quality. In order to keep software useful as long as possible, software product maintainability prediction (SPMP) has become an important endeavor. The objective of this paper is to identify and present the current research on SPMP. The search was conducted using digital libraries to find as many research papers as possible. Selected papers are classified according to the following survey classification criteria (SCs): research type, empirical type, publication year and channel. Based on the results of the survey, we provide a discussion of the current state of the art in software maintainability prediction models and techniques. We believe that this study will be a reliable basis for further research in software maintainability studies. {\textcopyright} 2016 IEEE.}, doi = {10.1109/SITA.2016.7772267}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85010468522\&doi=10.1109\%2fSITA.2016.7772267\&partnerID=40\&md5=8184d51d9fbbba990544f8dc158d2375}, author = {Elmidaoui, S. and Cheikhi, L. and Idri, A.} } @conference {Anter2016, title = {Towards a generic model of a user profile}, booktitle = {SITA 2016 - 11th International Conference on Intelligent Systems: Theories and Applications}, year = {2016}, note = {cited By 0}, abstract = {User modelling is an old research discipline. The main concern of this discipline is to improve the quality of human-computer interaction by predicting the user{\textquoteright}s goals, preferences and context. Thus, adapting and personalizing a document or an application for a particular user requires information about that user.
This information is often referred to as the {\textquoteright}user profile{\textquoteright}. A user profile modelling process must be done in two stages. These can be expressed by two questions: (1) {\textquoteright}What data?{\textquoteright} and (2) {\textquoteright}In what form will they be organized?{\textquoteright} The answer to the first question will determine all the relevant information that best represents the interests and needs of the user. As for the second, it will determine the logical structure in which a profile will be modelled. This paper aims to provide answers to both questions. To do this, we begin with a presentation of the different areas where the user profile can make a major contribution. In the second step, we define all the information to be included in the user profile as well as a generic model that we can adapt to different areas. {\textcopyright} 2016 IEEE.}, doi = {10.1109/SITA.2016.7772265}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85010301085\&doi=10.1109\%2fSITA.2016.7772265\&partnerID=40\&md5=2f2efa11599627d6b4c5ad967869851f}, author = {Anter, S. and Yazidi, M.H.E. and Zellou, A. and Idri, A.} } @conference {Kadi2015816, title = {A decision tree-based approach for cardiovascular dysautonomias diagnosis: A case study}, booktitle = {Proceedings - 2015 IEEE Symposium Series on Computational Intelligence, SSCI 2015}, year = {2015}, note = {cited By 1}, pages = {816-823}, abstract = {Terms such as Knowledge Discovery in Databases (KDD), Data Mining (DM) and Machine Learning (ML) gain, from day to day, an increasing significance in medical data analysis. They permit the identification, evaluation and quantification of some less visible, intuitively unpredictable patterns, generally by using large sets of data. Researchers have long been concerned with applying DM tools to improve data analysis on large data sets. DM has been increasingly used in medicine, particularly in cardiology. In fact, data mining applications can greatly benefit all parties involved in cardiology. The autonomic nervous system (ANS) is the part of the nervous system that is involved in the homeostasis of the whole body functions. A malfunction in this system can lead to cardiovascular dysautonomias. Thereby, a set of dynamic tests is adopted in ANS units to diagnose and treat patients with cardiovascular dysautonomias. In this paper, a case study was performed in order to construct a cardiovascular dysautonomias prediction system using data mining techniques and a dataset collected from an ANS unit of the Moroccan university hospital Avicenne. The prediction system is a decision tree-based classifier that was developed using the C4.5 decision tree algorithm to automate the analysis procedure of the ANS{\textquoteright}s test results and make it easier for specialists. The performance of the generated decision trees was evaluated and the results obtained achieved high accuracy rates, which were very promising. In addition, a clinical validation of the developed system was carried out on new patients. In fact, a prototype of the developed system was implemented on the JEE platform and deployed in the ANS unit so as to be validated clinically. The results were analyzed and the prototype proved to be highly accurate, interpretable, time-saving and easy to use. {\textcopyright} 2015 IEEE.}, doi = {10.1109/SSCI.2015.121}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84964940639\&doi=10.1109\%2fSSCI.2015.121\&partnerID=40\&md5=f0d227a9f5e18f21b2c5aebad30b41f8}, author = {Kadi, I.
and Idri, A.} } @conference {ElIdrissiSelma2015, title = {Mapping discovery methodology in a pure P2P mediation system for XML schemas}, booktitle = {2015 10th International Conference on Intelligent Systems: Theories and Applications, SITA 2015}, year = {2015}, note = {cited By 0}, abstract = {The exponential development of information exchange today has renewed the difficulty of finding the relevant information desired by an end user. Indeed, information is represented and stored in a multitude of information sources in a very heterogeneous way. The types of these information sources are varied (textual, relational, multimedia, semi-structured...) and their storage systems very different (file system, DBMS, applications). It is thus necessary to offer an information source integration system, while ensuring transparency with respect to the distribution and heterogeneity of these sources. In this work, we focus on the integration of information sources via the mediation approach in a pure P2P environment. We are particularly interested in mapping discovery in this type of integration system. {\textcopyright} 2015 IEEE.}, doi = {10.1109/SITA.2015.7358380}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84962376117\&doi=10.1109\%2fSITA.2015.7358380\&partnerID=40\&md5=cdb3cd7ca4bce869b36a15714aae4901}, author = {El Idrissi Selma, E.Y. and Zellou, A. and Idri, A.} } @conference {ElYazidi2015, title = {Mapping in GAV context}, booktitle = {2015 10th International Conference on Intelligent Systems: Theories and Applications, SITA 2015}, year = {2015}, note = {cited By 0}, abstract = {Mediation is an approach that allows integrating different heterogeneous, distributed and scalable data sources. The objective of our paper is to define a mapping approach, mapping being one of the essential operations that describe and define the set of links between the two levels of a mediation system: the mediator (global) schema and the schemas of the sources to be integrated (local). Our approach allows defining the mapping for systems with complex schemas (schemas with complex elements) by using functions that take into account two aspects: syntactic and semantic. {\textcopyright} 2015 IEEE.}, doi = {10.1109/SITA.2015.7358376}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84962421014\&doi=10.1109\%2fSITA.2015.7358376\&partnerID=40\&md5=44fa100d239873e3ce44cc7228d15aa7}, author = {El Yazidi, M.H. and Zellou, A. and Idri, A.} } @conference {Anter2015, title = {Nk-schemas: A novel algorithm for creating the views{\textquoteright} sch{\'e}mas to materialize in hybrid mediator}, booktitle = {Proceedings of 2015 IEEE World Conference on Complex Systems, WCCS 2015}, year = {2015}, note = {cited By 0}, abstract = {The explosion of information and telecommunications technologies has made the access to and production of information easy. Thus, a very large mass of information has been generated. This situation has made integration systems an immediate necessity. Among these systems, there is the hybrid mediator. The latter queries one part of the data on demand, as in the virtual approach, while loading, filtering and storing the second part, as views, in a local database. The creation of this second part is a critical task. We propose in this paper a new algorithm for creating the views{\textquoteright} schemas to materialize in the hybrid integration system.
{\textcopyright} 2015 IEEE.}, doi = {10.1109/ICoCS.2015.7483282}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84978397930\&doi=10.1109\%2fICoCS.2015.7483282\&partnerID=40\&md5=c2f29f0e70dec9947e07f07a38190ec7}, author = {Anter, S. and Zellou, A. and Idri, A.} } @conference {Idri2015, title = {Preface}, booktitle = {2015 10th International Conference on Intelligent Systems: Theories and Applications, SITA 2015}, year = {2015}, note = {cited By 0}, doi = {10.1109/SITA.2015.7358374}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84962432872\&doi=10.1109\%2fSITA.2015.7358374\&partnerID=40\&md5=fd52409377ca778d3a97413740572ebd}, author = {Idri, A. and Janati Idrissi, M.A.} } @conference {Mimouni2015, title = {Quality of Data in mediation systems}, booktitle = {2015 10th International Conference on Intelligent Systems: Theories and Applications, SITA 2015}, year = {2015}, note = {cited By 0}, abstract = {Quality of Data (QoD) has become a major concern, increasingly considered necessary to improve the satisfaction of end users. In this work, we approach a methodology which consists of classifying and listing the factors impacting Data Quality in Data Integration Systems, and more specifically those which influence mediation systems; these will constitute our long-term research axis. Moreover, we propose in this work a contribution in the form of an attempt to define the main factors impacting Quality of Data. {\textcopyright} 2015 IEEE.}, doi = {10.1109/SITA.2015.7358375}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84962456193\&doi=10.1109\%2fSITA.2015.7358375\&partnerID=40\&md5=aa9e5c23e4b0cc19331495a5ca6e9a5c}, author = {Mimouni, L. and Zellou, A. and Idri, A.} } @conference {Ouaftouh2015, title = {User profile model: A user dimension based classification}, booktitle = {2015 10th International Conference on Intelligent Systems: Theories and Applications, SITA 2015}, year = {2015}, note = {cited By 0}, abstract = {As the amount of information offered by information systems increases exponentially, the need for personalized approaches to information access increases. This work discusses user profiles designed for providing personalized information access. We first present a general classification of research directions on adaptive systems, followed by a state-of-the-art study of user profiling. We then propose a new classification approach for user profile models. This classification is based on the user dimensions considered to build the user profile. {\textcopyright} 2015 IEEE.}, doi = {10.1109/SITA.2015.7358378}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84962408714\&doi=10.1109\%2fSITA.2015.7358378\&partnerID=40\&md5=ca06e7ab84ba76c5e6dfa5355f6faed2}, author = {Ouaftouh, S. and Zellou, A. and Idri, A.} } @conference {Saissi2014178, title = {Extraction of relational schema from deep web sources: A form driven approach}, booktitle = {2014 2nd World Conference on Complex Systems, WCCS 2014}, year = {2014}, note = {cited By 5}, pages = {178-182}, abstract = {The deep web is the biggest unexplored part of the web, and we need to access all of its data sources directly without using any crawling or surfacing method. For this, we choose to use a virtual web integration system. However, the deep web virtual integration methods existing today focus only on the integration of the query interfaces giving access to the deep web.
These query interfaces are integrated to build a global query interface able to query all the deep web sources. The objective of our work is to propose another vision of a deep web virtual integration system that uses a mediated schema built with a relational schema describing each deep web source. This paper proposes our approach to extract a relational schema describing a deep web source. The key idea underlying our approach is to analyze two kinds of structured information, the HTML form and the HTML table extracted from the deep web source, to discover its data structure and to allow us to build a relational schema describing it. We also use a knowledge table to take advantage of our learning experience in extracting relational schemas from deep web sources. {\textcopyright} 2014 IEEE.}, doi = {10.1109/ICoCS.2014.7060888}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84929193260\&doi=10.1109\%2fICoCS.2014.7060888\&partnerID=40\&md5=19d19e00ed72fa7b4c9eeae768e3b02e}, author = {Saissi, Y. and Zellou, A. and Idri, A.} } @conference {Saissi2014, title = {Form driven web source integration}, booktitle = {2014 9th International Conference on Intelligent Systems: Theories and Applications, SITA 2014}, year = {2014}, note = {cited By 0}, abstract = {Web sources contain a huge amount of data that we need to integrate and to use. The integration of a web source requires knowing its description. In general, web sources contain structured data such as HTML forms and HTML tables. This paper proposes our approach to extract a relational schema describing a web source, using its structured information. The key idea underlying our approach is to extract the relational data structure of the HTML forms contained in the web source. Thanks to the features of the HTML form, the relational data structure extracted will describe not only the HTML form but also the associated web source. We then propose to query the extracted HTML forms to generate interesting HTML table results. The data structure of the resulting HTML tables will be used to enhance the relational data structure of the associated HTML form. Finally, with all the relational data structures extracted from all the HTML forms and HTML tables, we build the relational schema describing the associated web source. {\textcopyright} 2014 IEEE.}, doi = {10.1109/SITA.2014.6847288}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84904632763\&doi=10.1109\%2fSITA.2014.6847288\&partnerID=40\&md5=900e69a0f5f601e9d9f86ec1fe81bd55}, author = {Saissi, Y. and Zellou, A. and Idri, A.} } @article {ElIdrissi2014261, title = {MDPM: An algorithm for mapping discovery in P2P mediation system}, journal = {Journal of Theoretical and Applied Information Technology}, volume = {63}, number = {2}, year = {2014}, note = {cited By 0}, pages = {261-273}, abstract = {Information integration systems consist in offering a uniform interface providing access to a set of autonomous and distributed information sources. The most important advantage of an information integration system is that it allows users to specify what they want, rather than thinking about how to get the responses. Studies in this field have aimed at developing tools allowing transparent access to data sources disseminated over a network. In particular, there are two major classes of integration systems: mediation systems based on the mediator/wrapper paradigm, and peer-to-peer (P2P) systems. Recently, other integration systems have emerged, such as P2P mediation systems.
In these systems, the correspondence problem between schemas is crucial, especially as integrating different sources requires identifying the elements that may be related across the various schemas. These are called correspondences, or mappings, between schemas. In this paper, we study this problem of mapping discovery and we present an approach for the automatic discovery of correspondences in a pure peer-to-peer mediation system. {\textcopyright} 2005 - 2014 JATIT \& LLS. All rights reserved.}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84901297255\&partnerID=40\&md5=727988fcc4f07295ec2f81aef5ccbc2a}, author = {El Idrissi, S.E.Y. and Zellou, A. and Idri, A.} } @conference {Moumane2014134, title = {Using ISO 9126 with QoS DiffServ model for evaluating software quality in mobile environments}, booktitle = {2014 2nd World Conference on Complex Systems, WCCS 2014}, year = {2014}, note = {cited By 1}, pages = {134-139}, abstract = {The evaluation of software quality in mobile environments is a subject of active research and a difficult task. This is caused by the dynamic characteristics of mobile networks and the limited resources of mobile devices. This paper presents a study to help quality managers and evaluators use the ISO 9126 software quality standard together with the DiffServ QoS model to evaluate product quality in mobile environments. The effects of mobile technology limitations are analyzed for each QoS parameter and for each DiffServ class. As a result of this study, reliability and efficiency are the software product quality characteristics most correlated with DiffServ classes in terms of the influence of mobile environment limitations. This is because these characteristics depend on the software runtime environment, and the DiffServ QoS model focuses on quality at the network level, unlike the ISO 9126 standard, which applies at the application level. {\textcopyright} 2014 IEEE.}, doi = {10.1109/ICoCS.2014.7060905}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84929192929\&doi=10.1109\%2fICoCS.2014.7060905\&partnerID=40\&md5=d21fd7dfbdbd844dde3c3a557fc5b822}, author = {Moumane, K. and Idri, A.} } @conference {Anter2013201, title = {The hybrid integration system Towards a new approach for creating candidate views for materialization}, booktitle = {2013 5th International Conference on Computer Science and Information Technology, CSIT 2013 - Proceedings}, year = {2013}, note = {cited By 0}, pages = {201-209}, abstract = {The popularization of information technologies and telecommunications has generated an enormous amount of information. This information is generally heterogeneous, stored in autonomous and distributed sources. Thus, it becomes necessary to introduce information integration systems. These systems must ensure an optimal query response time and the freshness of data. A purely virtual approach cannot meet these requirements. On the one hand, the query response time is very important: indeed, the mediator must access the sources every time to load the relevant information. On the other hand, the sources are not always available. The establishment of a hybrid integration system, where a portion of the information is materialized in the mediator and the other portion remains in the sources and is extracted at query time, is an effective solution to these problems, provided that the materialized part has been carefully chosen.
Based on the distribution of user queries, we present in this paper an approach to select the information most requested by users and organize it as candidate views for materialization in the mediator. {\textcopyright} 2013 IEEE.}, doi = {10.1109/CSIT.2013.6588780}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84884866157\&doi=10.1109\%2fCSIT.2013.6588780\&partnerID=40\&md5=901ef3c27b658be16f920570e0dd9cce}, author = {Anter, S. and Zellou, A. and Idri, A.} } @article {Anter2013158, title = {K-Schema: A new approach, based on the distribution of user queries, to create views to materialize in a hybrid integration system}, journal = {Journal of Theoretical and Applied Information Technology}, volume = {47}, number = {1}, year = {2013}, note = {cited By 2}, pages = {158-170}, abstract = {The explosion of information technologies and telecommunications has made the access to and production of information easy, and a very large mass of information has thus been generated. This situation has made integration systems a major need. Among these systems, there is the hybrid mediator. The latter queries one part of the data on demand, as in the virtual approach, while loading, filtering and storing the second part, as views, in a local database. The choice of this second part is a critical task. This paper presents a selective approach which bases the creation of these views essentially on the queries previously posed to the system. Based on the distribution of previous user queries, our approach extracts the data most queried by users. The obtained data are classified as candidate views for materialization; the approach then selects which ones to materialize among all those created in the first step. {\textcopyright} 2005 - 2013 JATIT \& LLS. All rights reserved.}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84872374571\&partnerID=40\&md5=c84d7e37d38a4f230cdf4c3a43c22954}, author = {Anter, S. and Zellou, A. and Idri, A.} } @article {Anter2013816, title = {MATHIS: A new approach for creating views to materialize in a hybrid integration system}, journal = {International Review on Computers and Software}, volume = {8}, number = {3}, year = {2013}, note = {cited By 2}, pages = {816-825}, abstract = {The popularization of information and telecommunications technologies has made integration systems an immediate necessity. Among the latter, there are the hybrid integration systems. These offer a local database in which one part of the data is stored, while the other part is integrated virtually. The objective is to increase system performance while ensuring a tradeoff between query response time and data freshness. The approach that has proposed a complete solution is one that creates the candidate views for materialization before selecting, among them, those that will be materialized. To do this, it selects the attributes most requested by users. Using the k-schema algorithm, these attributes are organized into schemas, on which the candidate views for materialization are built. This approach suffers from a defect: in the phase of extracting the attributes of interest, it relies only on their frequency of appearance in user queries. Some attributes are thus selected even if they do not meet any selection criterion, and they cause the elimination, in the selection phase, of the views to which they were assigned.
In this paper, we propose a new approach that eliminates all attributes that do not satisfy the selection criteria prior to the creation of the views to materialize. We also propose a new solution for calculating the values of the attributes relative to the different criteria. {\textcopyright} 2013 Praise Worthy Prize S.r.l. - All rights reserved.}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84881501064\&partnerID=40\&md5=5b529bb907d58895a51faec2468f9a6c}, author = {Anter, S. and Zellou, A. and Idri, A.} } @conference {ElYazidi2013, title = {A new approach of mapping}, booktitle = {2013 3rd International Symposium ISKO-Maghreb}, year = {2013}, note = {cited By 0}, abstract = {Data integration involves combining data residing in different sources and providing the user with a unified view of this data. The problem of designing data integration systems is important in current real-world applications, and the question that arises in mapping is how to link the global schema to the set of local source schemas. In this paper, we present our mapping approach for mediation systems, a new approach that focuses on two main aspects: the syntactic aspect, which takes into account the structure and form of the schema elements, and the semantic aspect, which relates to the meaning of the schema elements. {\textcopyright} 2013 IEEE.}, doi = {10.1109/ISKO-Maghreb.2013.6728197}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84894137287\&doi=10.1109\%2fISKO-Maghreb.2013.6728197\&partnerID=40\&md5=e43d0431faff27e488fadb9353329892}, author = {El Yazidi, M.H. and Zellou, A. and Idri, A.} } @conference {Idri2013207, title = {Software cost estimation by classical and Fuzzy Analogy for Web Hypermedia Applications: A replicated study}, booktitle = {Proceedings of the 2013 IEEE Symposium on Computational Intelligence and Data Mining, CIDM 2013 - 2013 IEEE Symposium Series on Computational Intelligence, SSCI 2013}, year = {2013}, note = {cited By 1}, pages = {207-213}, abstract = {The aim of this paper is to evaluate and compare Classical Analogy and Fuzzy Analogy for software cost estimation on a Web software dataset. Hence, the paper aims to replicate the results of our previous experiments on this dataset. Moreover, questions regarding the accuracy of the estimates, the tolerance of imprecision and uncertainty in cost drivers, and the favorable context for using estimation by analogy are discussed. This study confirmed the usefulness of Fuzzy Analogy for software cost estimation. {\textcopyright} 2013 IEEE.}, doi = {10.1109/CIDM.2013.6597238}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84885622172\&doi=10.1109\%2fCIDM.2013.6597238\&partnerID=40\&md5=a7c6b5315989675548f9310b89df6f32}, author = {Idri, A. and Zahi, A.} } @conference {ElIdrissi2013134, title = {Towards a dynamic mapping in P2P mediation system}, booktitle = {2013 5th International Conference on Computer Science and Information Technology, CSIT 2013 - Proceedings}, year = {2013}, note = {cited By 0}, pages = {134-141}, abstract = {The integration of data sources disseminated over a network is a major preoccupation in many sectors. The problems of sharing and integrating information have interested many research communities in recent years. Work has been carried out to develop tools and techniques for transparent access to the data sources available in a network.
In this context, two types of integration systems are distinguished: virtual integration based on mediation systems and integration based on Peer-to-Peer (P2P) systems. In this paper, we are interested in the correspondence (mapping) discovery problem between global schemas in a third type of architecture: mediation systems based on a P2P architecture. {\textcopyright} 2013 IEEE.}, doi = {10.1109/CSIT.2013.6588771}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84884899567\&doi=10.1109\%2fCSIT.2013.6588771\&partnerID=40\&md5=f694b43a6730e442a9f28c5c4d562a85}, author = {El Idrissi, S.E.Y. and Zellou, A. and Idri, A.} } @article {ElYazidi20131650, title = {Towards a fuzzy mapping for virtual integration system}, journal = {International Review on Computers and Software}, volume = {8}, number = {7}, year = {2013}, note = {cited By 1}, pages = {1650-1657}, abstract = {To access different information sources in a unified way while hiding from the user the autonomy, heterogeneity, distribution, and evolution of these sources, the sources must be integrated; among the existing integration approaches, mediation appears as a virtual approach. The mediation integration approach is performed using a global schema and a set of mappings (links) associating the schemas of the sources to integrate with the global schema. The problem arises in creating the set of correspondences between the elements of the global schema and the elements of the local schemas. In this paper, we present our approach, a new fuzzy mapping approach for mediation systems, based on the fundamental principles of fuzzy set theory. Our approach defines and associates with each link between two elements a weight that reflects the degree of its existence; this weight is computed through an analysis using two components: syntactic and semantic. {\textcopyright} 2013 Praise Worthy Prize S.r.l. - All rights reserved.}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84884268047\&partnerID=40\&md5=475d3b48645e8a3f8406303d28193d1a}, author = {El Yazidi, M.H. and Zellou, A. and Idri, A.} } @conference {Anter2012, title = {Personalization of a hybrid integration system: Creation of views to materialize based on the distribution of user queries}, booktitle = {Proceedings of 2012 International Conference on Complex Systems, ICCS 2012}, year = {2012}, note = {cited By 0}, doi = {10.1109/ICoCS.2012.6458566}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84874493599\&doi=10.1109\%2fICoCS.2012.6458566\&partnerID=40\&md5=8cb54118f3a51aadd4afa9789ecdcc3a}, author = {Anter, S. and Zellou, A. and Idri, A.} } @conference {Idri2012863, title = {Software cost estimation by fuzzy analogy for ISBSG repository}, booktitle = {World Scientific Proc. Series on Computer Engineering and Information Science 7; Uncertainty Modeling in Knowledge Engineering and Decision Making - Proceedings of the 10th International FLINS Conf.}, volume = {7}, year = {2012}, note = {cited By 0}, pages = {863-868}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84892659716\&partnerID=40\&md5=2001b51ce67b133f56f1ca779fc56611}, author = {Idri, A.
and Amazal, F.A.} } @article {Cheikhi2012462, title = {Software productivity: Harmonization in ISO/IEEE software engineering standards}, journal = {Journal of Software}, volume = {7}, number = {2}, year = {2012}, note = {cited By 2}, pages = {462-470}, doi = {10.4304/jsw.7.2.462-470}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84857929017\&doi=10.4304\%2fjsw.7.2.462-470\&partnerID=40\&md5=55088a24a8af596776fe0e2ddf74892c}, author = {Cheikhi, L. and Al-Qutaish, R.E. and Idri, A.} } @conference {Yazidi2012, title = {Towards a fuzzy mapping for mediation systems}, booktitle = {Proceedings of 2012 International Conference on Complex Systems, ICCS 2012}, year = {2012}, note = {cited By 0}, doi = {10.1109/ICoCS.2012.6458573}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84874494252\&doi=10.1109\%2fICoCS.2012.6458573\&partnerID=40\&md5=f777377bdfd9a344347799d38e97941d}, author = {Yazidi, M.H.E. and Zellou, A. and Idri, A.} } @article {Zakrani2010516, title = {Applying radial basis function neural networks based on fuzzy clustering to estimate web applications effort}, journal = {International Review on Computers and Software}, volume = {5}, number = {5}, year = {2010}, note = {cited By 14}, pages = {516-524}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-78951478126\&partnerID=40\&md5=d80672acda377940a046a22117b6b033}, author = {Zakrani, A. and Idri, A.} } @conference {Idri2008, title = {Functional equivalence between radial basis function neural networks and Fuzzy analogy in software cost estimation}, booktitle = {2008 3rd International Conference on Information and Communication Technologies: From Theory to Applications, ICTTA}, year = {2008}, note = {cited By 0}, doi = {10.1109/ICTTA.2008.4530015}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-49149083210\&doi=10.1109\%2fICTTA.2008.4530015\&partnerID=40\&md5=662843c9cdc63dbc94ac6b4d87307bb2}, author = {Idri, A. and Zakrani, A. and Abran, A.} } @conference {Idri2008576, title = {Fuzzy radial basis function neural networks for web applications cost estimation}, booktitle = {Innovations{\textquoteright}07: 4th International Conference on Innovations in Information Technology, IIT}, year = {2008}, note = {cited By 0}, pages = {576-580}, doi = {10.1109/IIT.2007.4430367}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-50249168929\&doi=10.1109\%2fIIT.2007.4430367\&partnerID=40\&md5=ee74c556489187d35257cf7c8b48c2c0}, author = {Idri, A. and Zakrani, A. and Elkoutbi, M. and Abran, A.} } @article {Idri200821, title = {Software cost estimation models using radial basis function neural networks}, journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)}, volume = {4895 LNCS}, year = {2008}, note = {cited By 4}, pages = {21-31}, doi = {10.1007/978-3-540-85553-8_2}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-54249149525\&doi=10.1007\%2f978-3-540-85553-8_2\&partnerID=40\&md5=483abae3a29ef61079aab5da6a389786}, author = {Idri, A. and Zahi, A. and Mendes, E. 
and Zakrani, A.} } @conference {Jakimi2008521, title = {Use cases and scenarios engineering}, booktitle = {Innovations{\textquoteright}07: 4th International Conference on Innovations in Information Technology, IIT}, year = {2008}, note = {cited By 0}, pages = {521-525}, doi = {10.1109/IIT.2007.4430418}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-50249112803\&doi=10.1109\%2fIIT.2007.4430418\&partnerID=40\&md5=82fe395e89ce2d783684d6544100e69f}, author = {Jakimi, A. and Sabraoui, A. and Badidi, E. and Idri, A. and El Koutbi, M.} } @conference { ISI:000255983000056, title = {Use cases and scenarios engineering}, booktitle = {2007 INNOVATIONS IN INFORMATION TECHNOLOGIES, VOLS 1 AND 2}, year = {2007}, note = {4th International Conference on Innovations in Information Technology, Dubai, U ARAB EMIRATES, NOV 18-20, 2007}, pages = {274+}, publisher = {IEEE}, organization = {IEEE}, abstract = {In this paper, we suggest a requirements engineering process that composes (merges) use cases/scenarios and yields a formal specification of the system in the form of a high-level Petri net. Use cases and scenarios are acquired in the form of diagrams as defined by the Unified Modeling Language (UML). These diagrams are composed and transformed into Petri net specifications and merged to obtain a global Petri net specification capturing the behavior of the entire system. From the global specification, a system prototype can be generated and embedded in a user interface builder environment for further refinement. Based on end-user feedback, the use cases and the input scenarios may be iteratively refined. The result of the overall process is a specification consisting of a global Petri net, together with the generated and refined prototype. This paper discusses some activities of this process.}, isbn = {978-1-4244-1840-4}, author = {Jakimi, A. and Sabraoui, A. and Badidi, E. and Idri, A. and El Koutbi, M.} }