@conference {Abnane20211, title = {Heterogeneous ensemble imputation for software development effort estimation}, booktitle = {PROMISE 2021 - Proceedings of the 17th International Conference on Predictive Models and Data Analytics in Software Engineering, co-located with ESEC/FSE 2021}, year = {2021}, note = {cited By 1}, pages = {1-10}, abstract = {Choosing the appropriate Missing Data (MD) imputation technique for a given Software Development Effort Estimation (SDEE) technique is not a trivial task. In fact, the impact of MD imputation on the estimation output depends on the dataset and the SDEE technique used, and there is no single best imputation technique in all contexts. Thus, an attractive solution is to use more than one imputation technique and combine their results into a final imputation outcome. This concept is called ensemble imputation and can help to significantly improve estimation accuracy. This paper develops and evaluates a heterogeneous ensemble imputation whose members are four single imputation techniques: K-Nearest Neighbors (KNN), Expectation Maximization (EM), Support Vector Regression (SVR), and Decision Trees (DT). The impact of the ensemble imputation was evaluated and compared with that of the four single imputation techniques on the accuracy, measured in terms of the standardized accuracy criterion, of four SDEE techniques: Case-Based Reasoning (CBR), Multilayer Perceptron (MLP), Support Vector Regression (SVR), and Reduced Error Pruning Tree (REPTree). The Wilcoxon statistical test was also performed in order to assess whether the results are significant. All the empirical evaluations were carried out over six datasets, namely ISBSG, China, COCOMO81, Desharnais, Kemerer, and Miyazaki. Results show that the use of heterogeneous ensemble-based imputation instead of single imputation significantly improved the accuracy of the four SDEE techniques. Indeed, the ensemble imputation technique was ranked either first or second in all contexts. {\textcopyright} 2021 ACM.}, keywords = {Case based reasoning, Casebased reasonings (CBR), Decision trees, Empirical evaluations, Expectation Maximization, Forestry, Heterogeneous ensembles, Imputation techniques, K nearest neighbor (KNN), Maximum principle, Multilayer neural networks, Nearest neighbor search, Predictive analytics, Software design, Software development effort, Support vector regression, Support vector regression (SVR)}, doi = {10.1145/3475960.3475984}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85113586361\&doi=10.1145\%2f3475960.3475984\&partnerID=40\&md5=7bd6bbfb801a84cd7694e3713d2d0081}, author = {Abnane, I. and Idri, A. and Hosni, M. and Abran, A.} }

@conference {ElOuassif2021352, title = {Homogeneous ensemble based support vector machine in breast cancer diagnosis}, booktitle = {HEALTHINF 2021 - 14th International Conference on Health Informatics; Part of the 14th International Joint Conference on Biomedical Engineering Systems and Technologies, BIOSTEC 2021}, year = {2021}, note = {cited By 5}, pages = {352-360}, abstract = {Breast Cancer (BC) is one of the most common forms of cancer and one of the leading causes of mortality among women. Hence, detecting and accurately diagnosing BC at an early stage remains a major factor in women{\textquoteright}s long-term survival. To this aim, numerous single techniques have been proposed and evaluated for BC classification. However, none of them proved to be suitable in all situations.
Currently, ensemble methods are being widely investigated to help diagnose BC; they consist of generating one classification model by combining more than one single technique by means of a combination rule. This paper evaluates homogeneous ensembles whose members are four variants of the Support Vector Machine (SVM) classifier. The four SVM variants use four different kernels: Linear Kernel, Normalized Polynomial Kernel, Radial Basis Function Kernel, and Pearson VII function-based Universal Kernel. A Multilayer Perceptron (MLP) classifier is used to combine the outputs of the base classifiers into a final decision. Four well-known BC datasets from online repositories are used. The findings of this study suggest that: (1) the ensembles provided very promising performance compared to their base classifiers, and (2) there is no SVM ensemble with a combination of kernels that performs better on all datasets. Copyright {\textcopyright} 2021 by SCITEPRESS {\textendash} Science and Technology Publications, Lda. All rights reserved.}, keywords = {Base classifiers, Biomedical engineering, Breast cancer diagnosis, Classification models, Combination rules, Computer aided diagnosis, Diseases, Medical informatics, Multilayer neural networks, Multilayer Perceptron (MLP) classifier, Online repositories, Polynomial kernels, Radial basis function kernels, Support vector machines}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85103824221\&partnerID=40\&md5=2340dec93a42872a9ece0b6f6b9fccbc}, author = {El Ouassif, B. and Idri, A. and Hosni, M.} }

@conference {Hosni2017174, title = {Heterogeneous Ensembles for Software Development Effort Estimation}, booktitle = {Proceedings - 2016 3rd International Conference on Soft Computing and Machine Intelligence, ISCMI 2016}, year = {2017}, note = {cited By 0}, pages = {174-178}, abstract = {Software effort estimation influences almost all processes of software development, such as bidding, planning, and budgeting. Hence, delivering an accurate estimate in the early stages of the software life cycle may be the key to the success of any project. To this aim, many solo techniques have been proposed to predict the effort required to develop a software system. Nevertheless, none of them proved to be suitable in all circumstances. Recently, Ensemble Effort Estimation (EEE) has been investigated; it consists of generating the effort estimate by combining more than one solo estimation technique by means of a combination rule. In this study, a heterogeneous EEE based on four machine learning techniques was investigated using three linear rules and two well-known datasets. The results of this study suggest that the proposed heterogeneous EEE yields very promising performance and that there is no best combiner rule that can be recommended. {\textcopyright} 2016 IEEE.}, doi = {10.1109/ISCMI.2016.15}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85034656098\&doi=10.1109\%2fISCMI.2016.15\&partnerID=40\&md5=86bd61c4459af4a3491046df1925ab2c}, author = {Hosni, M. and Idri, A. and Nassif, A.B. and Abran, A.} }
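
Illustrative note for the {Abnane20211} entry: a minimal sketch of the heterogeneous ensemble-imputation idea, assuming the simplest combination rule (element-wise averaging of the single imputations). It is not the authors' implementation; in particular, scikit-learn has no EM imputer, so the EM member is omitted and KNN, decision-tree, and SVR imputers stand in.

```python
# Sketch: heterogeneous ensemble imputation as the mean of several single imputations.
# Illustrative only; the EM member of the original ensemble is omitted here.
import numpy as np
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.impute import KNNImputer, IterativeImputer
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR

def ensemble_impute(X):
    """Return the element-wise mean of several single imputations of X."""
    imputers = [
        KNNImputer(n_neighbors=5),
        IterativeImputer(estimator=DecisionTreeRegressor(max_depth=5), random_state=0),
        IterativeImputer(estimator=SVR(kernel="rbf"), random_state=0),
    ]
    imputed = [imp.fit_transform(X) for imp in imputers]
    # Observed values are preserved by each imputer, so only missing cells are averaged.
    return np.mean(imputed, axis=0)

# Toy usage: a small effort dataset with missing feature values.
X = np.array([[1.0, 200.0, 5.0],
              [2.0, np.nan, 7.0],
              [np.nan, 350.0, 9.0],
              [4.0, 500.0, np.nan]])
print(ensemble_impute(X))
```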
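Illustrative note for the {ElOuassif2021352} entry: a sketch, under stated assumptions, of a homogeneous SVM ensemble whose outputs are combined by an MLP. The Pearson VII (PUK) kernel is not available in scikit-learn and is omitted; the polynomial kernel stands in for the normalized polynomial kernel, and the Wisconsin dataset is used only as a convenient stand-in for the four BC datasets of the study.

```python
# Sketch: SVM base classifiers with different kernels, combined by an MLP meta-classifier.
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import StackingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

X, y = load_breast_cancer(return_X_y=True)

base_svms = [
    ("svm_linear", make_pipeline(StandardScaler(), SVC(kernel="linear", probability=True))),
    ("svm_poly", make_pipeline(StandardScaler(), SVC(kernel="poly", degree=3, probability=True))),
    ("svm_rbf", make_pipeline(StandardScaler(), SVC(kernel="rbf", probability=True))),
]

# The MLP combines the base SVMs' predicted probabilities into a final decision.
ensemble = StackingClassifier(
    estimators=base_svms,
    final_estimator=MLPClassifier(hidden_layer_sizes=(10,), max_iter=2000, random_state=0),
    stack_method="predict_proba",
)

print(cross_val_score(ensemble, X, y, cv=5).mean())
```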
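Illustrative note for the {Hosni2017174} entry: a sketch of heterogeneous ensemble effort estimation using linear combination rules. The four learners and the mean/median rules shown are assumptions chosen for illustration, not necessarily the exact configuration evaluated in the study.

```python
# Sketch: combine the predictions of several solo effort estimators with linear rules.
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

def eee_predict(X_train, y_train, X_test, rule="mean"):
    """Fit four solo estimators and combine their predictions with a linear rule."""
    learners = [
        KNeighborsRegressor(n_neighbors=3),
        MLPRegressor(hidden_layer_sizes=(10,), max_iter=5000, random_state=0),
        SVR(kernel="rbf"),
        DecisionTreeRegressor(random_state=0),
    ]
    preds = np.array([learner.fit(X_train, y_train).predict(X_test) for learner in learners])
    return preds.mean(axis=0) if rule == "mean" else np.median(preds, axis=0)

# Toy usage with synthetic project features (size, team experience) and effort values.
rng = np.random.default_rng(0)
X = rng.uniform(1, 100, size=(40, 2))
y = 5 * X[:, 0] + 10 * X[:, 1] + rng.normal(0, 5, size=40)
print(eee_predict(X[:30], y[:30], X[30:], rule="median"))
```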