@comment{
  Publication list (Scopus export, cleaned up).
  Conventions used in this file:
    - one field per line, in the order: author, title, venue, volume, number, pages,
      year, doi, url, note, internal-note, keywords, abstract;
    - page ranges use a double hyphen;
    - conference papers use the inproceedings entry type;
    - Scopus citation counts live in the ignored field internal-note so that they are
      never printed in a bibliography;
    - compound surnames that contain a lowercase particle are wrapped in braces so
      that BibTeX does not split them into a von part and a last name.
}

@article{Benhar2022791,
  author        = {Benhar, H. and Hosni, M. and Idri, A.},
  title         = {Univariate and Multivariate Filter Feature Selection for Heart Disease Classification},
  journal       = {Journal of Information Science and Engineering},
  volume        = {38},
  number        = {4},
  pages         = {791--803},
  year          = {2022},
  doi           = {10.6688/JISE.202207_38(4).0006},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85137170374\&doi=10.6688\%2fJISE.202207_38\%284\%29.0006\&partnerID=40\&md5=f75bca8ff78e6782d4c11e88d338784f},
  internal-note = {Scopus: cited by 0},
  keywords      = {Black boxes, Cardiology, Classification (of information), Data preprocessing, Decision trees, disease classification, Diseases, feature selection, Features selection, Filter, Heart, heart disease, Nearest neighbor search, Performance, Selection techniques, Univariate, White box},
  abstract      = {Feature selection (FS) is a data preprocessing task that can be applied before the classification phase, and aims at improving the performance and interpretability of classifiers by finding only a few highly informative features. The present study aims at evaluating and comparing the performances of six univariate and two multivariate filter FS techniques for heart disease classification. The FS techniques were evaluated with two white-box and two black-box classification techniques using five heart disease datasets. Furthermore, this study deals with the setting of the hyperparameters{\textquoteright} values of the four classifiers. This study evaluates 600 variants of classifiers. Results show that white-box classification techniques such as K-Nearest Neighbors and Decision Trees can be very competitive with black-box ones when hyperparameters{\textquoteright} optimization and feature selection were applied. {\textcopyright} 2022 Institute of Information Science. All rights reserved.},
}

@article{ElOuassif202150,
  author        = {El Ouassif, B. and Idri, A. and Hosni, M. and Abran, A.},
  title         = {Classification techniques in breast cancer diagnosis: A systematic literature review},
  journal       = {Computer Methods in Biomechanics and Biomedical Engineering: Imaging and Visualization},
  volume        = {9},
  number        = {1},
  pages         = {50--77},
  year          = {2021},
  doi           = {10.1080/21681163.2020.1811159},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85098842973\&doi=10.1080\%2f21681163.2020.1811159\&partnerID=40\&md5=9a48998b1c44d263863efcfb25f9920f},
  internal-note = {Scopus: cited by 12},
  keywords      = {Article, Artificial intelligence, artificial neural network, Breast Cancer, Breast cancer diagnosis, cancer diagnosis, cause of death, Causes of death, Classification (of information), Classification technique, Comparison of performance, Computer aided diagnosis, data extraction, Data mining, data synthesis, decision tree, Decision trees, Diseases, human, k nearest neighbor, Machine learning, Medical fields, Medical informatics, Network support, Neural networks, publication, qualitative research, Quality control, support vector machine, Support vector machine classifiers, Support vector machines, Support vectors machine, Systematic literature review, Systematic Review, validity},
  abstract      = {Data mining (DM) consists in analysing a~set of observations to find unsuspected relationships and then summarising the data in new ways that are both understandable and useful. It has become widely used in various medical fields including breast cancer (BC), which is the most common cancer and the leading cause of death among women~worldwide.~BC diagnosis is a~challenging medical task and many studies have attempted to apply classification techniques to it. The objective of the present study is to identify studies on classification techniques~in~BC diagnosis and to analyse them from~three perspectives: classification techniques used, accuracy of the classifiers, and comparison of performance. We performed a~systematic literature review (SLR) of 176 selected studies published between January~2000 and November~2018. The results show that, of the nine classification techniques investigated, artificial neural networks, support vector machines and decision trees were the most frequently used. Moreover, artificial neural networks, support vector machines and ensemble classifiers performed better than the other techniques, with median accuracy values of 95\%, 95\% and 96\% respectively. Most of the selected studies (57.4\%) used datasets containing different types of images such as mammographic, ultrasound, and microarray images. {\textcopyright} 2021 Informa UK Limited, trading as Taylor \& Francis Group.},
}

@inproceedings{Abnane20211,
  author        = {Abnane, I. and Idri, A. and Hosni, M. and Abran, A.},
  title         = {Heterogeneous ensemble imputation for software development effort estimation},
  booktitle     = {PROMISE 2021 - Proceedings of the 17th International Conference on Predictive Models and Data Analytics in Software Engineering, co-located with ESEC/FSE 2021},
  pages         = {1--10},
  year          = {2021},
  doi           = {10.1145/3475960.3475984},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85113586361\&doi=10.1145\%2f3475960.3475984\&partnerID=40\&md5=7bd6bbfb801a84cd7694e3713d2d0081},
  internal-note = {Scopus: cited by 1},
  keywords      = {Case based reasoning, Casebased reasonings (CBR), Decision trees, Empirical evaluations, Expectation Maximization, Forestry, Heterogeneous ensembles, Imputation techniques, K nearest neighbor (KNN), Maximum principle, Multilayer neural networks, Nearest neighbor search, Predictive analytics, Software design, Software development effort, Support vector regression, Support vector regression (SVR)},
  abstract      = {Choosing the appropriate Missing Data (MD) imputation technique for a given Software development effort estimation (SDEE) technique is not a trivial task. In fact, the impact of the MD imputation on the estimation output depends on the dataset and the SDEE technique used and there is no best imputation technique in all contexts. Thus, an attractive solution is to use more than one single imputation technique and combine their results for a final imputation outcome. This concept is called ensemble imputation and can help to significantly improve the estimation accuracy. This paper develops and evaluates a heterogeneous ensemble imputation whose members were the four single imputation techniques: K-Nearest Neighbors (KNN), Expectation Maximization (EM), Support Vector Regression (SVR), and Decision Trees (DT). The impact of the ensemble imputation was evaluated and compared with those of the four single imputation techniques on the accuracy measured in terms of the standardized accuracy criterion of four SDEE techniques: Case Based Reasoning (CBR), Multi-Layers Perceptron (MLP), Support Vector Regression (SVR) and Reduced Error Pruning Tree (REPTree). The Wilcoxon statistical test was also performed in order to assess whether the results are significant. All the empirical evaluations were carried out over the six datasets, namely, ISBSG, China, COCOMO81, Desharnais, Kemerer, and Miyazaki. Results show that the use of heterogeneous ensemble-based imputation instead single imputation significantly improved the accuracy of the four SDEE techniques. Indeed, the ensemble imputation technique was ranked either first or second in all contexts. {\textcopyright} 2021 ACM.},
}

@inproceedings{ElOuassif2021352,
  author        = {El Ouassif, B. and Idri, A. and Hosni, M.},
  title         = {Homogeneous ensemble based support vector machine in breast cancer diagnosis},
  booktitle     = {HEALTHINF 2021 - 14th International Conference on Health Informatics; Part of the 14th International Joint Conference on Biomedical Engineering Systems and Technologies, BIOSTEC 2021},
  pages         = {352--360},
  year          = {2021},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85103824221\&partnerID=40\&md5=2340dec93a42872a9ece0b6f6b9fccbc},
  internal-note = {Scopus: cited by 5},
  keywords      = {Base classifiers, Biomedical engineering, Breast cancer diagnosis, Classification models, Combination rules, Computer aided diagnosis, Diseases, Medical informatics, Multilayer neural networks, Multilayer Perceptron (MLP) classifier, Online repositories, Polynomial kernels, Radial basis function kernels, Support vector machines},
  abstract      = {Breast Cancer (BC) is one of the most common forms of cancer and one of the leading causes of mortality among women. Hence, detecting and accurately diagnosing BC at an early stage remain a major factor for women{\textquoteright}s long-term survival. To this aim, numerous single techniques have been proposed and evaluated for BC classification. However, none of them proved to be suitable in all situations. Currently, ensemble methods have been widely investigated to help diagnosis BC and consists on generating one classification model by combining more than one single technique by means of a combination rule. This paper evaluates homogeneous ensembles whose members are four variants of the Support Vector Machine (SVM) classifier. The four SVM variants used four different kernels: Linear Kernel, Normalized Polynomial Kernel, Radial Basis Function Kernel, and Pearson VII function based Universal Kernel. A Multilayer Perceptron (MLP) classifier is used for combining the outputs of the base classifiers to produce a final decision. Four well-known available BC datasets are used from online repositories. The findings of this study suggest that: (1) ensembles provided a very promising performance compared to its base, and (2) there is no SVM ensemble with a combination of kernels that have better performance in all datasets. Copyright {\textcopyright} 2021 by SCITEPRESS {\textendash} Science and Technology Publications, Lda. All rights reserved},
}

@article{ElOuassif2021263,
  author        = {El Ouassif, B. and Idri, A. and Hosni, M.},
  title         = {Investigating Accuracy and Diversity in Heterogeneous Ensembles for Breast Cancer Classification},
  journal       = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  volume        = {12950},
  pages         = {263--281},
  year          = {2021},
  doi           = {10.1007/978-3-030-86960-1_19},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85125278232\&doi=10.1007\%2f978-3-030-86960-1_19\&partnerID=40\&md5=58285ac3fe1b29564700bef136a94771},
  internal-note = {Scopus: cited by 2. LNCS volume; exported by Scopus as an article, proceedings title not recorded here.},
  keywords      = {Breast Cancer, Combining classifiers, Computer aided diagnosis, Diseases, Diversity measure, Ensemble members, Ensemble-classifier, Heterogeneous ensembles, Member selection, Performance, Selection based, Voting, Voting machines},
  abstract      = {Breast Cancer (BC) is one of the most common forms of cancer among women. Detecting and accurately diagnosing breast cancer at an early phase increase the chances of women{\textquoteright}s survival. For this purpose, various single classification techniques have been investigated to diagnosis BC. Nevertheless, none of them proved to be accurate in all circumstances. Recently, a promising approach called ensemble classifiers have been widely used to assist physicians accurately diagnose BC. Ensemble classifiers consist on combining a set of single classifiers by means of an aggregation layer. The literature in general shows that ensemble techniques outperformed single ones when ensemble members are accurate (i.e. have the lowest percentage error) and diverse (i.e. the single classifiers make uncorrelated errors on new~instances). Hence, selecting ensemble members is often a crucial task since it can lead to the opposite: single techniques outperformed their ensemble. This paper evaluates and compares ensemble members{\textquoteright} selection based on accuracy and diversity with ensemble members{\textquoteright} selection based on accuracy only. A comparison with ensembles without member selection was also performed. Ensemble performance was assessed in terms of accuracy, F1-score. Q statistics diversity measure was used to calculate the classifiers diversity. The experiments were carried out on three well-known BC datasets available from online repositories. Seven single classifiers were used in our experiments. Skott Knott test and Borda Count voting system were used to assess the significance of the performance differences and rank ensembles according to theirs performances. The findings of this study suggest that: (1) Investigating both accuracy and diversity to select ensemble members often led to better performance, and (2) In general, selecting ensemble members using accuracy and/or diversity led to better ensemble performance than constructing ensembles without members{\textquoteright} selection. {\textcopyright} 2021, Springer Nature Switzerland AG.},
}

@article{Hosni20212827,
  author        = {Hosni, M. and {Carrillo de Gea}, J. M. and Idri, A. and El Bajta, M. and Fern{\'a}ndez Alem{\'a}n, J. L. and Garc{\'\i}a-Mateos, G. and Abnane, I.},
  title         = {A systematic mapping study for ensemble classification methods in cardiovascular disease},
  journal       = {Artificial Intelligence Review},
  volume        = {54},
  number        = {4},
  pages         = {2827--2861},
  year          = {2021},
  doi           = {10.1007/s10462-020-09914-6},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85091735819\&doi=10.1007\%2fs10462-020-09914-6\&partnerID=40\&md5=69ea4b02de420c3ec6a85e1f3c7dddaf},
  internal-note = {Scopus: cited by 5},
  keywords      = {Bayesian networks, Cardio-vascular disease, Cardiology, Decision trees, Diagnosis, Digital libraries, Diseases, Ensemble classification, Ensemble classifiers, Evaluation framework, K fold cross validations, Learning systems, Majority voting rules, Mapping, Open source software, Open systems, Optimization method, Systematic mapping studies},
  abstract      = {Ensemble methods overcome the limitations of single machine learning techniques by combining different techniques, and are employed in the quest to achieve a high level of accuracy. This approach has been investigated in various fields, one of them being that of bioinformatics. One of the most frequent applications of ensemble techniques involves research into cardiovascular diseases, which are considered the leading cause of death worldwide. The purpose of this research work is to identify the papers that investigate ensemble classification techniques applied to cardiology diseases, and to analyse them according to nine aspects: their publication venues, the medical tasks tackled, the empirical and research types adopted, the types of ensembles proposed, the single techniques used to construct the ensembles, the validation frameworks adopted to evaluate the proposed ensembles, the tools used to build the ensembles, and the optimization methods employed for the single techniques. This paper reports the carrying out of a systematic mapping study. An extensive automatic search in four digital libraries: IEEE Xplore, ACM Digital Library, PubMed, and Scopus, followed by a study selection process, resulted in the identification of 351 papers that were used to address our mapping questions. This study found that the papers selected had been published in a large number of different resources. The medical task addressed most frequently by the selected studies was diagnosis. In addition, the experiment-based empirical type and evaluation-based research type were the most dominant approaches adopted by the selected studies. Homogeneous ensembles were the ensemble type that was developed most often in literature, while decision trees, artificial neural networks and Bayesian classifiers were the single techniques used most frequently to develop ensemble classification methods. The weighted majority and majority voting rules were adopted to obtain the final decision of the ensembles developed. With regard to evaluation frameworks, the datasets obtained from the UCI and PhysioBank repositories were those used most often to evaluate the ensemble methods, while the k-fold cross-validation method was the most frequently-employed validation technique. Several tools with which to build ensemble classifiers were identified, and the type of software adopted with the greatest frequency was open source. Finally, only a few researchers took into account the optimization of the parameter settings of either single or meta ensemble classifiers. This mapping study attempts to provide a greater insight into the application of ensemble classification methods in cardiovascular diseases. The majority of the selected papers reported positive feedback as regards the ability of ensemble methods to perform better than single methods. Further analysis is required to aggregate the evidence reported in literature. {\textcopyright} 2020, Springer Nature B.V.},
}

@article{Hosni2021,
  author        = {Hosni, M. and Idri, A. and Abran, A.},
  title         = {On the value of filter feature selection techniques in homogeneous ensembles effort estimation},
  journal       = {Journal of Software: Evolution and Process},
  volume        = {33},
  number        = {6},
  year          = {2021},
  doi           = {10.1002/smr.2343},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85103402056\&doi=10.1002\%2fsmr.2343\&partnerID=40\&md5=0f4c04b547f3d628d6db8d65b74912e5},
  internal-note = {Scopus: cited by 1},
  keywords      = {Decision trees, Effort Estimation, Feature extraction, K-nearest neighbors, Multilayer neural networks, Nearest neighbor search, Object oriented programming, Predictive capabilities, Project management, Random processes, Random subspace method, Selection techniques, Software design, Software development effort, Software project management, Software systems, Support vector regression},
  abstract      = {Software development effort estimation (SDEE) remains as the principal activity in software project management planning. Over the past four decades, several methods have been proposed to estimate the effort required to develop a software system, including more recently machine learning (ML) techniques. Because ML performance accuracy depends on the features that feed the ML technique, selecting the appropriate features in the preprocessing data step is important. This paper investigates three filter feature selection techniques to check the predictive capability of four single ML techniques: K-nearest neighbor, support vector regression, multilayer perceptron, and decision trees and their homogeneous ensembles over six well-known datasets. Furthermore, the single and ensembles techniques were optimized using the grid search optimization method. The results suggest that the three filter feature selection techniques investigated improve the reasonability and the accuracy performance of the four single techniques. Moreover, the homogeneous ensembles are statistically more accurate than the single techniques. Finally, adopting a random process (i.e., random subspace method) to select the inputs feature for ML technique is not always effective to generate an accurate homogeneous ensemble. {\textcopyright} 2021 John Wiley \& Sons, Ltd.},
}

@article{Idri20201239,
  author        = {Idri, A. and El Ouassif, B. and Hosni, M. and Abnane, I.},
  title         = {Assessing the impact of parameters tuning in ensemble based breast Cancer classification},
  journal       = {Health and Technology},
  volume        = {10},
  number        = {5},
  pages         = {1239--1255},
  year          = {2020},
  doi           = {10.1007/s12553-020-00453-2},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85087361107\&doi=10.1007\%2fs12553-020-00453-2\&partnerID=40\&md5=7398903f3007d71e535b12c2ef9a90a6},
  internal-note = {Scopus: cited by 12. Second author was exported as "Bouchra, E.O." (given name and surname swapped); normalised to match the other entries.},
  keywords      = {accuracy, Article, Breast Cancer, cancer classification, classifier, decision tree, experimental design, grid search, human, multilayer perceptron, particle swarm optimization, recall, support vector machine},
  abstract      = {Breast cancer is one of the major causes of death among women. Different decision support systems were proposed to assist oncologists to accurately diagnose their patients. These decision support systems mainly used classification techniques to categorize the diagnosis into Malign or Benign tumors. Given that no consensus has been reached on the classifier that can perform best in all circumstances, ensemble-based classification, which classifies patients by combining more than one single classification technique, has recently been investigated. In this paper, heterogeneous ensembles based on three well-known machine learning techniques (support vector machines, multilayer perceptron, and decision trees) were developed and evaluated by investigating the impact of parameter values of the ensemble members on classification performance. In particular, we investigate three parameters tuning techniques: Grid Search (GS), Particle Swarm Optimization (PSO) and the default parameters of the Weka Tool to evaluate whether setting ensemble parameters permits more accurate classification in breast cancer over four datasets obtained from the Machine Learning repository. The heterogeneous ensembles of this study were built using the majority voting technique as a combination rule. The overall results obtained suggest that: (1) Using GS or PSO techniques for single techniques provide more accurate classification; (2) In general, ensembles generate more accurate classification than their single techniques regardless of the optimization techniques used. (3) Heterogeneous ensembles based on optimized single classifiers generate better results than the Uniform Configuration of Weka (UC-WEKA) ensembles, and (4) PSO and GS slightly have the same impact on the performances of ensembles. {\textcopyright} 2020, IUPESM and Springer-Verlag GmbH Germany, part of Springer Nature.},
}

@inproceedings{Benhar2020391,
  author        = {Benhar, H. and Idri, A. and Hosni, M.},
  title         = {Impact of threshold values for filter-based univariate feature selection in heart disease classification},
  booktitle     = {HEALTHINF 2020 - 13th International Conference on Health Informatics, Proceedings; Part of 13th International Joint Conference on Biomedical Engineering Systems and Technologies, BIOSTEC 2020},
  pages         = {391--398},
  year          = {2020},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85083712586\&partnerID=40\&md5=4656d8b952f7c60387d4495c737c5a6d},
  internal-note = {Scopus: cited by 2},
  keywords      = {Biomedical engineering, Cardiology, Classification (of information), Clinical tests, Diagnosis, Diseases, Feature extraction, Feature ranking, Heart, heart disease, Large dataset, Medical informatics, Optimal choice, Optimal classification, Relevant features, Storage requirements, Threshold-value},
  abstract      = {In the last decade, feature selection (FS), was one of the most investigated preprocessing tasks for heart disease prediction. Determining the optimal features which contribute more towards the diagnosis of heart disease can reduce the number of clinical tests needed to be taken by a patient, decrease the model cost, reduce the storage requirements and improve the comprehensibility of the induced model. In this study a comparison of three filter feature ranking methods was carried out. Feature ranking methods need to set a threshold (i.e. the percentage of the number of relevant features to be selected) in order to select the final subset of features. Thus, the aim of this study is to investigate if there is a threshold value which is an optimal choice for three different feature ranking methods and four classifiers used for heart disease classification in four heart disease datasets. The used feature ranking methods and selection thresholds resulted in optimal classification performance for one or more classifiers over small and large heart disease datasets. The size of the dataset takes an important role in the choice of the selection threshold. {\textcopyright} 2020 by SCITEPRESS - Science and Technology Publications, Lda. All rights reserved.},
}

@article{Hosni20202177,
  author        = {Hosni, M. and Garc{\'\i}a-Mateos, G. and {Carrillo de Gea}, J. M. and Idri, A. and Fern{\'a}ndez Alem{\'a}n, J. L.},
  title         = {A mapping study of ensemble classification methods in lung cancer decision support systems},
  journal       = {Medical and Biological Engineering and Computing},
  volume        = {58},
  number        = {10},
  pages         = {2177--2193},
  year          = {2020},
  doi           = {10.1007/s11517-020-02223-8},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85087501825\&doi=10.1007\%2fs11517-020-02223-8\&partnerID=40\&md5=bdb2bd3f923da33ea3b564a0bd6e739e},
  internal-note = {Scopus: cited by 8. Author names were exported as "Carrillo-De-Gea" and "Fernandez-Aleman"; normalised to the spelling used in Hosni20212827.},
  keywords      = {Artificial intelligence, Automatic searches, Biological organs, cancer classification, Classification (of information), Classification accuracy, Classification methods, classifier, decision support system, Decision support systems, decision tree, Decision trees, Diagnosis, Digital libraries, Diseases, Ensemble classification, Heterogeneous ensembles, human, lung cancer, Lung cancer detections, Majority voting rules, Mapping, priority journal, Review, Systematic mapping studies},
  abstract      = {Achieving a high level of classification accuracy in medical datasets is a capital need for researchers to provide effective decision systems to assist doctors in work. In many domains of artificial intelligence, ensemble classification methods are able to improve the performance of single classifiers. This paper reports the state of the art of ensemble classification methods in lung cancer detection. We have performed a systematic mapping study to identify the most interesting papers concerning this topic. A total of 65 papers published between 2000 and 2018 were selected after an automatic search in four digital libraries and a careful selection process. As a result, it was observed that diagnosis was the task most commonly studied; homogeneous ensembles and decision trees were the most frequently adopted for constructing ensembles; and the majority voting rule was the predominant combination rule. Few studies considered the parameter tuning of the techniques used. These findings open several perspectives for researchers to enhance lung cancer research by addressing the identified gaps, such as investigating different classification methods, proposing other heterogeneous ensemble methods, and using new combination rules. [Figure not available: see fulltext.] {\textcopyright} 2020, International Federation for Medical and Biological Engineering.},
}

@article{Hosni2019,
  author        = {Hosni, M. and Idri, A. and Abran, A.},
  title         = {Evaluating filter fuzzy analogy homogenous ensembles for software development effort estimation},
  journal       = {Journal of Software: Evolution and Process},
  volume        = {31},
  number        = {2},
  year          = {2019},
  doi           = {10.1002/smr.2117},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85061585023\&doi=10.1002\%2fsmr.2117\&partnerID=40\&md5=72983b70d540ccb14da59885e96cc4ab},
  internal-note = {Scopus: cited by 0},
}

@article{Hosni20185977,
  author        = {Hosni, M. and Idri, A. and Abran, A. and Nassif, A. B.},
  title         = {On the value of parameter tuning in heterogeneous ensembles effort estimation},
  journal       = {Soft Computing},
  volume        = {22},
  number        = {18},
  pages         = {5977--6010},
  year          = {2018},
  doi           = {10.1007/s00500-017-2945-4},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85035807309\&doi=10.1007\%2fs00500-017-2945-4\&partnerID=40\&md5=83d094f56b80a21295cf4230621f445d},
  internal-note = {Final published record of this work; Hosni20171 is the earlier in-press record with the same DOI.},
}

@inproceedings{Hosni2017174,
  author        = {Hosni, M. and Idri, A. and Nassif, A. B. and Abran, A.},
  title         = {Heterogeneous Ensembles for Software Development Effort Estimation},
  booktitle     = {Proceedings - 2016 3rd International Conference on Soft Computing and Machine Intelligence, ISCMI 2016},
  pages         = {174--178},
  year          = {2017},
  doi           = {10.1109/ISCMI.2016.15},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85034656098\&doi=10.1109\%2fISCMI.2016.15\&partnerID=40\&md5=86bd61c4459af4a3491046df1925ab2c},
  internal-note = {Scopus: cited by 0},
  abstract      = {Software effort estimation influences almost all the process of software development such as: bidding, planning, and budgeting. Hence, delivering an accurate estimation in early stages of the software life cycle may be the key of success of any project. To this aim, many solo techniques have been proposed to predict the effort required to develop a software system. Nevertheless, none of them proved to be suitable in all circumstances. Recently, Ensemble Effort Estimation has been investigated to estimate software effort and consists on generating the software effort by combining more than one solo estimation technique by means of a combination rule. In this study, a heterogeneous EEE based on four machine learning techniques was investigated using three linear rules and two well-known datasets. The results of this study suggest that the proposed heterogeneous EEE yields a very promising performance and there is no best combiner rule that can be recommended. {\textcopyright} 2016 IEEE.},
}

@inproceedings{Hosni20171251,
  author        = {Hosni, M. and Idri, A.},
  title         = {Software effort estimation using classical analogy ensembles based on random subspace},
  booktitle     = {Proceedings of the ACM Symposium on Applied Computing},
  volume        = {Part F128005},
  pages         = {1251--1258},
  year          = {2017},
  doi           = {10.1145/3019612.3019784},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85020917571\&doi=10.1145\%2f3019612.3019784\&partnerID=40\&md5=8b1b6f9ac3cb26187929f17a78dd5669},
  internal-note = {Scopus: cited by 1},
  abstract      = {Software effort estimation is one of the important and complex tasks in software project management. It influences almost all the process of software development such as: bidding, planning, and budgeting. Hence, estimating the software project effort in early stages of the software life cycle is considered the key of success of any project. To this goal, many techniques have been proposed to predict the effort required to develop a software system. Unfortunately, there is no consensus about the single best technique. Recently, Ensemble Effort Estimation has been investigated to estimate software effort and consists on generating the software effort by combining more than one solo estimation technique by means of a combination rule. In this paper, we have developed different homogeneous ensembles based on combination of Random Subspace method and Classical Analogy technique using two linear rules over seven datasets. The results confirm that the Random Space ensembles outperform the solo Classical Analogy regardless of the dataset used and that the median rule generates better estimation than the average one. {\textcopyright} 2017 ACM.},
}

@comment{
  NOTE(review): the entry below (key Hosni20171) is the in-press record of the same
  paper as Hosni20185977 (identical DOI, title and authors). It is kept so that
  existing citations of either key keep resolving; prefer Hosni20185977 in new text
  and avoid citing both in one document.
}

@article{Hosni20171,
  author        = {Hosni, M. and Idri, A. and Abran, A. and Nassif, A. B.},
  title         = {On the value of parameter tuning in heterogeneous ensembles effort estimation},
  journal       = {Soft Computing},
  pages         = {1--34},
  year          = {2017},
  doi           = {10.1007/s00500-017-2945-4},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85035807309\&doi=10.1007\%2fs00500-017-2945-4\&partnerID=40\&md5=83d094f56b80a21295cf4230621f445d},
  note          = {Article in Press},
  internal-note = {Scopus: cited by 0. Duplicate of Hosni20185977 (same DOI).},
  abstract      = {Accurate estimation of software development effort estimation (SDEE) is fundamental for efficient management of software development projects as it assists software managers to efficiently manage their human resources. Over the last four decades, while software engineering researchers have used several effort estimation techniques, including those based on statistical and machine learning methods, no consensus has been reached on the technique that can perform best in all circumstances. To tackle this challenge, Ensemble Effort Estimation, which predicts software development effort by combining more than one solo estimation technique, has recently been investigated. In this paper, heterogeneous ensembles based on four well-known machine learning techniques (K-nearest neighbor, support vector regression, multilayer perceptron and decision trees) were developed and evaluated by investigating the impact of parameter values of the ensemble members on estimation accuracy. In particular, this paper evaluates whether setting ensemble parameters using two optimization techniques (e.g., grid search optimization and particle swarm) permits more accurate estimates of SDEE. The heterogeneous ensembles of this study were built using three combination rules (mean, median and inverse ranked weighted mean) over seven datasets. The results obtained suggest that: (1) Optimized single techniques using grid search or particle swarm optimization provide more accurate estimation; (2) in general ensembles achieve higher accuracy than their single techniques whatever the optimization technique used, even though ensembles do not dominate over all single techniques; (3) heterogeneous ensembles based on optimized single techniques provide more accurate estimation; and (4) generally, particle swarm optimization and grid search techniques generate ensembles with the same predictive capability. {\textcopyright} 2017 Springer-Verlag GmbH Germany, part of Springer Nature},
}