% Journal article (2021): filter feature selection in homogeneous ensembles
% for software development effort estimation.
% The exported record carries no pages field.
@article{Hosni2021,
  author   = {Hosni, M. and Idri, A. and Abran, A.},
  title    = {On the value of filter feature selection techniques in homogeneous ensembles effort estimation},
  journal  = {Journal of Software: Evolution and Process},
  volume   = {33},
  number   = {6},
  year     = {2021},
  doi      = {10.1002/smr.2343},
  url      = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85103402056\&doi=10.1002\%2fsmr.2343\&partnerID=40\&md5=0f4c04b547f3d628d6db8d65b74912e5},
  note     = {cited By 1},
  keywords = {Decision trees, Effort Estimation, Feature extraction, K-nearest neighbors, Multilayer neural networks, Nearest neighbor search, Object oriented programming, Predictive capabilities, Project management, Random processes, Random subspace method, Selection techniques, Software design, Software development effort, Software project management, Software systems, Support vector regression},
  abstract = {Software development effort estimation (SDEE) remains as the principal activity in software project management planning. Over the past four decades, several methods have been proposed to estimate the effort required to develop a software system, including more recently machine learning (ML) techniques. Because ML performance accuracy depends on the features that feed the ML technique, selecting the appropriate features in the preprocessing data step is important. This paper investigates three filter feature selection techniques to check the predictive capability of four single ML techniques: K-nearest neighbor, support vector regression, multilayer perceptron, and decision trees and their homogeneous ensembles over six well-known datasets. Furthermore, the single and ensembles techniques were optimized using the grid search optimization method. The results suggest that the three filter feature selection techniques investigated improve the reasonability and the accuracy performance of the four single techniques. Moreover, the homogeneous ensembles are statistically more accurate than the single techniques. Finally, adopting a random process (i.e., random subspace method) to select the inputs feature for ML technique is not always effective to generate an accurate homogeneous ensemble. {\textcopyright} 2021 John Wiley \& Sons, Ltd.},
}

% Journal article (2018): the record with volume, issue and page numbers
% for the parameter-tuning paper in Soft Computing.
% Page range uses a double hyphen so styles typeset an en dash.
@article{Hosni20185977,
  author  = {Hosni, M. and Idri, A. and Abran, A. and Nassif, A. B.},
  title   = {On the value of parameter tuning in heterogeneous ensembles effort estimation},
  journal = {Soft Computing},
  volume  = {22},
  number  = {18},
  pages   = {5977--6010},
  year    = {2018},
  doi     = {10.1007/s00500-017-2945-4},
  url     = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85035807309\&doi=10.1007\%2fs00500-017-2945-4\&partnerID=40\&md5=83d094f56b80a21295cf4230621f445d},
}

% Article-in-press record (2017) with the same title, DOI and URL as
% Hosni20185977 above, i.e. a second record of the same work.
% The key is kept so existing citations still resolve; prefer Hosni20185977
% in new citations.
@article{Hosni20171,
  author   = {Hosni, M. and Idri, A. and Abran, A. and Nassif, A. B.},
  title    = {On the value of parameter tuning in heterogeneous ensembles effort estimation},
  journal  = {Soft Computing},
  pages    = {1--34},
  year     = {2017},
  doi      = {10.1007/s00500-017-2945-4},
  url      = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85035807309\&doi=10.1007\%2fs00500-017-2945-4\&partnerID=40\&md5=83d094f56b80a21295cf4230621f445d},
  note     = {cited By 0; Article in Press},
  abstract = {Accurate estimation of software development effort estimation (SDEE) is fundamental for efficient management of software development projects as it assists software managers to efficiently manage their human resources. Over the last four decades, while software engineering researchers have used several effort estimation techniques, including those based on statistical and machine learning methods, no consensus has been reached on the technique that can perform best in all circumstances. To tackle this challenge, Ensemble Effort Estimation, which predicts software development effort by combining more than one solo estimation technique, has recently been investigated. In this paper, heterogeneous ensembles based on four well-known machine learning techniques (K-nearest neighbor, support vector regression, multilayer perceptron and decision trees) were developed and evaluated by investigating the impact of parameter values of the ensemble members on estimation accuracy. In particular, this paper evaluates whether setting ensemble parameters using two optimization techniques (e.g., grid search optimization and particle swarm) permits more accurate estimates of SDEE. The heterogeneous ensembles of this study were built using three combination rules (mean, median and inverse ranked weighted mean) over seven datasets. The results obtained suggest that: (1) Optimized single techniques using grid search or particle swarm optimization provide more accurate estimation; (2) in general ensembles achieve higher accuracy than their single techniques whatever the optimization technique used, even though ensembles do not dominate over all single techniques; (3) heterogeneous ensembles based on optimized single techniques provide more accurate estimation; and (4) generally, particle swarm optimization and grid search techniques generate ensembles with the same predictive capability. {\textcopyright} 2017 Springer-Verlag GmbH Germany, part of Springer Nature},
}