{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,16]],"date-time":"2026-01-16T03:45:08Z","timestamp":1768535108154,"version":"3.49.0"},"reference-count":54,"publisher":"Springer Science and Business Media LLC","issue":"1-3","license":[{"start":{"date-parts":[[2014,7,4]],"date-time":"2014-07-04T00:00:00Z","timestamp":1404432000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2015,10]]},"DOI":"10.1007\/s10994-014-5455-y","type":"journal-article","created":{"date-parts":[[2014,7,3]],"date-time":"2014-07-03T20:10:01Z","timestamp":1404418201000},"page":"163-186","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":58,"title":["Feature selection in machine learning: an exact penalty approach using a Difference of Convex function Algorithm"],"prefix":"10.1007","volume":"101","author":[{"given":"Hoai An","family":"Le Thi","sequence":"first","affiliation":[]},{"given":"Hoai Minh","family":"Le","sequence":"additional","affiliation":[]},{"given":"Tao","family":"Pham Dinh","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,7,4]]},"reference":[{"key":"5455_CR1","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1016\/S0304-3975(97)00115-1","volume":"209","author":"E Amaldi","year":"1998","unstructured":"Amaldi, E., & Kann, V. (1998). On the approximability of minimizing non zero variables or unsatisfied relations in linear systems. Theoretical Computer Science, 209, 237\u2013260.","journal-title":"Theoretical Computer Science"},{"key":"5455_CR2","doi-asserted-by":"crossref","unstructured":"Bach, F., Jenatton, R., Mairal, J., & Obzinski, G. (2012). Optimization with sparsity-inducing penalties foundations and trends. Foundations and Trends in Machine Learning, 4(1), 1\u2013106.","DOI":"10.1561\/2200000015"},{"key":"5455_CR3","unstructured":"Bradley, P. S., & Mangasarian, O. L. (1998). Feature selection via concave minimization and support vector machines. In Proceeding of international conference on machine learning ICML\u201998."},{"key":"5455_CR4","first-page":"877","volume":"14","author":"E Candes","year":"2008","unstructured":"Candes, E., Wakin, M., & Boyd, S. (2008). Enhancing sparsity by reweighted $$l_{1}$$ l 1 minimization. Journal of Mathematical Analysis and Applications, 14, 877\u2013905.","journal-title":"Journal of Mathematical Analysis and Applications"},{"key":"5455_CR5","first-page":"3869","volume":"2008","author":"R Chartrand","year":"2008","unstructured":"Chartrand, R., & Yin, W. (2008). Iteratively reweighted algorithms for compressive sensing. Acoustics, speech and signal processing, IEEE international conference ICASSP, 2008, 3869\u20133872.","journal-title":"Acoustics, speech and signal processing, IEEE international conference ICASSP"},{"issue":"5","key":"5455_CR6","doi-asserted-by":"crossref","first-page":"2832","DOI":"10.1137\/090761471","volume":"32","author":"X Chen","year":"2010","unstructured":"Chen, X., Xu, F. M., & Ye, Y. (2010). Lower bound theory of nonzero entries in solutions of l2-lp minimization. SIAM Journal on Scientific Computing, 32(5), 2832\u20132852.","journal-title":"SIAM Journal on Scientific Computing"},{"key":"5455_CR7","doi-asserted-by":"crossref","unstructured":"Chen, Y., Li, Y., Cheng, X.-Q., & Guo, L. (2006). Survey and taxonomy of feature selection algorithms in intrusion detection system. In Proceedings of inscrypt, 2006. LNCS (Vol. 4318, 153\u2013167).","DOI":"10.1007\/11937807_13"},{"key":"5455_CR8","unstructured":"Collober, R., Sinz F., Weston, J., & Bottou, L. (2006). Trading convexity for scalability. In Proceedings of the 23rd international conference on machine learning ICML 2006 (pp. 201\u2013208). Pittsburgh, PA. ISBN:1-59593-383-2."},{"key":"5455_CR9","volume-title":"Introduction to support vector machines","author":"N Cristianini","year":"2000","unstructured":"Cristianini, N., & Shawe-Taylor, N. (2000). Introduction to support vector machines. Cambridge: Cambridge University Press."},{"key":"5455_CR10","first-page":"1","volume":"39","author":"AP Dempster","year":"1997","unstructured":"Dempster, A. P., Laird, N. M., & Rubin, D. B. (1997). Maximum likelihood from incomplete data via the EM algorithm. Journal of the Royal Statistical Society: Series B, 39, 1\u201338.","journal-title":"Journal of the Royal Statistical Society: Series B"},{"issue":"456","key":"5455_CR11","doi-asserted-by":"crossref","first-page":"1348","DOI":"10.1198\/016214501753382273","volume":"96","author":"J Fan","year":"2001","unstructured":"Fan, J., & Li, R. (2001). Variable selection via nonconcave penalized likelihood and its oracle properties. Journal of the American Statistical Association, 96(456), 1348\u20131360.","journal-title":"Journal of the American Statistical Association"},{"key":"5455_CR12","doi-asserted-by":"crossref","first-page":"397","DOI":"10.1080\/10618600.1998.10474784","volume":"7","author":"WJ Fu","year":"1998","unstructured":"Fu, W. J. (1998). Penalized regression: The bridge versus the lasso. Journal of Computational and Graphical Statistics, 7, 397\u2013416.","journal-title":"Journal of Computational and Graphical Statistics"},{"key":"5455_CR13","doi-asserted-by":"crossref","first-page":"4686","DOI":"10.1109\/TSP.2009.2026004","volume":"57","author":"G Gasso","year":"2009","unstructured":"Gasso, G., Rakotomamonjy, A., & Canu, S. (2009). Recovering sparse signals with a certain family of nonconvex penalties and dc programming. IEEE Transactions on Signal Processing, 57, 4686\u20134698.","journal-title":"IEEE Transactions on Signal Processing"},{"key":"5455_CR14","doi-asserted-by":"crossref","first-page":"600","DOI":"10.1109\/78.558475","volume":"45","author":"IF Gorodnitsky","year":"1997","unstructured":"Gorodnitsky, I. F., & Rao, B. D. (1997). Sparse signal reconstructions from limited data using FOCUSS: A re-weighted minimum norm algorithm. IEEE Transactions on Signal Processing, 45, 600\u2013616.","journal-title":"IEEE Transactions on Signal Processing"},{"key":"5455_CR15","doi-asserted-by":"crossref","first-page":"136","DOI":"10.1016\/j.csda.2013.01.020","volume":"67","author":"W Guan","year":"2013","unstructured":"Guan, W., & Gray, A. (2013). Sparse high-dimensional fractional-norm support vector machine via DC programming. Computational Statistics and Data Analysis, 67, 136\u2013148.","journal-title":"Computational Statistics and Data Analysis"},{"key":"5455_CR16","doi-asserted-by":"crossref","first-page":"3320","DOI":"10.1109\/TIT.2003.820031","volume":"49","author":"R Gribonval","year":"2003","unstructured":"Gribonval, R., & Nielsen, M. (2003). Sparse representation in union of bases. IEEE Transactions on Information Theory, 49, 3320\u20133325.","journal-title":"IEEE Transactions on Information Theory"},{"key":"5455_CR17","doi-asserted-by":"crossref","unstructured":"Hastie, T., Tibshirani, R., & Friedman, J. (2009). The elements of statistical learning (2nd ed.). Heidelberg: Springer.","DOI":"10.1007\/978-0-387-84858-7"},{"key":"5455_CR18","doi-asserted-by":"crossref","unstructured":"Huang, J., Horowitz, J., & Ma, S. (2008). Asymptotic properties of bridge estimators in sparse high-dimensional regression models. Annals of Statistics, 36, 587\u2013613.","DOI":"10.1214\/009053607000000875"},{"issue":"484","key":"5455_CR19","doi-asserted-by":"crossref","first-page":"1665","DOI":"10.1198\/016214508000001066","volume":"103","author":"Y Kim","year":"2008","unstructured":"Kim, Y., Choi, H., & Oh, H. S. (2008). Smoothly clipped absolute deviation on high dimensions. Journal of the American Statistical Association, 103(484), 1665\u20131673.","journal-title":"Journal of the American Statistical Association"},{"key":"5455_CR20","doi-asserted-by":"crossref","first-page":"1356","DOI":"10.1214\/aos\/1015957397","volume":"28","author":"K Knight","year":"2000","unstructured":"Knight, K., & Fu, W. (2000). Asymptotics for lasso-type estimators. Annals of Statistics, 28, 1356\u20131378.","journal-title":"Annals of Statistics"},{"key":"5455_CR21","doi-asserted-by":"crossref","unstructured":"Krause, N., & Singer, Y. (2004). Leveraging the margin more carefully. In Proceedings of the 21 international conference on Machine learning ICML 2004. Banff, Alberta, Canada, 63.ISBN:1-58113-828-5.","DOI":"10.1145\/1015330.1015344"},{"key":"5455_CR22","unstructured":"Le Thi, H.A. DC Programming and DCA. http:\/\/lita.sciences.univ-metz.fr\/~lethi ."},{"key":"5455_CR23","unstructured":"Le Thi, H. A. (1997). Contribution \u00e0 l\u2019optimisation non convexe et l\u2019optimisation globale: Th\u00e9orie. Algorithmes et Applications: Habilitation \u00e0 Diriger des Recherches, Universit\u00e9 de Rouen."},{"key":"5455_CR24","doi-asserted-by":"crossref","unstructured":"Le Thi, H. A., & Pham Dinh, T. (1997). Solving a class of linearly constrained indefinite quadratic problems by DC algorithms. Journal of Global Optimization, 11(3), 253\u2013285.","DOI":"10.1023\/A:1008288411710"},{"key":"5455_CR25","doi-asserted-by":"crossref","unstructured":"Le Thi, H. A., & Pham Dinh, T. (2005). The DC (difference of convex functions) programming and DCA revisited with DC models of real-world nonconvex optimization problems. Annals of Operations Research, 133, 23\u201346.","DOI":"10.1007\/s10479-004-5022-1"},{"key":"5455_CR26","doi-asserted-by":"crossref","unstructured":"Le Thi, H. A., Belghiti, T., Pham Dinh, T. (2007) A new efficient algorithm based on DC programming and DCA for clustering. Journal of Global Optimization, 37, 593\u2013608.","DOI":"10.1007\/s10898-006-9066-4"},{"key":"5455_CR27","unstructured":"Le Thi, H. A., Le, H. M. & Pham Dinh, T. (2006). Optimization based DC programming and DCA for hierarchical clustering. European Journal of Operational Research, 183(3), 1067\u20131085."},{"key":"5455_CR28","doi-asserted-by":"crossref","unstructured":"Le Thi, H. A., Le, H. M., Nguyen, V. V., & Pham Dinh, T. (2008). A dc programming approach for feature selection in support vector machines learning. Journal of Advances in Data Analysis and Classification, 2, 259\u2013278.","DOI":"10.1007\/s11634-008-0030-7"},{"key":"5455_CR29","unstructured":"Le Thi, H. A., Nguyen, V. V., & Ouchani, S. (2009). Gene selection for cancer classification using DCA. Journal of Fonctiers of Computer Science and Technology, 3(6), 62\u201372."},{"key":"5455_CR30","doi-asserted-by":"crossref","unstructured":"Le Thi, H. A., Huynh, V. N., & Pham Dinh, T. (2012). Exact penalty and error bounds in DC programming. Journal of Global Optimization dedicated to Reiner Horst, 52(3), 509\u2013535.","DOI":"10.1007\/s10898-011-9765-3"},{"key":"5455_CR31","doi-asserted-by":"crossref","first-page":"219","DOI":"10.1198\/106186005X37238","volume":"14","author":"Y Liu","year":"2005","unstructured":"Liu, Y., Shen, X., & Doss, H. (2005). Multicategory $$\\psi $$ \u03c8 -learning and support vector machine: Computational tools. Journal of Computational and Graphical Statistics, 14, 219\u2013236.","journal-title":"Journal of Computational and Graphical Statistics"},{"key":"5455_CR32","doi-asserted-by":"crossref","first-page":"500","DOI":"10.1198\/016214505000000781","volume":"101","author":"Y Liu","year":"2006","unstructured":"Liu, Y., & Shen, X. (2006). Multicategory $$\\psi $$ \u03c8 -learning. Journal of the American Statistical Association, 101, 500\u2013509.","journal-title":"Journal of the American Statistical Association"},{"key":"5455_CR33","doi-asserted-by":"crossref","unstructured":"Mangasarian, O. L. (1996). Machine learning via polyhedral concave minimization. In H. Fischer, B. Riedmueller, & S. Schaeffler (Eds.), Applied mathematics and parallel computing\u2014Festschrift for Klaus Ritter (pp. 175\u2013188). Heidelberg: Physica.","DOI":"10.1007\/978-3-642-99789-1_13"},{"issue":"12","key":"5455_CR34","doi-asserted-by":"crossref","first-page":"3397","DOI":"10.1109\/78.258082","volume":"41","author":"S Mallat","year":"1993","unstructured":"Mallat, S., & Zhang, Z. (1993). Matching pursuit in a time-frequency dictionary. IEEE Transactions on Signal Processing, 41(12), 3397\u20133415.","journal-title":"IEEE Transactions on Signal Processing"},{"issue":"1","key":"5455_CR35","doi-asserted-by":"crossref","first-page":"374","DOI":"10.1016\/j.csda.2006.12.019","volume":"52","author":"N Meinshausen","year":"2007","unstructured":"Meinshausen, N. (2007). Relaxed Lasso. Computational Statistics and Data Analysis, 52(1), 374\u2013393.","journal-title":"Computational Statistics and Data Analysis"},{"key":"5455_CR36","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1137\/S0097539792240406","volume":"24","author":"BK Natarajan","year":"1995","unstructured":"Natarajan, B. K. (1995). Sparse approximate solutions to linear systems. SIAM Journal on Computing, 24, 227\u2013234.","journal-title":"SIAM Journal on Computing"},{"issue":"1\u20133","key":"5455_CR37","doi-asserted-by":"crossref","first-page":"129","DOI":"10.1007\/s10994-005-1505-9","volume":"61","author":"J Neumann","year":"2005","unstructured":"Neumann, J., Schn\u00f6rr, C., & Steidl, G. (2005). Combined SVM-based feature selection and classification. Machine Learning, 61(1\u20133), 129\u2013150.","journal-title":"Machine Learning"},{"key":"5455_CR38","doi-asserted-by":"crossref","unstructured":"Ong, C. S., & Le Thi, H. A. (2013). Learning sparse classifiers with Difference of Convex functions algorithms. Optimization Methods and Software, 28(4), 830\u2013854.","DOI":"10.1080\/10556788.2011.652630"},{"issue":"2","key":"5455_CR39","doi-asserted-by":"crossref","first-page":"375","DOI":"10.1016\/j.sigpro.2007.08.015","volume":"8","author":"D Peleg","year":"2008","unstructured":"Peleg, D., & Meir, R. (2008). A bilinear formulation for vector sparsity optimization. Signal Processing, 8(2), 375\u2013389.","journal-title":"Signal Processing"},{"key":"5455_CR40","doi-asserted-by":"crossref","unstructured":"Pham Dinh, T., & Le Thi, H. A. (1998). DC optimization algorithms for solving the trust region subproblem. SIAM Journal on Optimization, 8, 476\u2013505.","DOI":"10.1137\/S1052623494274313"},{"key":"5455_CR41","unstructured":"Pham Dinh, T., & Le Thi, H. A (2014). Recent advances in DC programming and DCA. Transactions on Computational Collective. Intelligence., 8342, 1\u201337."},{"issue":"8","key":"5455_CR42","doi-asserted-by":"crossref","first-page":"13071320","DOI":"10.1109\/TNN.2011.2157521","volume":"22","author":"A Rakotomamonjy","year":"2011","unstructured":"Rakotomamonjy, A., Flamary, R., Gasso, G., & Canu, S. (2011). $$\\ell _p-\\ell _q$$ \u2113 p - \u2113 q penalty for sparse linear and sparse multiple kernel multi-task learning. IEEE Transactions on Neural Networks, 22(8), 13071320.","journal-title":"IEEE Transactions on Neural Networks"},{"key":"5455_CR43","doi-asserted-by":"crossref","first-page":"187","DOI":"10.1109\/78.738251","volume":"47","author":"BD Rao","year":"1999","unstructured":"Rao, B. D., & Kreutz-Delgado, K. (1999). An affine scaling methodology for best basis selection. IEEE Transactions on Signal Processing, 47, 187\u2013200.","journal-title":"IEEE Transactions on Signal Processing"},{"issue":"3","key":"5455_CR44","doi-asserted-by":"crossref","first-page":"760","DOI":"10.1109\/TSP.2002.808076","volume":"51","author":"BD Rao","year":"2003","unstructured":"Rao, B. D., Engan, K., Cotter, S. F., Palmer, J., & KreutzDelgado, K. (2003). Subset selection in noise based on diversity measure minimization. IEEE Transactions on Signal Processing, 51(3), 760\u2013770.","journal-title":"IEEE Transactions on Signal Processing"},{"key":"5455_CR45","unstructured":"Rinaldi, F. (2000). Mathematical Programming Methods for minimizing the zero-norm over polyhedral sets, PhD thesis, Sapienza, University of Rome (2009)"},{"key":"5455_CR46","unstructured":"Thiao, M., Pham Dinh, T., & Le Thi, H. A. (2010). A DC programming approach for sparse eigenvalue problem. Proceeding of ICML, 2010, 1063\u20131070."},{"key":"5455_CR47","first-page":"431","volume":"46","author":"R Tibshirani","year":"1996","unstructured":"Tibshirani, R. (1996). Regression shrinkage and selection via the lasso. Journal of the Royal Statistical Society, 46, 431\u2013439.","journal-title":"Journal of the Royal Statistical Society"},{"issue":"4","key":"5455_CR48","doi-asserted-by":"crossref","first-page":"915","DOI":"10.1162\/08997660360581958","volume":"15","author":"AL Yuille","year":"2003","unstructured":"Yuille, A. L., & Rangarajan, A. (2003). The convex concave procedure. Neural Computation, 15(4), 915\u2013936.","journal-title":"Neural Computation"},{"key":"5455_CR49","first-page":"589","volume":"16","author":"L Wang","year":"2006","unstructured":"Wang, L., Zhu, J., & Zou, H. (2006). The doubly regularized support vector machine. Statistica Sinica, 16, 589\u2013615.","journal-title":"Statistica Sinica"},{"key":"5455_CR50","first-page":"1439","volume":"3","author":"J Weston","year":"2003","unstructured":"Weston, J., Elisseeff, A., Scholkopf, B., & Tipping, M. (2003). Use of the zero-norm with linear models and kernel methods. Journal of Machine Learning Research., 3, 1439\u20131461.","journal-title":"Journal of Machine Learning Research."},{"issue":"1","key":"5455_CR51","doi-asserted-by":"crossref","first-page":"88","DOI":"10.1093\/bioinformatics\/bti736","volume":"2","author":"HH Zhang","year":"2006","unstructured":"Zhang, H. H., Ahn, J., Lin, X., & Park, C. (2006). Gene selection using support vector machines with non-convex penalty. Bioinformatics, 2(1), 88\u201395.","journal-title":"Bioinformatics"},{"key":"5455_CR52","doi-asserted-by":"crossref","first-page":"301","DOI":"10.1111\/j.1467-9868.2005.00503.x","volume":"67","author":"H Zou","year":"2005","unstructured":"Zou, H., & Hastie, T. (2005). Regularization and variable selection via the elastic net. Journal of the Royal Statistical Society: Series B, 67, 301\u2013320.","journal-title":"Journal of the Royal Statistical Society: Series B"},{"key":"5455_CR53","doi-asserted-by":"crossref","first-page":"1418","DOI":"10.1198\/016214506000000735","volume":"101","author":"H Zou","year":"2006","unstructured":"Zou, H. (2006). The adaptive lasso and its oracle properties. Journal of the American Statistical Association, 101, 1418\u20131429.","journal-title":"Journal of the American Statistical Association"},{"issue":"4","key":"5455_CR54","doi-asserted-by":"crossref","first-page":"1509","DOI":"10.1214\/009053607000000802","volume":"36","author":"H Zou","year":"2008","unstructured":"Zou, H., & Li, R. (2008). One-step sparse estimates in nonconcave penalized likelihood models. Annals of Statistics, 36(4), 1509\u20131533.","journal-title":"Annals of Statistics"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-014-5455-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10994-014-5455-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-014-5455-y","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,28]],"date-time":"2024-05-28T22:25:01Z","timestamp":1716935101000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10994-014-5455-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,7,4]]},"references-count":54,"journal-issue":{"issue":"1-3","published-print":{"date-parts":[[2015,10]]}},"alternative-id":["5455"],"URL":"https:\/\/doi.org\/10.1007\/s10994-014-5455-y","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014,7,4]]}}}