{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T17:02:11Z","timestamp":1779382931754,"version":"3.53.1"},"reference-count":217,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2021,3,3]],"date-time":"2021-03-03T00:00:00Z","timestamp":1614729600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,3,3]],"date-time":"2021-03-03T00:00:00Z","timestamp":1614729600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Artif Intell Rev"],"published-print":{"date-parts":[[2021,12]]},"DOI":"10.1007\/s10462-021-09975-1","type":"journal-article","created":{"date-parts":[[2021,3,3]],"date-time":"2021-03-03T18:20:09Z","timestamp":1614795609000},"page":"6391-6438","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":483,"title":["A systematic review on overfitting control in shallow and deep neural networks"],"prefix":"10.1007","volume":"54","author":[{"given":"Mohammad Mahdi","family":"Bejani","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9558-8286","authenticated-orcid":false,"given":"Mehdi","family":"Ghatee","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2021,3,3]]},"reference":[{"issue":"9","key":"9975_CR1","doi-asserted-by":"publisher","first-page":"1182","DOI":"10.1016\/j.compbiomed.2013.05.017","volume":"43","author":"E Abbasi","year":"2013","unstructured":"Abbasi E, Ghatee M, Shiri ME (2013) FRAN and RBF-PSO as two components of a hyper framework to recognize protein folds. Comput Biol Med 43(9):1182\u20131191","journal-title":"Comput Biol Med"},{"key":"9975_CR2","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1016\/j.knosys.2016.07.018","volume":"110","author":"E Abbasi","year":"2016","unstructured":"Abbasi E, Shiri ME, Ghatee M (2016) A regularized root-quartic mixture of experts for complex classification problems. Knowl-Based Syst 110:98\u2013109","journal-title":"Knowl-Based Syst"},{"key":"9975_CR3","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1016\/j.neunet.2019.12.029","volume":"124","author":"S Abpeikar","year":"2020","unstructured":"Abpeikar S, Ghatee M, Foresti GL, Micheloni C (2020) Adaptive neural tree exploiting expert nodes to classify high-dimensional data. Neural Netw 124:20\u201338","journal-title":"Neural Netw"},{"key":"9975_CR4","doi-asserted-by":"publisher","first-page":"281","DOI":"10.1016\/j.eswa.2019.07.003","volume":"137","author":"S Abpeykar","year":"2019","unstructured":"Abpeykar S, Ghatee M (2019) Neural trees with peer-to-peer and server-to-client knowledge transferring models for high-dimensional data classification. Expert Syst Appl 137:281\u2013291","journal-title":"Expert Syst Appl"},{"issue":"11","key":"9975_CR5","doi-asserted-by":"publisher","first-page":"7131","DOI":"10.1007\/s00521-018-3543-9","volume":"31","author":"S Abpeykar","year":"2019","unstructured":"Abpeykar S, Ghatee M (2019) An ensemble of RBF neural networks in decision tree structure with knowledge transferring to accelerate multi-classification. Neural Comput Appl 31(11):7131\u20137151","journal-title":"Neural Comput Appl"},{"key":"9975_CR6","doi-asserted-by":"publisher","first-page":"12","DOI":"10.1016\/j.csda.2018.08.015","volume":"131","author":"S Abpeykar","year":"2019","unstructured":"Abpeykar S, Ghatee M, Zare H (2019) Ensemble decision forest of RBF networks via hybrid feature clustering approach for high-dimensional data classification. Comput Stat Data Anal 131:12\u201336","journal-title":"Comput Stat Data Anal"},{"key":"9975_CR7","doi-asserted-by":"publisher","first-page":"101063","DOI":"10.1016\/j.ecoinf.2020.101063","volume":"57","author":"A Ahmed","year":"2020","unstructured":"Ahmed A, Yousif H, Kays R, He Z (2020) Animal species classification using deep neural networks with noise labels. Ecol Inform 57:101063","journal-title":"Ecol Inform"},{"key":"9975_CR8","doi-asserted-by":"crossref","unstructured":"Alani AA, Cosma G, Taherkhani A, McGinnity T (2018) Hand gesture recognition using an adapted convolutional neural network with data augmentation. In: 4th international conference on information management (ICIM). IEEE, pp 5\u201312","DOI":"10.1109\/INFOMAN.2018.8392660"},{"issue":"1","key":"9975_CR9","doi-asserted-by":"publisher","first-page":"527","DOI":"10.1007\/s10462-019-09706-7","volume":"52","author":"M Amer","year":"2019","unstructured":"Amer M, Maul T (2019) A review of modularization techniques in artificial neural networks. Artif Intell Rev 52(1):527\u2013561","journal-title":"Artif Intell Rev"},{"key":"9975_CR10","unstructured":"Antoniou A, Storkey A, Edwards H (2017) Data augmentation generative adversarial networks, 1\u201314. arXiv:1711.04340"},{"issue":"3","key":"9975_CR11","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3005348","volume":"13","author":"S Anwar","year":"2017","unstructured":"Anwar S, Hwang K, Sung W (2017) Structured pruning of deep convolutional neural networks. ACM J Emerg Technol Comput Syst 13(3):1\u201318","journal-title":"ACM J Emerg Technol Comput Syst"},{"key":"9975_CR12","unstructured":"Arpit D, Zhou Y, Kota BU, Govindaraju V (2016) Normalization propagation: a parametric technique for removing internal covariate shift in deep networks. In: Proceedings of the 33rd international conference on international conference on machine learning (ICML). JMLR, pp 1168\u20131176"},{"issue":"139","key":"9975_CR13","first-page":"1","volume":"21","author":"AR Asadi","year":"2020","unstructured":"Asadi AR, Abbe E (2020) Chaining meets chain rule: multilevel entropic regularization and training of neural networks. J Mach Learn Res 21(139):1\u201332","journal-title":"J Mach Learn Res"},{"issue":"9","key":"9975_CR14","doi-asserted-by":"publisher","first-page":"2650","DOI":"10.1109\/TNNLS.2018.2885972","volume":"30","author":"BO Ayinde","year":"2019","unstructured":"Ayinde BO, Inanc T, Zurada JM (2019) Regularizing deep neural networks by enhancing diversity in feature extraction. IEEE Trans Neural Netw Learn Syst 30(9):2650\u20132661","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"9975_CR15","unstructured":"Ba JL, Kiros JR, Hinton GE (2016) Layer normalization. In: Advances in neural information processing systems (NIPS)-deep learning symposium, 1\u201314"},{"issue":"1","key":"9975_CR16","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1137\/050624418","volume":"17","author":"A Beck","year":"2006","unstructured":"Beck A, Ben-Tal A (2006) On the solution of the tikhonov regularization of the total least squares problem. SIAM J Optim 17(1):98\u2013118","journal-title":"SIAM J Optim"},{"key":"9975_CR17","unstructured":"Bejani MM, Ghatee M (2019) Regularized deep networks in intelligent transportation systems: a taxonomy and a case study, 1\u20138. arXiv:1911.03010"},{"key":"9975_CR18","doi-asserted-by":"crossref","unstructured":"Bejani MM, Ghatee M (2021) Least auxiliary loss-functions with impact growth adaptation (laliga) for convolutional neural networks. Neurocomputing (in press)","DOI":"10.1016\/j.neucom.2021.01.106"},{"key":"9975_CR19","unstructured":"Bejani MM, Ghatee M (2020) Adaptive low-rank factorization to regularize shallow and deep neural networks. arXiv:2005.01995"},{"key":"9975_CR20","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1016\/j.trc.2018.02.009","volume":"89","author":"MM Bejani","year":"2018","unstructured":"Bejani MM, Ghatee M (2018) A context aware system for driving style evaluation by an ensemble learning on smartphone sensors data. Transp Res Part C Emerg Technol 89:303\u2013320","journal-title":"Transp Res Part C Emerg Technol"},{"issue":"2","key":"9975_CR21","doi-asserted-by":"publisher","first-page":"543","DOI":"10.1109\/TITS.2019.2896672","volume":"21","author":"MM Bejani","year":"2020","unstructured":"Bejani MM, Ghatee M (2020) Convolutional neural network with adaptive regularization to classify driving styles on smartphones. IEEE Trans Intell Transp Syst 21(2):543\u2013552","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"9975_CR22","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1016\/j.neunet.2020.04.021","volume":"128","author":"MM Bejani","year":"2020","unstructured":"Bejani MM, Ghatee M (2020) Theory of adaptive SVD regularization for deep neural networks. Neural Netw 128:33\u201346","journal-title":"Neural Netw"},{"issue":"6","key":"9975_CR23","doi-asserted-by":"publisher","first-page":"1373","DOI":"10.1162\/089976603321780317","volume":"15","author":"M Belkin","year":"2003","unstructured":"Belkin M, Niyogi P (2003) Laplacian eigenmaps for dimensionality reduction and data representation. Neural Comput 15(6):1373\u20131396","journal-title":"Neural Comput"},{"issue":"Nov","key":"9975_CR24","first-page":"2399","volume":"7","author":"M Belkin","year":"2006","unstructured":"Belkin M, Niyogi P, Sindhwani V (2006) Manifold regularization: a geometric framework for learning from labeled and unlabeled examples. J Mach Learn Res 7(Nov):2399\u20132434","journal-title":"J Mach Learn Res"},{"key":"9975_CR25","doi-asserted-by":"crossref","unstructured":"Belkin M, Niyogi P (2002) Laplacian eigenmaps and spectral techniques for embedding and clustering. In: Advances in Neural Information Processing Systems (NIPS), pp 585\u2013591","DOI":"10.7551\/mitpress\/1120.003.0080"},{"issue":"8","key":"9975_CR26","doi-asserted-by":"publisher","first-page":"1889","DOI":"10.1162\/089976600300015187","volume":"12","author":"Y Bengio","year":"2000","unstructured":"Bengio Y (2000) Gradient-based optimization of hyperparameters. Neural Comput 12(8):1889\u20131900","journal-title":"Neural Comput"},{"issue":"2","key":"9975_CR27","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1109\/72.279181","volume":"5","author":"Y Bengio","year":"1994","unstructured":"Bengio Y, Simard P, Frasconi P (1994) Learning long-term dependencies with gradient descent is difficult. IEEE Trans Neural Netw 5(2):157\u2013166","journal-title":"IEEE Trans Neural Netw"},{"issue":"Feb","key":"9975_CR28","first-page":"281","volume":"13","author":"J Bergstra","year":"2012","unstructured":"Bergstra J, Bengio Y (2012) Random search for hyper-parameter optimization. J Mach Learn Res 13(Feb):281\u2013305","journal-title":"J Mach Learn Res"},{"issue":"1","key":"9975_CR29","doi-asserted-by":"publisher","first-page":"108","DOI":"10.1162\/neco.1995.7.1.108","volume":"7","author":"CM Bishop","year":"1995","unstructured":"Bishop CM (1995) Training with noise is equivalent to tikhonov regularization. Neural Comput 7(1):108\u2013116","journal-title":"Neural Comput"},{"key":"9975_CR30","unstructured":"Blanc G, Gupta N, Valiant G, Valiant P (2020) Implicit regularization for deep neural networks driven by an ornstein-uhlenbeck like process. In: Conference on learning Theory, pp 483\u2013513"},{"issue":"2","key":"9975_CR31","doi-asserted-by":"publisher","first-page":"263","DOI":"10.1023\/A:1026083619097","volume":"43","author":"D Calvetti","year":"2003","unstructured":"Calvetti D, Reichel L (2003) Tikhonov regularization of large linear problems. BIT Numer Math 43(2):263\u2013283","journal-title":"BIT Numer Math"},{"key":"9975_CR32","doi-asserted-by":"crossref","unstructured":"Caruana R, Lawrence S, Giles CL (2001) Overfitting in neural nets: backpropagation, conjugate gradient, and early stopping. In: Advances in neural information processing systems (NIPS), pp 402\u2013408","DOI":"10.1109\/IJCNN.2000.857823"},{"issue":"Apr","key":"9975_CR33","first-page":"841","volume":"8","author":"GC Cawley","year":"2007","unstructured":"Cawley GC, Talbot NL (2007) Preventing over-fitting during model selection via Bayesian regularisation of the hyper-parameters. J Mach Learn Res 8(Apr):841\u2013861","journal-title":"J Mach Learn Res"},{"issue":"Jul","key":"9975_CR34","first-page":"2079","volume":"11","author":"GC Cawley","year":"2010","unstructured":"Cawley GC, Talbot NL (2010) On over-fitting in model selection and subsequent selection bias in performance evaluation. J Mach Learn Res 11(Jul):2079\u20132107","journal-title":"J Mach Learn Res"},{"issue":"12","key":"9975_CR35","doi-asserted-by":"publisher","first-page":"1962","DOI":"10.1109\/TNN.2009.2034144","volume":"20","author":"H Chen","year":"2009","unstructured":"Chen H, Yao X (2009) Regularized negative correlation learning for neural network ensembles. IEEE Trans Neural Netw 20(12):1962\u20131979","journal-title":"IEEE Trans Neural Netw"},{"issue":"1\u20133","key":"9975_CR36","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1016\/S0925-2312(98)00109-X","volume":"25","author":"H-H Chen","year":"1999","unstructured":"Chen H-H, Manry MT, Chandrasekaran H (1999) A neural network training algorithm utilizing multiple sets of linear equations. Neurocomputing 25(1\u20133):55\u201372","journal-title":"Neurocomputing"},{"issue":"1","key":"9975_CR37","doi-asserted-by":"publisher","first-page":"126","DOI":"10.1109\/MSP.2017.2765695","volume":"35","author":"Y Cheng","year":"2018","unstructured":"Cheng Y, Wang D, Zhou P, Zhang T (2018) Model compression and acceleration for deep neural networks: the principles, progress, and challenges. IEEE Signal Process Mag 35(1):126\u2013136","journal-title":"IEEE Signal Process Mag"},{"key":"9975_CR38","unstructured":"Chollet F et al (2015) Keras. https:\/\/keras.io"},{"key":"9975_CR39","unstructured":"Clevert D-A, Unterthiner T, Hochreiter S (2016) Fast and accurate deep network learning by exponential linear units. In: 4th international conference on learning representations (ICLR), 1\u201314"},{"key":"9975_CR40","doi-asserted-by":"crossref","unstructured":"Cottrell M, Girard B, Girard Y, Mangeas M, Muller C (1994) SSM: a statistical stepwise method for weight elimination. In: International conference on artificial neural networks. Springer, pp 681\u2013684","DOI":"10.1007\/978-1-4471-2097-1_160"},{"issue":"2","key":"9975_CR41","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1007\/s10994-013-5327-x","volume":"91","author":"K Crammer","year":"2013","unstructured":"Crammer K, Kulesza A, Dredze M (2013) Adaptive regularization of weight vectors. Mach Learn 91(2):155\u2013187","journal-title":"Mach Learn"},{"key":"9975_CR42","doi-asserted-by":"crossref","unstructured":"Cubuk ED, Zoph B, Mane D, Vasudevan V, Le QV (2019) Autoaugment: Learning augmentation strategies from data. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 113\u2013123","DOI":"10.1109\/CVPR.2019.00020"},{"key":"9975_CR43","doi-asserted-by":"crossref","unstructured":"Dai W, Yang Q, Xue G-R, Yu Y (2007) Boosting for transfer learning. In: Proceedings of the 24th international conference on machine learning. ACM, pp 193\u2013200","DOI":"10.1145\/1273496.1273521"},{"key":"9975_CR44","doi-asserted-by":"crossref","unstructured":"Darwish A, Hassanien AE, Das S (2020) A survey of swarm and evolutionary computing approaches for deep learning. Artif Intell Rev 53(3):1767\u20131812","DOI":"10.1007\/s10462-019-09719-2"},{"key":"9975_CR45","unstructured":"Demyanov S (2015) Regularization methods for neural networks and related models, Ph.D. thesis, Department of Computing and Information System, The University of Melbourne"},{"issue":"3","key":"9975_CR46","doi-asserted-by":"publisher","first-page":"326","DOI":"10.1145\/212094.212114","volume":"27","author":"T Dietterich","year":"1995","unstructured":"Dietterich T (1995) Overfitting and undercomputing in machine learning. ACM Comput Surv (CSUR) 27(3):326\u2013327","journal-title":"ACM Comput Surv (CSUR)"},{"issue":"3","key":"9975_CR47","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1007\/s10462-011-9270-6","volume":"39","author":"S Ding","year":"2013","unstructured":"Ding S, Li H, Su C, Yu J, Jin F (2013) Evolutionary artificial neural networks: a review. Artif Intell Rev 39(3):251\u2013260","journal-title":"Artif Intell Rev"},{"key":"9975_CR48","unstructured":"Donahue J, Kr\u00e4henb\u00fchl P, Darrell T(2017) Adversarial feature learning. In: 5th international conference on learning representations (ICLR), 1\u201318"},{"key":"9975_CR49","unstructured":"Dong X, Chen S, Pan S (2017) Learning to prune deep neural networks via layer-wise optimal brain surgeon. In: Advances in neural information processing systems (NIPS), pp 4857\u20134867"},{"key":"9975_CR50","doi-asserted-by":"publisher","first-page":"782","DOI":"10.1016\/j.trf.2018.06.044","volume":"58","author":"HR Eftekhari","year":"2018","unstructured":"Eftekhari HR, Ghatee M (2018) Hybrid of discrete wavelet transform and adaptive neuro fuzzy inference system for overall driving behavior recognition. Transp Res Part F Traffic Psychol Behav 58:782\u2013796","journal-title":"Transp Res Part F Traffic Psychol Behav"},{"key":"9975_CR51","doi-asserted-by":"crossref","unstructured":"Eigenmann R, Nossek JA (1999) Gradient based adaptive regularization. In: Proceedings of the IEEE signal processing society workshop on neural networks for signal processing. IEEE, pp 87\u201394","DOI":"10.1109\/NNSP.1999.788126"},{"key":"9975_CR52","volume-title":"Matrix methods in data mining and pattern recognition","author":"L Eld\u00e9n","year":"2019","unstructured":"Eld\u00e9n L (2019) Matrix methods in data mining and pattern recognition, vol 15. SIAM, Bangkok"},{"issue":"6","key":"9975_CR53","doi-asserted-by":"publisher","first-page":"1386","DOI":"10.1109\/72.963775","volume":"12","author":"AP Engelbrecht","year":"2001","unstructured":"Engelbrecht AP (2001) A new pruning heuristic based on variance analysis of sensitivity information. IEEE Trans Neural Netw 12(6):1386\u20131399","journal-title":"IEEE Trans Neural Netw"},{"issue":"Feb","key":"9975_CR54","first-page":"625","volume":"11","author":"D Erhan","year":"2010","unstructured":"Erhan D, Bengio Y, Courville A, Manzagol P-A, Vincent P, Bengio S (2010) Why does unsupervised pre-training help deep learning? J Mach Learn Res 11(Feb):625\u2013660","journal-title":"J Mach Learn Res"},{"key":"9975_CR55","unstructured":"Fahlman SE, Lebiere C (1990) The cascade-correlation learning architecture. In: Advances in neural information processing systems (NIPS), pp 524\u2013532"},{"key":"9975_CR56","doi-asserted-by":"publisher","unstructured":"Feurer M, Hutter F (2019) Hyperparameter optimization. In: Hutter F, Kotthoff L, Vanschoren J (eds) Automated machine learning. The springer series on challenges in machine learning. Springer, Cham, pp 3\u201333. https:\/\/doi.org\/10.1007\/978-3-030-05318-5_1","DOI":"10.1007\/978-3-030-05318-5_1"},{"key":"9975_CR57","unstructured":"Finnoff W, Hergert F, Zimmermann H-G (1993) Extended regularization methods for nonconvergent model selection. In: Advances in neural information processing systems (NIPS), pp 228\u2013235"},{"key":"9975_CR58","doi-asserted-by":"publisher","unstructured":"Fong S, Deb S, Yang X-S (2018) How meta-heuristic algorithms contribute to deep learning in the hype of big data analytics. In: Sa P, Sahoo M, Murugappan M, Wu Y, Majhi B (eds) Progress in intelligent computing techniques: theory, practice, and applications. Advances in intelligent systems and computing, vol 518. Springer, Singapore, pp 3\u201325. https:\/\/doi.org\/10.1007\/978-981-10-3373-5_1","DOI":"10.1007\/978-981-10-3373-5_1"},{"key":"9975_CR59","unstructured":"Franceschi L, Donini M, Frasconi P, Pontil M (2017) Forward and reverse gradient-based hyperparameter optimization. In: Proceeding of the 34th international conference on machine learning-volume 70. JMLR, pp 1165\u20131173"},{"key":"9975_CR60","doi-asserted-by":"crossref","unstructured":"Frank L, Hubert E (1996) Pretopological approach for supervised learning. In: Proceedings of 13th international conference on pattern recognition, vol 4. IEEE, pp 256\u2013260","DOI":"10.1109\/ICPR.1996.547426"},{"key":"9975_CR61","unstructured":"Galdran A, Alvarez-Gila A, Meyer MI, Saratxaga CL, Ara\u00fajo T, Garrote E, Aresta G, Costa P, Mendon\u00e7a AM, Campilho A (2017) Data-driven color augmentation techniques for deep skin image analysis, 1\u20134. arXiv:1703.03702"},{"issue":"1","key":"9975_CR62","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1162\/neco.1992.4.1.1","volume":"4","author":"S Geman","year":"1992","unstructured":"Geman S, Bienenstock E, Doursat R (1992) Neural networks and the bias\/variance dilemma. Neural Comput 4(1):1\u201358","journal-title":"Neural Comput"},{"issue":"2","key":"9975_CR63","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1162\/neco.1995.7.2.219","volume":"7","author":"F Girosi","year":"1995","unstructured":"Girosi F, Jones M, Poggio T (1995) Regularization theory and neural networks architectures. Neural Comput 7(2):219\u2013269","journal-title":"Neural Comput"},{"key":"9975_CR64","unstructured":"Glorot X, Bengio Y (2010) Understanding the difficulty of training deep feedforward neural networks. In: Proceedings of the 13th international conference on artificial intelligence and statistics, pp 249\u2013256"},{"issue":"1","key":"9975_CR65","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1137\/S0895479897326432","volume":"21","author":"GH Golub","year":"1999","unstructured":"Golub GH, Hansen PC, O\u2019Leary DP (1999) Tikhonov regularization and total least squares. SIAM J Matrix Anal Appl 21(1):185\u2013194","journal-title":"SIAM J Matrix Anal Appl"},{"key":"9975_CR66","unstructured":"Goodfellow I, Pouget-Abadie J, Mirza M, Xu B, Warde-Farley D, Ozair S, Courville A, Bengio Y (2014) Generative adversarial nets. In: Advances in neural information processing systems (NIPS), pp 2672\u20132680"},{"issue":"Mar","key":"9975_CR67","first-page":"1157","volume":"3","author":"I Guyon","year":"2003","unstructured":"Guyon I, Elisseeff A (2003) An introduction to variable and feature selection. J Mach Learn Res 3(Mar):1157\u20131182","journal-title":"J Mach Learn Res"},{"key":"9975_CR68","unstructured":"Han S, Pool J, Tran J, Dally W (2015) Learning both weights and connections for efficient neural network. In: Advances in neural information processing systems (NIPS), pp 1135\u20131143"},{"key":"9975_CR69","unstructured":"Harvey N, Liaw C, Mehrabian A (2017) Nearly-tight vc-dimension bounds for piecewise linear neural networks. In: Conference on learning theory, pp 1064\u20131068"},{"key":"9975_CR70","doi-asserted-by":"crossref","unstructured":"Hassibi B, Stork DG, Wolff GJ (1993) Optimal brain surgeon and general network pruning. In: IEEE international conference on neural networks. IEEE, pp 293\u2013299","DOI":"10.1109\/ICNN.1993.298572"},{"issue":"1","key":"9975_CR71","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1021\/ci0342472","volume":"44","author":"DM Hawkins","year":"2004","unstructured":"Hawkins DM (2004) The problem of overfitting. J Chem Inf Comput Sci 44(1):1\u201312","journal-title":"J Chem Inf Comput Sci"},{"key":"9975_CR72","unstructured":"Heidari M, Ghatee M, Nickabadi A, Nezhad AP (2020) Diverse and styled image captioning using SVD-based mixture of recurrent experts, 1\u201313. arXiv:2007.03338"},{"key":"9975_CR73","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2015) Delving deep into rectifiers: Surpassing human-level performance on imagenet classification. In: Proceedings of the IEEE international conference on computer vision, pp 1026\u20131034","DOI":"10.1109\/ICCV.2015.123"},{"key":"9975_CR74","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"9975_CR75","doi-asserted-by":"crossref","unstructured":"He Y, Zhang X, Sun J (2017) Channel pruning for accelerating very deep neural networks. In: Proceedings of the IEEE international conference on computer vision, pp 1389\u20131397","DOI":"10.1109\/ICCV.2017.155"},{"key":"9975_CR76","doi-asserted-by":"crossref","unstructured":"Ho TK, Basu M, Law MHC (2006) Measures of geometrical complexity in classification problems. In: Data complexity in pattern recognition. Springer, pp 1\u201323","DOI":"10.1007\/978-1-84628-172-3_1"},{"issue":"1","key":"9975_CR77","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1006\/cviu.1998.0624","volume":"70","author":"TK Ho","year":"1998","unstructured":"Ho TK, Baird HS (1998) Pattern classification with compact distribution maps. Comput Vis Image Understand 70(1):101\u2013110","journal-title":"Comput Vis Image Understand"},{"key":"9975_CR78","first-page":"289","volume":"3","author":"TK Ho","year":"2002","unstructured":"Ho TK, Basu M (2002) Complexity measures of supervised classification problems. IEEE Trans Pattern Anal Mach Intell 3:289\u2013300","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"9975_CR79","doi-asserted-by":"crossref","unstructured":"Hoekstra A, Duin RP (1996) On the nonlinearity of pattern classifiers. In: Proceedings of 13th international conference on pattern recognition, vol 4. IEEE, pp 271\u2013275","DOI":"10.1109\/ICPR.1996.547429"},{"key":"9975_CR80","unstructured":"Howard AG, Zhu M, Chen B, Kalenichenko D, Wang W, Weyand T, Andreetto M, Adam H (2017) Mobilenets: efficient convolutional neural networks for mobile vision applications, 1\u20139. arXiv:1704.04861"},{"key":"9975_CR81","doi-asserted-by":"crossref","unstructured":"Huang G, Liu Z, Van Der Maaten L, Weinberger KQ (2017) Densely connected convolutional networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4700\u20134708","DOI":"10.1109\/CVPR.2017.243"},{"key":"9975_CR82","unstructured":"Hu H, Peng R, Tai Y-W, Tang C-K (2016) Network trimming: a data-driven neuron pruning approach towards efficient deep architectures. arXiv:1607.03250"},{"key":"9975_CR83","unstructured":"Ioffe S (2017) Batch renormalization: Towards reducing minibatch dependence in batch-normalized models. In: Advances in neural information processing systems (NIPS), pp 1945\u20131953"},{"key":"9975_CR84","unstructured":"Ioffe S, Szegedy C (2015) Batch normalization: accelerating deep network training by reducing internal covariate shift. In: Proceedings of the 32nd international conference on international conference on machine learning\u2014volume 37. JMLR, pp 448\u2013456"},{"issue":"6","key":"9975_CR85","doi-asserted-by":"publisher","first-page":"333","DOI":"10.1016\/0950-7051(96)81917-4","volume":"8","author":"I Ivanova","year":"1995","unstructured":"Ivanova I, Kubat M (1995) Initialization of neural networks by means of decision trees. Knowl-Based Syst 8(6):333\u2013344","journal-title":"Knowl-Based Syst"},{"key":"9975_CR86","doi-asserted-by":"publisher","first-page":"57","DOI":"10.1016\/j.imavis.2019.06.005","volume":"89","author":"Y Jaafra","year":"2019","unstructured":"Jaafra Y, Laurent JL, Deruyver A, Naceur MS (2019) Reinforcement learning for neural architecture search: a review. Image Vis Comput 89:57\u201366","journal-title":"Image Vis Comput"},{"issue":"3","key":"9975_CR87","doi-asserted-by":"publisher","first-page":"699","DOI":"10.1109\/TSMCB.2005.861067","volume":"36","author":"J-T Jeng","year":"2005","unstructured":"Jeng J-T (2005) Hybrid approach of selecting hyperparameters of support vector machine for regression. IEEE Trans Syst Man Cybern Part B (Cybern) 36(3):699\u2013709","journal-title":"IEEE Trans Syst Man Cybern Part B (Cybern)"},{"key":"9975_CR88","unstructured":"Jie R, Gao J, Vasnev A, Tran M-n (2020) Regularized flexible activation function combinations for deep neural networks, 1\u201312. arXiv:2007.13101"},{"key":"9975_CR89","unstructured":"Jin J, Dundar A, Culurciello E (2015) Robust convolutional neural networks under adversarial noise, 1\u20138. arXiv:1511.06306"},{"issue":"5","key":"9975_CR90","doi-asserted-by":"publisher","first-page":"1245","DOI":"10.1109\/TPAMI.2017.2701831","volume":"40","author":"G Kang","year":"2018","unstructured":"Kang G, Li J, Tao D (2018) Shakeout: a new approach to regularized deep neural network training. IEEE Trans Pattern Anal Mach Intell 40(5):1245\u20131258","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"9975_CR91","doi-asserted-by":"publisher","first-page":"42961","DOI":"10.1109\/ACCESS.2018.2863606","volume":"6","author":"N Khan","year":"2018","unstructured":"Khan N, Shah J, Stavness I (2018) Bridgeout: stochastic bridge regularization for deep neural networks. IEEE Access 6:42961\u201342970","journal-title":"IEEE Access"},{"key":"9975_CR92","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1016\/j.neunet.2018.09.009","volume":"110","author":"SH Khan","year":"2019","unstructured":"Khan SH, Hayat M, Porikli F (2019) Regularization of deep neural networks with spectral dropout. Neural Netw 110:82\u201390","journal-title":"Neural Netw"},{"key":"9975_CR93","unstructured":"Kipf TN, Welling M (2017) Semi-supervised classification with graph convolutional networks. In: 5th international conference on learning representations (ICLR), pp 1\u201314"},{"key":"9975_CR94","unstructured":"Kirsch L, Kunze J, Barber D (2018) Modular networks: learning to decompose neural computation. In: Bengio S, Wallach H, Larochelle H, Grauman K, Cesa-Bianchi N, Garnett R (eds) Advances in neural information processing systems 31 (NeurIPS 2018), pp 2408\u20132418. https:\/\/papers.nips.cc\/paper\/2018"},{"key":"9975_CR95","unstructured":"Krogh A, Hertz JA (1991) A simple weight decay can improve generalization. In: Moody J, Hanson S, Lippmann RP (eds) Advances in neural information processing systems 4 (NIPS 1991), pp 950\u2013957. https:\/\/papers.nips.cc\/paper\/1991"},{"key":"9975_CR96","unstructured":"Krueger D, Maharaj T, Kram\u00e1r J, Pezeshki M, Ballas N, Ke NR, Goyal A, Bengio Y, Courville A, Pal C (2017) Zoneout: regularizing RNNs by randomly preserving hidden activations. In: 5th international conference on learning representations (ICLR), 1\u201311"},{"key":"9975_CR97","doi-asserted-by":"crossref","unstructured":"Kwasigroch A, Miko\u0142ajczyk A, Grochowski M (2017) Deep convolutional neural networks as a decision support tool in medical problems-malignant melanoma case study. In: Polish control conference. Springer, pp 848\u2013856","DOI":"10.1007\/978-3-319-60699-6_81"},{"key":"9975_CR98","doi-asserted-by":"crossref","unstructured":"Larsen J, Hansen LK, Svarer C, Ohlsson M (1996) Design and regularization of neural networks: the optimal use of a validation set. In: Proceedings of the IEEE signal processing society workshop on neural networks for signal processing. IEEE, pp 62\u201371","DOI":"10.1109\/NNSP.1996.548336"},{"key":"9975_CR99","doi-asserted-by":"publisher","unstructured":"Larsen J, Svarer C, Andersen LN, Hansen LK (2012) Adaptive regularization in neural network modeling. In: Montavon G, Orr GB, M\u00fcller KR (eds) Neural networks: tricks of the trade. Lecture notes in computer science, vol 7700. Springer, Berlin, Heidelberg, pp 111\u2013130. https:\/\/doi.org\/10.1007\/978-3-642-35289-8_8","DOI":"10.1007\/978-3-642-35289-8_8"},{"key":"9975_CR100","unstructured":"Larsson G, Maire M, Shakhnarovich G (2017) Fractalnet: ultra-deep neural networks without residuals. In: 5th international conference on learning representations (ICLR), 1\u201311"},{"key":"9975_CR101","unstructured":"Lawrence S, Giles CL, Tsoi AC (1997) Lessons in neural network training: Overfitting may be harder than expected. In: Proceedings of the Fourteenth National Conference on Artificial Intelligence and Ninth Conference on Innovative Applications of Artificial Intelligence. AAAI Press, pp 540\u2013545"},{"key":"9975_CR102","doi-asserted-by":"crossref","unstructured":"LeCun YA, Bottou L, Orr GB, M\u00fcller K-R (2012) Efficient backprop. In: Neural networks: tricks of the trade. Springer, pp 9\u201348","DOI":"10.1007\/978-3-642-35289-8_3"},{"key":"9975_CR103","unstructured":"LeCun Y, Denker JS, Solla SA (1990) Optimal brain damage. In: Advances in neural information processing systems (NIPS), pp 598\u2013605"},{"issue":"5","key":"9975_CR104","doi-asserted-by":"publisher","first-page":"684","DOI":"10.1109\/TPAMI.2005.92","volume":"27","author":"K-C Lee","year":"2005","unstructured":"Lee K-C, Ho J, Kriegman DJ (2005) Acquiring linear subspaces for face recognition under variable lighting. IEEE Trans Pattern Anal Mach Intell 27(5):684\u2013698","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"1","key":"9975_CR105","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1109\/TIFS.2010.2100382","volume":"6","author":"K-C Leung","year":"2011","unstructured":"Leung K-C, Leung CH (2011) Improvement of fingerprint retrieval by a statistical classifier. IEEE Trans Inf Forensics Secur 6(1):59\u201369","journal-title":"IEEE Trans Inf Forensics Secur"},{"issue":"8","key":"9975_CR106","doi-asserted-by":"publisher","first-page":"3919","DOI":"10.1109\/TIP.2016.2579306","volume":"25","author":"X Li","year":"2016","unstructured":"Li X, Zhao L, Wei L, Yang M-H, Wu F, Zhuang Y, Ling H, Wang J (2016) Deepsaliency: multi-task deep neural network model for salient object detection. IEEE Trans Image Process 25(8):3919\u20133930","journal-title":"IEEE Trans Image Process"},{"issue":"4","key":"9975_CR107","doi-asserted-by":"publisher","first-page":"932","DOI":"10.1109\/TNNLS.2017.2650943","volume":"29","author":"H Li","year":"2017","unstructured":"Li H, Liu D, Wang D (2017) Manifold regularized reinforcement learning. IEEE Trans Neural Netw Lear Syst 29(4):932\u2013943","journal-title":"IEEE Trans Neural Netw Lear Syst"},{"issue":"4","key":"9975_CR108","doi-asserted-by":"publisher","first-page":"455","DOI":"10.1007\/s10462-016-9535-1","volume":"49","author":"H Li","year":"2018","unstructured":"Li H, Wang X, Ding S (2018) Research and development of neural network ensembles: a survey. Artif Intell Rev 49(4):455\u2013479","journal-title":"Artif Intell Rev"},{"key":"9975_CR109","unstructured":"Li Y, Liu F (2016) Whiteout: Gaussian adaptive noise regularization in deep neural networks, 1\u201317. arXiv:1612.01490"},{"key":"9975_CR110","doi-asserted-by":"crossref","unstructured":"Li H, Liu D, Wang D (2015) Approximate policy iteration with unsupervised feature learning based on manifold regularization. In: 2015 international joint conference on neural networks (IJCNN). IEEE, pp 1\u20136","DOI":"10.1109\/IJCNN.2015.7280311"},{"key":"9975_CR111","doi-asserted-by":"crossref","unstructured":"Li F, Tian C, Zuo W, Zhang L, Yang M-H (2018) Learning spatial-temporal regularized correlation filters for visual tracking. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4904\u20134913","DOI":"10.1109\/CVPR.2018.00515"},{"key":"9975_CR112","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1016\/j.neucom.2014.01.041","volume":"138","author":"Y Liu","year":"2014","unstructured":"Liu Y, Wu W, Fan Q, Yang D, Wang J (2014) A modified gradient learning algorithm with smoothing l1\/2 regularization for takagi-sugeno fuzzy models. Neurocomputing 138:229\u2013237","journal-title":"Neurocomputing"},{"key":"9975_CR113","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1016\/j.neucom.2016.12.038","volume":"234","author":"W Liu","year":"2017","unstructured":"Liu W, Wang Z, Liu X, Zeng N, Liu Y, Alsaadi FE (2017) A survey of deep neural network architectures and their applications. Neurocomputing 234:11\u201326","journal-title":"Neurocomputing"},{"key":"9975_CR114","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1016\/j.neucom.2020.01.025","volume":"388","author":"X Liu","year":"2020","unstructured":"Liu X, Fan F, Kong L, Diao Z, Xie W, Lu J, You J (2020) Unimodal regularized neuron stick-breaking for ordinal classification. Neurocomputing 388:34\u201344","journal-title":"Neurocomputing"},{"key":"9975_CR115","doi-asserted-by":"crossref","unstructured":"Liu Z, Castagna J (1999) Avoiding overfitting caused by noise using a uniform training mode. In: International joint conference on neural networks (IJCNN), vol 3. IEEE, pp 1788\u20131793","DOI":"10.1109\/IJCNN.1999.832649"},{"key":"9975_CR116","doi-asserted-by":"publisher","unstructured":"Liu R, Liu E, Yang J, Li M, Wang F (2006) Optimizing the hyper-parameters for SVM by combining evolution strategies with a grid search. In: Huang DS, Li K, Irwin GW (eds) Intelligent control and automation. Lecture notes in control and information sciences, vol 344. Springer, Berlin, Heidelberg, pp 712\u2013721. https:\/\/doi.org\/10.1007\/978-3-540-37256-1_87","DOI":"10.1007\/978-3-540-37256-1_87"},{"key":"9975_CR117","doi-asserted-by":"crossref","unstructured":"Liu C, Zhang Z, Wang D (2014) Pruning deep neural networks by optimal brain damage. In: 15th annual conference of the international speech communication association","DOI":"10.21437\/Interspeech.2014-281"},{"key":"9975_CR118","unstructured":"Louizos C, Welling M, Kingma DP (2018) Learning sparse neural networks through L0 regularization. In: 6th international conference on learning representations (ICLR), 1\u201313"},{"key":"9975_CR119","doi-asserted-by":"crossref","unstructured":"Luo J-H, Wu J, Lin W (2017) Thinet: A filter level pruning method for deep neural network compression. In: Proceedings of the IEEE international conference on computer vision, pp 5058\u20135066","DOI":"10.1109\/ICCV.2017.541"},{"key":"9975_CR120","unstructured":"Luo P, Wang X, Shao W, Peng Z (2019) Towards understanding regularization in batch normalization. In: 7th international conference on learning representation (ICLR), 1\u201323"},{"key":"9975_CR121","doi-asserted-by":"publisher","first-page":"286","DOI":"10.1016\/j.neunet.2019.08.015","volume":"119","author":"R Ma","year":"2019","unstructured":"Ma R, Miao J, Niu L, Zhang P (2019) Transformed $$l_1$$-regularization for learning sparse deep neural networks. Neural Netw 119:286\u2013298","journal-title":"Neural Netw"},{"issue":"7","key":"9975_CR122","doi-asserted-by":"publisher","first-page":"1031","DOI":"10.1093\/comjnl\/bxz062","volume":"63","author":"Z Ma","year":"2020","unstructured":"Ma Z, Sattar A, Zhou J, Chen Q, Su K (2020) Dropout with tabu strategy for regularizing deep neural networks. Comput J 63(7):1031\u20131038","journal-title":"Comput J"},{"issue":"Nov","key":"9975_CR123","first-page":"2579","volume":"9","author":"Lvd Maaten","year":"2008","unstructured":"Maaten Lvd, Hinton G (2008) Visualizing data using t-SNE. J Mach Learn Res 9(Nov):2579\u20132605","journal-title":"J Mach Learn Res"},{"key":"9975_CR124","unstructured":"Maclaurin D, Duvenaud D, Adams R (2015) Gradient-based hyperparameter optimization through reversible learning. In: International conference on machine learning, pp 2113\u20132122"},{"issue":"12","key":"9975_CR125","doi-asserted-by":"publisher","first-page":"3793","DOI":"10.1016\/j.patcog.2014.06.010","volume":"47","author":"R Mart\u00edn-F\u00e9lez","year":"2014","unstructured":"Mart\u00edn-F\u00e9lez R, Xiang T (2014) Uncooperative gait recognition by learning to rank. Pattern Recogn 47(12):3793\u20133806","journal-title":"Pattern Recogn"},{"issue":"16","key":"9975_CR126","doi-asserted-by":"publisher","first-page":"2413","DOI":"10.1016\/j.neucom.2010.12.042","volume":"74","author":"Y Miche","year":"2011","unstructured":"Miche Y, Van Heeswijk M, Bas P, Simula O, Lendasse A (2011) TROP-ELM: a double-regularized ELM using LARS and tikhonov regularization. Neurocomputing 74(16):2413\u20132421","journal-title":"Neurocomputing"},{"key":"9975_CR127","doi-asserted-by":"crossref","unstructured":"Miko\u0142ajczyk A, Grochowski M (2018) Data augmentation for improving deep learning in image classification problem. In: International interdisciplinary PhD workshop (IIPhDW). IEEE, pp 117\u2013122","DOI":"10.1109\/IIPHDW.2018.8388338"},{"key":"9975_CR128","unstructured":"Mirza M, Osindero S (2014) Conditional generative adversarial nets, 1\u20137. arXiv:1411.1784"},{"key":"9975_CR129","doi-asserted-by":"crossref","unstructured":"Mitsuno K, Miyao J, Kurita T (2020) Hierarchical group sparse regularization for deep convolutional neural networks, 1\u20138. arXiv:2004.04394","DOI":"10.1109\/IJCNN48605.2020.9207531"},{"key":"9975_CR130","unstructured":"Miyato T, Kataoka T, Koyama M, Yoshida Y (2018) Spectral normalization for generative adversarial networks. In: 6th international conference on learning representations (ICLR), pp 1\u201326"},{"key":"9975_CR131","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Graves A, Antonoglou I, Wierstra D, Riedmiller M (2013) Playing atari with deep reinforcement learning. arXiv:1312.5602"},{"key":"9975_CR132","first-page":"117","volume":"2","author":"J Mockus","year":"1978","unstructured":"Mockus J, Tiesis V, Zilinskas A (1978) The application of Bayesian methods for seeking the extremum. Towards Global Optim 2:117\u2013129","journal-title":"Towards Global Optim"},{"issue":"4","key":"9975_CR133","doi-asserted-by":"publisher","first-page":"264","DOI":"10.7326\/0003-4819-151-4-200908180-00135","volume":"151","author":"D Moher","year":"2009","unstructured":"Moher D, Liberati A, Tetzlaff J, Altman DG (2009) Preferred reporting items for systematic reviews and meta-analyses: the prisma statement. Ann Intern Med 151(4):264\u2013269","journal-title":"Ann Intern Med"},{"issue":"6","key":"9975_CR134","doi-asserted-by":"publisher","first-page":"1481","DOI":"10.1162\/089976602753713025","volume":"14","author":"G Monari","year":"2002","unstructured":"Monari G, Dreyfus G (2002) Local overfitting control via leverages. Neural Comput 14(6):1481\u20131506","journal-title":"Neural Comput"},{"issue":"1","key":"9975_CR135","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1109\/72.478388","volume":"7","author":"JO Moody","year":"1996","unstructured":"Moody JO, Antsaklis PJ (1996) The dependence identification neural network construction algorithm. IEEE Trans Neural Netw 7(1):3\u201315","journal-title":"IEEE Trans Neural Netw"},{"key":"9975_CR136","doi-asserted-by":"publisher","unstructured":"Nabian MA, Meidani H (2020) Physics-driven regularization of deep neural networks for enhanced engineering design and analysis. J Comput Inf Sci Eng 20(1):011006, Paper No: JCISE-19-1072. https:\/\/doi.org\/10.1115\/1.4044507","DOI":"10.1115\/1.4044507"},{"key":"9975_CR137","unstructured":"Nair V, Hinton GE (2010) Rectified linear units improve restricted Boltzmann machines. In: Proceedings of the 27th international conference on machine learning (ICML), pp 807\u2013814"},{"key":"9975_CR138","unstructured":"Nannen V (2003) The paradox of overfitting, Ph.D. thesis, Faculty of Artificial Intelligence, the University of Groningen"},{"issue":"13\u201315","key":"9975_CR139","doi-asserted-by":"publisher","first-page":"2831","DOI":"10.1016\/j.neucom.2007.08.026","volume":"71","author":"PL Narasimha","year":"2008","unstructured":"Narasimha PL, Delashmit WH, Manry MT, Li J, Maldonado F (2008) An integrated growing-pruning method for feedforward network training. Neurocomputing 71(13\u201315):2831\u20132847","journal-title":"Neurocomputing"},{"issue":"2.32","key":"9975_CR140","doi-asserted-by":"publisher","first-page":"177","DOI":"10.14419\/ijet.v7i2.32.15399","volume":"7","author":"M NarasingaRao","year":"2018","unstructured":"NarasingaRao M, Prasad VV, Teja PS, Zindavali M, Reddy OP (2018) A survey on prevention of overfitting in convolution neural networks using machine learning techniques. Int J Eng Technol (UAE) 7(2.32):177\u2013180","journal-title":"Int J Eng Technol (UAE)"},{"issue":"1","key":"9975_CR141","doi-asserted-by":"publisher","first-page":"116","DOI":"10.1137\/1028033","volume":"28","author":"MZ Nashed","year":"1986","unstructured":"Nashed MZ (1986) The theory of tikhonov regularization for fredholm equations of the first kind (c. w. groetsch). SIAM Rev 28(1):116\u2013118","journal-title":"SIAM Rev"},{"issue":"1\u20132","key":"9975_CR142","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1080\/00036818408839508","volume":"18","author":"F Natterer","year":"1984","unstructured":"Natterer F (1984) Error bounds for tikhonov regularization in Hilbert scales. Appl Anal 18(1\u20132):29\u201337","journal-title":"Appl Anal"},{"key":"9975_CR143","unstructured":"Ng AY (1997) Preventing\u201d overfitting\u201d of cross-validation data. In: Proceedings of the 14th international conference on machine learning (ICML), vol 97. Citeseer, pp 245\u2013253"},{"issue":"4","key":"9975_CR144","doi-asserted-by":"publisher","first-page":"473","DOI":"10.1162\/neco.1992.4.4.473","volume":"4","author":"SJ Nowlan","year":"1992","unstructured":"Nowlan SJ, Hinton GE (1992) Simplifying neural networks by soft weight-sharing. Neural Comput 4(4):473\u2013493","journal-title":"Neural Comput"},{"key":"9975_CR145","first-page":"2045","volume":"10","author":"I Nurhaida","year":"2020","unstructured":"Nurhaida I, Ayumi V, Fitrianah D, Zen RA, Noprisson H, Wei H (2020) Implementation of deep neural networks (DNN) with batch normalization for batik pattern recognition. Int J Electr Comput Eng (2088\u20138708) 10:2045\u20132053","journal-title":"Int J Electr Comput Eng (2088\u20138708)"},{"key":"9975_CR146","doi-asserted-by":"publisher","first-page":"36322","DOI":"10.1109\/ACCESS.2019.2905015","volume":"7","author":"Z Pan","year":"2019","unstructured":"Pan Z, Yu W, Yi X, Khan A, Yuan F, Zheng Y (2019) Recent progress on generative adversarial networks (GANs): a survey. IEEE Access 7:36322\u201336333","journal-title":"IEEE Access"},{"key":"9975_CR147","doi-asserted-by":"publisher","first-page":"1051","DOI":"10.1007\/s11554-019-00852-3","volume":"17","author":"A Pashaei","year":"2019","unstructured":"Pashaei A, Ghatee M, Sajedi H (2019) Convolution neural network joint with mixture of extreme learning machines for feature extraction and classification of accident images. J Real-Time Image Proc 17:1051\u20131066","journal-title":"J Real-Time Image Proc"},{"key":"9975_CR148","doi-asserted-by":"crossref","unstructured":"Patel V, Mujumdar N, Balasubramanian P, Marvaniya S, Mittal A (2019) Data augmentation using part analysis for shape classification. In: IEEE winter conference on applications of computer vision (WACV). IEEE, pp 1223\u20131232","DOI":"10.1109\/WACV.2019.00135"},{"key":"9975_CR149","unstructured":"Perez L, Wang J (2017) The effectiveness of data augmentation in image classification using deep learning, 1\u20138. arXiv:1712.04621"},{"key":"9975_CR150","doi-asserted-by":"publisher","first-page":"220","DOI":"10.1016\/j.neunet.2019.05.011","volume":"118","author":"DN Phan","year":"2019","unstructured":"Phan DN, Le Thi HA (2019) Group variable selection via $$\\ell _{p,0}$$ regularization and application to optimal scoring. Neural Netw 118:220\u2013234","journal-title":"Neural Netw"},{"issue":"1","key":"9975_CR151","first-page":"37","volume":"2","author":"DM Powers","year":"2011","unstructured":"Powers DM (2011) Evaluation: from precision, recall and f-measure to ROC, informedness, markedness and correlation. J Mach Learn Technol 2(1):37\u201363","journal-title":"J Mach Learn Technol"},{"issue":"01","key":"9975_CR152","first-page":"4780","volume":"33","author":"E Real","year":"2019","unstructured":"Real E, Aggarwal A, Huang Y, Le QV (2019) Regularized evolution for image classifier architecture search. Proc AAAI Conf Artif Intell 33(01):4780\u20134789","journal-title":"Proc AAAI Conf Artif Intell"},{"issue":"5","key":"9975_CR153","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1109\/72.248452","volume":"4","author":"R Reed","year":"1993","unstructured":"Reed R (1993) Pruning algorithms-a survey. IEEE Trans Neural Netw 4(5):740\u2013747","journal-title":"IEEE Trans Neural Netw"},{"key":"9975_CR154","unstructured":"Ren M, Zeng W, Yang B, Urtasun R (2018) Learning to reweight examples for robust deep learning. In: 35th international conference on machine learning, 1\u201313"},{"issue":"Mar","key":"9975_CR155","first-page":"1371","volume":"3","author":"J Reunanen","year":"2003","unstructured":"Reunanen J (2003) Overfitting in making comparisons between variable selection methods. J Mach Learn Res 3(Mar):1371\u20131382","journal-title":"J Mach Learn Res"},{"issue":"5500","key":"9975_CR156","doi-asserted-by":"publisher","first-page":"2323","DOI":"10.1126\/science.290.5500.2323","volume":"290","author":"ST Roweis","year":"2000","unstructured":"Roweis ST, Saul LK (2000) Nonlinear dimensionality reduction by locally linear embedding. Science 290(5500):2323\u20132326","journal-title":"Science"},{"key":"9975_CR157","doi-asserted-by":"crossref","unstructured":"Sak H, Senior A, Beaufays F (2014) Long short-term memory recurrent neural network architectures for large scale acoustic modeling. In: 15th annual conference of the international speech communication association","DOI":"10.21437\/Interspeech.2014-80"},{"issue":"3","key":"9975_CR158","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1109\/LSP.2017.2657381","volume":"24","author":"J Salamon","year":"2017","unstructured":"Salamon J, Bello JP (2017) Deep convolutional neural networks and data augmentation for environmental sound classification. IEEE Signal Process Lett 24(3):279\u2013283","journal-title":"IEEE Signal Process Lett"},{"key":"9975_CR159","unstructured":"Salimans T, Kingma DP (2016) Weight normalization: a simple reparameterization to accelerate training of deep neural networks. In: Advances in neural information processing systems (NIPS), pp 901\u2013909"},{"key":"9975_CR160","unstructured":"Salman S, Liu X (2019) Overfitting mechanism and avoidance in deep neural networks, 1\u20138. arXiv:1901.06566"},{"key":"9975_CR161","doi-asserted-by":"crossref","unstructured":"Sandler M, Howard A, Zhu M, Zhmoginov A, Chen L-C (2018) Mobilenetv2: inverted residuals and linear bottlenecks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4510\u20134520","DOI":"10.1109\/CVPR.2018.00474"},{"key":"9975_CR162","unstructured":"Sarle WS (1995) Stopped training and other remedies for overfitting. In: Proceedings of the 27th symposium on the interface of computing science and statistics, pp 352\u2013360"},{"key":"9975_CR163","doi-asserted-by":"crossref","unstructured":"Schaffer JD, Whitley D, Eshelman LJ (1992) Combinations of genetic algorithms and neural networks: a survey of the state of the art. In: International workshop on combinations of genetic algorithms and neural networks. IEEE, pp 1\u201337","DOI":"10.1109\/COGANN.1992.273950"},{"issue":"6","key":"9975_CR164","doi-asserted-by":"publisher","first-page":"1796","DOI":"10.1137\/0730091","volume":"30","author":"O Scherzer","year":"1993","unstructured":"Scherzer O, Engl HW, Kunisch K (1993) Optimal a posteriori parameter choice for tikhonov regularization for solving nonlinear ill-posed problems. SIAM J Numer Anal 30(6):1796\u20131838","journal-title":"SIAM J Numer Anal"},{"issue":"3","key":"9975_CR165","doi-asserted-by":"publisher","first-page":"505","DOI":"10.1016\/S0893-6080(96)00086-X","volume":"10","author":"C Schittenkopf","year":"1997","unstructured":"Schittenkopf C, Deco G, Brauer W (1997) Two strategies to avoid overfitting in feedforward networks. Neural Netw 10(3):505\u2013516","journal-title":"Neural Netw"},{"key":"9975_CR166","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1016\/j.neunet.2014.09.003","volume":"61","author":"J Schmidhuber","year":"2015","unstructured":"Schmidhuber J (2015) Deep learning in neural networks: an overview. Neural Netw 61:85\u2013117","journal-title":"Neural Netw"},{"key":"9975_CR167","doi-asserted-by":"crossref","unstructured":"Serra T, Kumar A, Ramalingam S (2020) Lossless compression of deep neural networks, 1\u201314. arXiv:2001.00218","DOI":"10.1007\/978-3-030-58942-4_27"},{"issue":"1","key":"9975_CR168","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1162\/neco.1997.9.1.185","volume":"9","author":"R Setiono","year":"1997","unstructured":"Setiono R (1997) A penalty-function approach for pruning feedforward neural networks. Neural Comput 9(1):185\u2013204","journal-title":"Neural Comput"},{"issue":"12","key":"9975_CR169","doi-asserted-by":"publisher","first-page":"2865","DOI":"10.1162\/089976601317098565","volume":"13","author":"R Setiono","year":"2001","unstructured":"Setiono R (2001) Feedforward neural network construction using cross validation. Neural Comput 13(12):2865\u20132877","journal-title":"Neural Comput"},{"key":"9975_CR170","doi-asserted-by":"crossref","unstructured":"Sharma A, Zaidi A, Singh R, Jain S, Sahoo A (2013) Optimization of svm classifier using firefly algorithm. In: 2013 IEEE second international conference on image information processing (ICIIP-2013). IEEE, pp 198\u2013202","DOI":"10.1109\/ICIIP.2013.6707582"},{"key":"9975_CR171","doi-asserted-by":"crossref","unstructured":"Shekar B, Dagnew G (2020) L1-regulated feature selection and classification of microarray cancer data using deep learning. In: Proceedings of 3rd international conference on computer vision and image processing. Springer, pp 227\u2013242","DOI":"10.1007\/978-981-32-9291-8_19"},{"issue":"4","key":"9975_CR172","doi-asserted-by":"publisher","first-page":"367","DOI":"10.1109\/TC.1968.229395","volume":"100","author":"FW Smith","year":"1968","unstructured":"Smith FW (1968) Pattern classifier design by linear programming. IEEE Trans Comput 100(4):367\u2013372","journal-title":"IEEE Trans Comput"},{"issue":"5","key":"9975_CR173","doi-asserted-by":"publisher","first-page":"757","DOI":"10.1109\/34.6789","volume":"10","author":"SP Smith","year":"1988","unstructured":"Smith SP, Jain AK (1988) A test to determine the multivariate normality of a data set. IEEE Trans Pattern Anal Mach Intell 10(5):757\u2013761","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"9975_CR174","first-page":"2951","volume-title":"Advances in neural information processing systems (NIPS)","author":"J Snoek","year":"2012","unstructured":"Snoek J, Larochelle H, Adams RP (2012) Practical Bayesian optimization of machine learning algorithms. In: Pereira F, Burges CJC, Bottou L, Weinberger KQ (eds) Advances in neural information processing systems (NIPS). Curran Associates Inc, New York, pp 2951\u20132959"},{"key":"9975_CR175","unstructured":"Snoek J, Rippel O, Swersky K, Kiros R, Satish N, Sundaram N, Patwary M, Prabhat M, Adams R (2015) Scalable Bayesian optimization using deep neural networks. In: International conference on machine learning, pp 2171\u20132180"},{"issue":"1","key":"9975_CR176","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava N, Hinton G, Krizhevsky A, Sutskever I, Salakhutdinov R (2014) Dropout: a simple way to prevent neural networks from overfitting. J Mach Learn Res 15(1):1929\u20131958","journal-title":"J Mach Learn Res"},{"issue":"2","key":"9975_CR177","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1162\/106365602320169811","volume":"10","author":"KO Stanley","year":"2002","unstructured":"Stanley KO, Miikkulainen R (2002) Evolving neural networks through augmenting topologies. Evol Comput 10(2):99\u2013127","journal-title":"Evol Comput"},{"key":"9975_CR178","doi-asserted-by":"crossref","unstructured":"Suganuma M, Shirakawa S, Nagao T (2017) A genetic programming approach to designing convolutional neural network architectures. In: Proceedings of the genetic and evolutionary computation conference. ACM, pp 497\u2013504","DOI":"10.1145\/3071178.3071229"},{"key":"9975_CR179","doi-asserted-by":"crossref","unstructured":"Szegedy C, Liu W, Jia Y, Sermanet P, Reed S, Anguelov D, Erhan D, Vanhoucke V, Rabinovich A (2015) Going deeper with convolutions. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1\u20139","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"9975_CR180","unstructured":"Tam E, Dunson D (2020) Fiedler regularization: Learning neural networks with graph sparsity, 1\u201310. arXiv:2003.00992"},{"key":"9975_CR181","first-page":"1","volume":"95","author":"Y Tao","year":"2018","unstructured":"Tao Y, Takagi K, Nakata K (2018) RDEC: integrating regularization into deep embedded clustering for imbalanced datasets. Proc Mach Learn Resear 95:1\u201316","journal-title":"Proc Mach Learn Resear"},{"key":"9975_CR182","doi-asserted-by":"crossref","unstructured":"Taylor L, Nitschke G (2018) Improving deep learning with generic data augmentation. In: IEEE symposium series on computational intelligence (SSCI), pp 1542\u20131547","DOI":"10.1109\/SSCI.2018.8628742"},{"issue":"8","key":"9975_CR183","doi-asserted-by":"publisher","first-page":"2870","DOI":"10.3390\/app10082870","volume":"10","author":"Y Tian","year":"2020","unstructured":"Tian Y, Pan G (2020) An unsupervised regularization and dropout based deep neural network and its application for thermal error prediction. Appl Sci 10(8):2870","journal-title":"Appl Sci"},{"key":"9975_CR184","volume-title":"Methods for solving ill-posed problems","author":"A Tikhonov","year":"1977","unstructured":"Tikhonov A, Arsenin VY (1977) Methods for solving ill-posed problems. Wiley, Hoboken"},{"key":"9975_CR185","doi-asserted-by":"crossref","unstructured":"Tomar VS, Rose RC (2014) Manifold regularized deep neural networks. In: 15th annual conference of the international speech communication association, pp 1\u20135","DOI":"10.21437\/Interspeech.2014-82"},{"issue":"5\u20136","key":"9975_CR186","doi-asserted-by":"publisher","first-page":"507","DOI":"10.1016\/0378-4754(95)00003-8","volume":"40","author":"S Tzafestas","year":"1996","unstructured":"Tzafestas S, Dalianis P, Anthopoulos G (1996) On the overtraining phenomenon of backpropagation neural networks. Math Comput Simul 40(5\u20136):507\u2013521","journal-title":"Math Comput Simul"},{"key":"9975_CR187","doi-asserted-by":"publisher","unstructured":"Vapnik VN, Chervonenkis AY (2015) On the uniform convergence of relative frequencies of events to their probabilities. In: Vovk V, Papadopoulos H, Gammerman A (eds) Measures of complexity. Springer, Cham, pp 11\u201330. https:\/\/doi.org\/10.1007\/978-3-319-21852-6_3","DOI":"10.1007\/978-3-319-21852-6_3"},{"key":"9975_CR188","doi-asserted-by":"publisher","DOI":"10.1007\/0-387-34239-7","volume-title":"Estimation of dependences based on empirical data","author":"V Vapnik","year":"2006","unstructured":"Vapnik V (2006) Estimation of dependences based on empirical data. Springer, Berlin"},{"issue":"2","key":"9975_CR189","doi-asserted-by":"publisher","first-page":"285","DOI":"10.1109\/42.700740","volume":"17","author":"M Vauhkonen","year":"1998","unstructured":"Vauhkonen M, Vadasz D, Karjalainen PA, Somersalo E, Kaipio JP (1998) Tikhonov regularization and prior information in electrical impedance tomography. IEEE Trans Med Imag 17(2):285\u2013293","journal-title":"IEEE Trans Med Imag"},{"key":"9975_CR190","unstructured":"Verma V, Lamb A, Beckham C, Najafi A, Mitliagkas I, Lopez-Paz D, Bengio Y (2019) Manifold mixup: better representations by interpolating hidden states. In: Proceedings of the 36th international conference on machine learning, vol 97. PMLR, pp 6438\u20136447"},{"key":"9975_CR191","unstructured":"Verma V, Qu M, Lamb A, Bengio Y, Kannala J, Tang J (2019) Graphmix: regularized training of graph neural networks for semi-supervised learning, 1\u201316. arXiv:1909.11715"},{"key":"9975_CR192","first-page":"1","volume":"197","author":"T Vu","year":"2019","unstructured":"Vu T, Wen E, Nehoran R (2019) How not to give a FLOP: combining regularization and pruning for efficient inference. Comput Sci Res 197:1\u201314","journal-title":"Comput Sci Res"},{"issue":"7","key":"9975_CR193","doi-asserted-by":"publisher","first-page":"8696","DOI":"10.1016\/j.eswa.2011.01.077","volume":"38","author":"S Wang","year":"2011","unstructured":"Wang S, Li D, Song X, Wei Y, Li H (2011) A feature selection method based on improved fisher\u2019s discriminant ratio for text sentiment classification. Expert Syst Appl 38(7):8696\u20138702","journal-title":"Expert Syst Appl"},{"key":"9975_CR194","unstructured":"Wan L, Zeiler M, Zhang S, Le Cun Y, Fergus R (2013) Regularization of neural networks using dropconnect. In: International conference on machine learning, pp 1058\u20131066"},{"key":"#cr-split#-9975_CR195.1","doi-asserted-by":"crossref","unstructured":"W\u0105sowicz M, Grochowski M, Kulka M, Miko\u0142ajczyk A, Ficek M, Karpie\u0144ko K, Ci\u0107kiewicz M (2017) Computed aided system for separation and classification of the abnormal erythrocytes in human blood. In: Spigulis J","DOI":"10.1117\/12.2297218"},{"key":"#cr-split#-9975_CR195.2","unstructured":"(ed) Biophotonics-Riga 2017, vol 10592. International Society for Optics and Photonics, SPIE, pp 49-55"},{"issue":"3\u20134","key":"9975_CR196","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1007\/BF00992696","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams RJ (1992) Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach Learn 8(3\u20134):229\u2013256","journal-title":"Mach Learn"},{"key":"9975_CR197","volume-title":"Quantum machine learning: what quantum computing means to data mining","author":"P Wittek","year":"2014","unstructured":"Wittek P (2014) Quantum machine learning: what quantum computing means to data mining. Academic Press, Cambridge"},{"key":"9975_CR198","doi-asserted-by":"publisher","first-page":"72","DOI":"10.1016\/j.neunet.2013.11.006","volume":"50","author":"W Wu","year":"2014","unstructured":"Wu W, Fan Q, Zurada JM, Wang J, Yang D, Liu Y (2014) Batch gradient method with smoothing l1\/2 regularization for training of feedforward neural networks. Neural Netw 50:72\u201378","journal-title":"Neural Netw"},{"key":"9975_CR199","doi-asserted-by":"crossref","unstructured":"Wu Y, He K (2018) Group normalization. In: Proceedings of the European conference on computer vision (ECCV), pp 3\u201319","DOI":"10.1007\/978-3-030-01261-8_1"},{"key":"9975_CR200","doi-asserted-by":"crossref","unstructured":"Xu K, Su H, Zhu J, Guan J-S, Zhang B (2016) Neuron segmentation based on CNN with semi-supervised regularization. In: Proceedings of the IEEE conference on computer vision and pattern recognition workshops, pp 20\u201328","DOI":"10.1109\/CVPRW.2016.167"},{"key":"9975_CR201","unstructured":"Xu B, Wang N, Chen T, Li M (2015) Empirical evaluation of rectified activations in convolutional network. In: Deep learning workshop, ICML, pp 1\u20135"},{"issue":"1\u20134","key":"9975_CR202","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1016\/S0925-2312(99)00127-7","volume":"30","author":"JY Yam","year":"2000","unstructured":"Yam JY, Chow TW (2000) A weight initialization method for improving training speed in feedforward neural network. Neurocomputing 30(1\u20134):219\u2013232","journal-title":"Neurocomputing"},{"key":"9975_CR203","doi-asserted-by":"crossref","unstructured":"Yang H, Tang M, Wen W, Yan F, Hu D, Li A, Li H, Chen Y (2020) Learning low-rank deep neural networks via singular vector orthogonality regularization and singular value sparsification. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition workshops, pp 678\u2013679","DOI":"10.1109\/CVPRW50498.2020.00347"},{"key":"9975_CR204","unstructured":"Yang T, Zhu S, Chen C (2020)GradAug: a new regularization method for deep neural networks. arXiv:2006.07989"},{"key":"9975_CR205","doi-asserted-by":"crossref","unstructured":"Yu Y, Gong Z, Zhong P, Shan J (2017) Unsupervised representation learning with deep convolutional neural network for remote sensing images. In: International conference on image and graphics. Springer, pp 97\u2013108","DOI":"10.1007\/978-3-319-71589-6_9"},{"key":"9975_CR206","doi-asserted-by":"crossref","unstructured":"Yu R, Li A, Chen C-F, Lai J-H, Morariu VI, Han X, Gao M, Lin C-Y, Davis LS (2018) Nisp: pruning networks using neuron importance score propagation. In: The IEEE conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2018.00958"},{"key":"9975_CR207","doi-asserted-by":"crossref","unstructured":"Zagoruyko S, Komodakis N (2016) Wide residual networks, 1\u201315. arXiv:1605.07146","DOI":"10.5244\/C.30.87"},{"key":"9975_CR208","doi-asserted-by":"publisher","unstructured":"Zemouri R (2017) An evolutionary building algorithm for deep neural networks. In: 12th international workshop on self-organizing maps and learning vector quantization. Clustering and data visualization (WSOM) 2017:1\u20137. https:\/\/doi.org\/10.1109\/WSOM.2017.8020002","DOI":"10.1109\/WSOM.2017.8020002"},{"issue":"8","key":"9975_CR209","doi-asserted-by":"publisher","first-page":"1608","DOI":"10.1109\/TNNLS.2014.2346399","volume":"26","author":"L Zhang","year":"2014","unstructured":"Zhang L, Li K, Bai E-W, Irwin GW (2014) Two-stage orthogonal least squares methods for neural network construction. IEEE Trans Neural Netw Learn Syst 26(8):1608\u20131621","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"9975_CR210","doi-asserted-by":"publisher","first-page":"146","DOI":"10.1016\/j.inffus.2017.10.006","volume":"42","author":"Q Zhang","year":"2018","unstructured":"Zhang Q, Yang LT, Chen Z, Li P (2018) A survey on deep learning for big data. Inf Fusion 42:146\u2013157","journal-title":"Inf Fusion"},{"key":"9975_CR211","first-page":"11393","volume-title":"Advances in neural information processing systems (NIPS)","author":"H Zhao","year":"2019","unstructured":"Zhao H, Tsai Y-HH, Salakhutdinov RR, Gordon GJ (2019) Learning neural networks with adaptive regularization. In: Wallach H, Larochelle H, Beygelzimer A, Alch\u00e9-Buc Fd, Fox E, Garnett R (eds) Advances in neural information processing systems (NIPS). Curran Associates, Inc., New York, pp 11393\u201311404"},{"key":"9975_CR212","doi-asserted-by":"publisher","first-page":"107331","DOI":"10.1016\/j.measurement.2019.107331","volume":"152","author":"M Zhao","year":"2020","unstructured":"Zhao M, Tang B, Deng L, Pecht M (2020) Multiple wavelet regularized deep residual networks for fault diagnosis. Measurement 152:107331","journal-title":"Measurement"},{"key":"9975_CR213","doi-asserted-by":"crossref","unstructured":"Zhong Z, Zheng L, Kang G, Li S, Yang Y (2020) Random erasing data augmentation. In: Proceedings of the AAAI conference on artificial intelligence (AAAI)","DOI":"10.1609\/aaai.v34i07.7000"},{"key":"9975_CR214","doi-asserted-by":"crossref","unstructured":"Zhu X, Zhou W, Li H (2018) Improving deep neural network sparsity through decorrelation regularization. In: Proceedings of the 27th international joint conference on artificial intelligence (IJCAI), pp 3264\u20133270","DOI":"10.24963\/ijcai.2018\/453"},{"key":"9975_CR215","unstructured":"Zoph B, Le QV (2017) Neural architecture search with reinforcement learning. In: 5th international conference on learning representations (ICLR), pp 1\u201316"},{"issue":"2","key":"9975_CR216","doi-asserted-by":"publisher","first-page":"301","DOI":"10.1111\/j.1467-9868.2005.00503.x","volume":"67","author":"H Zou","year":"2005","unstructured":"Zou H, Hastie T (2005) Regularization and variable selection via the elastic net. J R Stat Soc Ser B (Stat Methodol) 67(2):301\u2013320","journal-title":"J R Stat Soc Ser B (Stat Methodol)"}],"container-title":["Artificial Intelligence Review"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-021-09975-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10462-021-09975-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-021-09975-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,25]],"date-time":"2024-08-25T08:05:59Z","timestamp":1724573159000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10462-021-09975-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,3,3]]},"references-count":217,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2021,12]]}},"alternative-id":["9975"],"URL":"https:\/\/doi.org\/10.1007\/s10462-021-09975-1","relation":{},"ISSN":["0269-2821","1573-7462"],"issn-type":[{"value":"0269-2821","type":"print"},{"value":"1573-7462","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,3,3]]},"assertion":[{"value":"3 March 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}