{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,18]],"date-time":"2026-04-18T05:36:54Z","timestamp":1776490614957,"version":"3.51.2"},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2022,10,27]],"date-time":"2022-10-27T00:00:00Z","timestamp":1666828800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,10,27]],"date-time":"2022-10-27T00:00:00Z","timestamp":1666828800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Natural Science Basic Research Plan in Shaanxi Province of China","award":["2021JM-446"],"award-info":[{"award-number":["2021JM-446"]}]},{"name":"The 65th China Postdoctoral Science Foundation","award":["2019M652837"],"award-info":[{"award-number":["2019M652837"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Process Lett"],"published-print":{"date-parts":[[2023,8]]},"DOI":"10.1007\/s11063-022-11069-0","type":"journal-article","created":{"date-parts":[[2022,10,27]],"date-time":"2022-10-27T19:07:51Z","timestamp":1666897671000},"page":"4871-4888","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Convergence of Batch Gradient Method for Training of Pi-Sigma Neural Network with Regularizer and Adaptive Momentum Term"],"prefix":"10.1007","volume":"55","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1017-3496","authenticated-orcid":false,"given":"Qinwei","family":"Fan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Le","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qian","family":"Kang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Li","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,10,27]]},"reference":[{"key":"11069_CR1","volume-title":"Neural networks and learning machines","author":"S Haykin","year":"2008","unstructured":"Haykin S (2008) Neural networks and learning machines. Prentice-Hall, Upper Saddle River"},{"issue":"1","key":"11069_CR2","doi-asserted-by":"publisher","first-page":"5","DOI":"10.3390\/designs5010005","volume":"5","author":"K Ilias","year":"2021","unstructured":"Ilias K, Michail P (2021) Predictive maintenance using machine learning and data mining: a pioneer method implemented to Greek railways. Designs 5(1):5","journal-title":"Designs"},{"issue":"11","key":"11069_CR3","doi-asserted-by":"publisher","first-page":"8243","DOI":"10.1007\/s00500-019-04506-1","volume":"24","author":"C Kocak","year":"2019","unstructured":"Kocak C et al (2019) A new fuzzy time series method based on an ARMA-type recurrent Pi-Sigma artificial neural network. Soft Comput 24(11):8243\u20138252","journal-title":"Soft Comput"},{"key":"11069_CR4","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1016\/j.engappai.2018.04.017","volume":"72","author":"E Bas","year":"2018","unstructured":"Bas E et al (2018) High order fuzzy time series method based on pi-sigma neural network. Eng Appl Artif Intell 72:350\u2013356","journal-title":"Eng Appl Artif Intell"},{"issue":"12","key":"11069_CR5","doi-asserted-by":"publisher","first-page":"1583","DOI":"10.3390\/pr8121583","volume":"8","author":"T Liu","year":"2020","unstructured":"Liu T, Fan QW, Kang Q et al (2020) Extreme learning machine based on firefly adaptive flower pollination algorithm optimization. Processes 8(12):1583","journal-title":"Processes"},{"key":"11069_CR6","doi-asserted-by":"publisher","first-page":"250","DOI":"10.1016\/j.ins.2016.11.020","volume":"381","author":"J Wang","year":"2017","unstructured":"Wang J, Cai QL et al (2017) Convergence analyses on sparse feedforward neural networks via group lasso regularization. Inf Sci 381:250\u2013269","journal-title":"Inf Sci"},{"key":"11069_CR7","doi-asserted-by":"publisher","DOI":"10.1002\/adts.202200047","author":"QW Fan","year":"2022","unstructured":"Fan QW, Zhang ZW, Huang XD (2022) Parameter conjugate gradient with secant equation based Elman neural network and its convergence analysis. Adv Theory Simul. https:\/\/doi.org\/10.1002\/adts.202200047","journal-title":"Adv Theory Simul"},{"key":"11069_CR8","first-page":"13","volume":"1","author":"Y Shin","year":"1991","unstructured":"Shin Y, Ghosh J (1991) The pi-sigma network: an efficient higher-order neural network for pattern classification and function approximation. Int Jt Conf Neural Netw 1:13\u201318","journal-title":"Int Jt Conf Neural Netw"},{"issue":"1","key":"11069_CR9","first-page":"11","volume":"7","author":"KS Mohamed","year":"2016","unstructured":"Mohamed KS, Habtamu ZA et al (2016) Batch gradient method for training of pi-Sigma neural network with penalty. Int J Artif Intell Appl 7(1):11\u201320","journal-title":"Int J Artif Intell Appl"},{"key":"11069_CR10","doi-asserted-by":"publisher","first-page":"70","DOI":"10.1016\/j.ins.2021.11.044","volume":"585","author":"QW Fan","year":"2022","unstructured":"Fan QW, Kang Q, Zurada JM (2022) Convergence analysis for sigma-pi-sigma neural network based on some relaxed conditions. Inf Sci 585:70\u201388","journal-title":"Inf Sci"},{"issue":"4","key":"11069_CR11","doi-asserted-by":"publisher","first-page":"331","DOI":"10.1023\/A:1016249727555","volume":"17","author":"W Wu","year":"2002","unstructured":"Wu W, Feng G, Li X (2002) Training multilayer perceptrons via minimization of sum of ridge functions. Adv Comput Math 17(4):331\u2013347","journal-title":"Adv Comput Math"},{"issue":"2","key":"11069_CR12","doi-asserted-by":"publisher","first-page":"522","DOI":"10.1109\/TNN.2005.863460","volume":"17","author":"NM Zhang","year":"2006","unstructured":"Zhang NM, Wu W, Zheng GF (2006) Convergence of gradient method with momentum for two-layer feedforward neural networks. IEEE Trans Neural Netw 17(2):522\u20135","journal-title":"IEEE Trans Neural Netw"},{"issue":"3","key":"11069_CR13","doi-asserted-by":"publisher","first-page":"105","DOI":"10.2478\/s13537-013-0109-x","volume":"3","author":"MG Augasta","year":"2013","unstructured":"Augasta MG, Kathirvalavakumar T (2013) Pruning algorithms of neural networks\u2014a comparative study. Open Comput Sci 3(3):105\u2013115","journal-title":"Open Comput Sci"},{"key":"11069_CR14","first-page":"1","volume":"2020","author":"QW Fan","year":"2020","unstructured":"Fan QW, Liu T (2020) Smoothing $$L_0$$ regularization for extreme learning machine. Math Probl Eng 2020:1\u201310","journal-title":"Math Probl Eng"},{"key":"11069_CR15","doi-asserted-by":"publisher","first-page":"260","DOI":"10.1016\/j.neucom.2017.07.051","volume":"273","author":"CY Xu","year":"2018","unstructured":"Xu CY, Yang J et al (2018) SRNN: self-regularized neural network. Neurocomputing 273:260\u2013270","journal-title":"Neurocomputing"},{"issue":"1","key":"11069_CR16","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1109\/72.363426","volume":"6","author":"R Setiono","year":"1995","unstructured":"Setiono R, Hui LCK (1995) Use of a quasi-newton method in a feedforward neural network construction algorithm. Neural Netw IEEE Trans 6(1):273\u2013277","journal-title":"Neural Netw IEEE Trans"},{"issue":"1","key":"11069_CR17","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1016\/S0893-6080(97)00111-1","volume":"11","author":"J Zhang","year":"1998","unstructured":"Zhang J, Morris AJ (1998) A sequential learning approach for single hidden layer neural networks. Neural Netw 11(1):65\u201380","journal-title":"Neural Netw"},{"issue":"3","key":"11069_CR18","doi-asserted-by":"publisher","first-page":"241","DOI":"10.1007\/s11063-011-9196-7","volume":"34","author":"MG Augasta","year":"2011","unstructured":"Augasta MG, Kathirvalavakumar T (2011) A novel pruning algorithm for optimizing feedforward neural network of classification problems. Neural Process Lett 34(3):241\u2013258","journal-title":"Neural Process Lett"},{"issue":"1","key":"11069_CR19","doi-asserted-by":"publisher","first-page":"161","DOI":"10.1007\/s10100-017-0496-5","volume":"27","author":"R Hrebik","year":"2019","unstructured":"Hrebik R, Kukal J, Jablonsky J (2019) Optimal unions of hidden classes. Cent Eur J Oper Res 27(1):161\u2013177","journal-title":"Cent Eur J Oper Res"},{"key":"11069_CR20","unstructured":"Sabo D, Yu XH (2008) Neural network dimension selection for dynamical system identification. IEEE International Conference on Control Applications. pp 972-977"},{"issue":"1","key":"11069_CR21","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1162\/neco.1997.9.1.185","volume":"9","author":"R Setiono","year":"1997","unstructured":"Setiono R (1997) A penalty-function approach for pruning feedforward neural networks. Neural Comput 9(1):185\u2013204","journal-title":"Neural Comput"},{"key":"11069_CR22","doi-asserted-by":"crossref","unstructured":"Wang J, Wu W, Zurada JM, (2011) Boundedness and convergence of MPN for cyclic and almost cyclic learning with penalty. Proceedings IEEE International Joint Conference on Neural Networks (IJCNN), pp 125\u2013132","DOI":"10.1109\/IJCNN.2011.6033210"},{"issue":"6","key":"11069_CR23","doi-asserted-by":"publisher","first-page":"1050","DOI":"10.1109\/TNN.2009.2020848","volume":"20","author":"H Zhang","year":"2009","unstructured":"Zhang H, Wu W, Liu F, Yao M (2009) Boundedness and convergence of online gradient method with penalty for feedforward neural networks. Neural Netw IEEE Trans 20(6):1050\u20131054","journal-title":"Neural Netw IEEE Trans"},{"key":"11069_CR24","unstructured":"Huynh TQ, Setiono R (2005) Effective neural network pruning using cross-validation. Proceedings IEEE international joint conference on neural networks(IJCNN). pp 972\u2013977"},{"issue":"2","key":"11069_CR25","doi-asserted-by":"publisher","first-page":"207","DOI":"10.1016\/0925-2312(94)90055-8","volume":"6","author":"M Hagiwara","year":"1994","unstructured":"Hagiwara M (1994) A simple and effective method for removal of hidden units and weights. Neurocomputing 6(2):207\u2013218","journal-title":"Neurocomputing"},{"issue":"3","key":"11069_CR26","doi-asserted-by":"publisher","first-page":"347","DOI":"10.1016\/0167-8191(90)90086-O","volume":"14","author":"D Whitley","year":"1990","unstructured":"Whitley D, Starkweather T, Bogart C (1990) Genetic algorithms and neural networks: optimizing connections and connectivity. Parallel Comput 14(3):347\u2013361","journal-title":"Parallel Comput"},{"key":"11069_CR27","doi-asserted-by":"crossref","unstructured":"Fletcher L, Katkovnik V, Steffens FE, Engelbrecht AP (1998) Optimizing the number of hidden nodes of a feedforward artificial neural network. Proceedings IEEE world congress on computational intelligence. The international joint conference on neural networks, pp 1608\u20131612","DOI":"10.1109\/IJCNN.1998.686018"},{"issue":"2","key":"11069_CR28","doi-asserted-by":"publisher","first-page":"111","DOI":"10.1016\/0925-2312(94)E0053-T","volume":"7","author":"LM Belue","year":"1995","unstructured":"Belue LM, Bauer KW (1995) Determining input features for multilayer perceptrons. Neurocomputing 7(2):111\u2013121","journal-title":"Neurocomputing"},{"key":"11069_CR29","doi-asserted-by":"publisher","first-page":"28742","DOI":"10.1109\/ACCESS.2020.3048235","volume":"9","author":"QW Fan","year":"2021","unstructured":"Fan QW, Peng J, Li H, Lin S (2021) Convergence of a gradient-based learning algorithm with penalty for Ridge Polynomial Neural Networks. IEEE Access 9:28742\u201328752","journal-title":"IEEE Access"},{"issue":"1","key":"11069_CR30","doi-asserted-by":"publisher","first-page":"659","DOI":"10.1109\/TKDE.2019.2893266","volume":"32","author":"H Zhang","year":"2020","unstructured":"Zhang H, Wang J, Sun Z et al (2020) Feature selection for neural networks using group Lasso regularization. IEEE Trans Knowl Data Eng 32(1):659\u2013673","journal-title":"IEEE Trans Knowl Data Eng"},{"issue":"1","key":"11069_CR31","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1016\/S0925-2312(00)00314-3","volume":"37","author":"SM Loone","year":"2001","unstructured":"Loone SM, Irwin G (2001) Improving neural network training solutions using regularisation. Neurocomputing 37(1):71\u201390","journal-title":"Neurocomputing"},{"issue":"7","key":"11069_CR32","doi-asserted-by":"publisher","first-page":"1013","DOI":"10.1109\/TNNLS.2012.2197412","volume":"23","author":"ZB Xu","year":"2012","unstructured":"Xu ZB, Zhang H et al (2012) $$L_{1\/2}$$ regularization: a thresholding representation theory and a fast solver. IEEE Trans Netw Learn Syst 23(7):1013\u20131027","journal-title":"IEEE Trans Netw Learn Syst"},{"key":"11069_CR33","doi-asserted-by":"publisher","first-page":"191482","DOI":"10.1109\/ACCESS.2020.3031647","volume":"8","author":"QW Fan","year":"2020","unstructured":"Fan QW, Niu L, Kang Q (2020) Regression and multiclass classification using sparse extreme learning machine via smoothing group $$L_{1\/2}$$ regularizer. IEEE Access 8:191482\u2013191494","journal-title":"IEEE Access"},{"issue":"6","key":"11069_CR34","doi-asserted-by":"publisher","first-page":"577","DOI":"10.14311\/NNW.2017.27.032","volume":"27","author":"KS Mohamed","year":"2017","unstructured":"Mohamed KS, Wu W et al (2017) A modified higher-order feed forward neural network with smoothing regularization. Neural Netw World 27(6):577\u2013592","journal-title":"Neural Netw World"},{"key":"11069_CR35","doi-asserted-by":"publisher","unstructured":"Zhou L, Fan QW, Huang XD, Liu Y (2022) Weak and strong convergence analysis of Elman neural networks via weight decay regularization. Optimization, pp 1-24. https:\/\/doi.org\/10.1080\/02331934.2022.2057852.","DOI":"10.1080\/02331934.2022.2057852."},{"issue":"1","key":"11069_CR36","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1111\/j.2517-6161.1996.tb02080.x","volume":"58","author":"R Tibshirani","year":"1996","unstructured":"Tibshirani R (1996) Regression shrinkage and selection via the lasso. J R Stat Soc Ser B (Methodological) 58(1):267\u2013288","journal-title":"J R Stat Soc Ser B (Methodological)"},{"issue":"476","key":"11069_CR37","doi-asserted-by":"publisher","first-page":"1418","DOI":"10.1198\/016214506000000735","volume":"101","author":"H Zou","year":"2006","unstructured":"Zou H (2006) The adaptive lasso and its oracle properties. J Am Stat Assoc 101(476):1418\u20131429","journal-title":"J Am Stat Assoc"},{"issue":"1","key":"11069_CR38","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1111\/j.1467-9868.2005.00532.x","volume":"68","author":"M Yuan","year":"2006","unstructured":"Yuan M, Lin Y (2006) Model selection and estimation in regression with grouped variables. J R Stat Soc 68(1):49\u201367","journal-title":"J R Stat Soc"},{"key":"11069_CR39","unstructured":"Friedman J, Hastie T, Tibshirani R (2010) A note on the group lasso and a sparse group lasso, Statistics"},{"issue":"2","key":"11069_CR40","doi-asserted-by":"publisher","first-page":"231","DOI":"10.1080\/10618600.2012.681250","volume":"22","author":"S Noah","year":"2013","unstructured":"Noah S, Friedman J, Hastie T, Tibshirani R (2013) A sparse group lasso. J Comput Graph Stat 22(2):231\u2013245","journal-title":"J Comput Graph Stat"},{"key":"11069_CR41","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1016\/j.ins.2020.12.014","volume":"553","author":"Q Kang","year":"2021","unstructured":"Kang Q, Fan QW, Zurada JM (2021) Deterministic convergence analysis via smoothing group lasso regularization and adaptive momentum for sigma-pi-sigma neural network. Inf Sci 553:66\u201382","journal-title":"Inf Sci"}],"container-title":["Neural Processing Letters"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11063-022-11069-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11063-022-11069-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11063-022-11069-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,6]],"date-time":"2024-10-06T18:24:35Z","timestamp":1728239075000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11063-022-11069-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,27]]},"references-count":41,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2023,8]]}},"alternative-id":["11069"],"URL":"https:\/\/doi.org\/10.1007\/s11063-022-11069-0","relation":{},"ISSN":["1370-4621","1573-773X"],"issn-type":[{"value":"1370-4621","type":"print"},{"value":"1573-773X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,10,27]]},"assertion":[{"value":"16 October 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 October 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}