{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,18]],"date-time":"2026-06-18T18:47:49Z","timestamp":1781808469553,"version":"3.54.5"},"reference-count":91,"publisher":"Springer Science and Business Media LLC","issue":"34","license":[{"start":{"date-parts":[[2023,9,27]],"date-time":"2023-09-27T00:00:00Z","timestamp":1695772800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,9,27]],"date-time":"2023-09-27T00:00:00Z","timestamp":1695772800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1007\/s00521-023-09035-5","type":"journal-article","created":{"date-parts":[[2023,9,27]],"date-time":"2023-09-27T14:03:37Z","timestamp":1695823417000},"page":"24259-24281","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Gish: a novel activation function for image classification"],"prefix":"10.1007","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8416-6520","authenticated-orcid":false,"given":"Mustafa","family":"Kaytan","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8037-8625","authenticated-orcid":false,"given":"\u0130brahim Berkan","family":"Aydilek","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6106-2374","authenticated-orcid":false,"given":"Celaleddin","family":"Yero\u011flu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2023,9,27]]},"reference":[{"key":"9035_CR1","doi-asserted-by":"publisher","first-page":"420","DOI":"10.1007\/s42979-021-00815-1","volume":"2","author":"IH Sarker","year":"2021","unstructured":"Sarker IH (2021) Deep learning: a comprehensive overview on techniques, taxonomy, applications and research directions. SN Comput Sci 2:420. https:\/\/doi.org\/10.1007\/s42979-021-00815-1","journal-title":"SN Comput Sci"},{"key":"9035_CR2","doi-asserted-by":"publisher","first-page":"114805","DOI":"10.1016\/j.eswa.2021.114805","volume":"174","author":"S Kili\u00e7arslan","year":"2021","unstructured":"Kili\u00e7arslan S, Celik M (2021) RSigELU: a nonlinear activation function for deep neural networks. Expert Syst Appl 174:114805. https:\/\/doi.org\/10.1016\/j.eswa.2021.114805","journal-title":"Expert Syst Appl"},{"issue":"6","key":"9035_CR3","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A Krizhevsky","year":"2017","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2017) ImageNet classification with deep convolutional neural networks. Commun ACM 60(6):84\u201390. https:\/\/doi.org\/10.1145\/3065386","journal-title":"Commun ACM"},{"issue":"3","key":"9035_CR4","doi-asserted-by":"publisher","first-page":"810","DOI":"10.3390\/en15030810","volume":"15","author":"O Jogunola","year":"2022","unstructured":"Jogunola O, Adebisi B, Hoang KV, Tsado Y, Popoola SI, Hammoudeh M, Nawaz R (2022) CBLSTM-AE: a hybrid deep learning framework for predicting energy consumption. Energies 15(3):810. https:\/\/doi.org\/10.3390\/en15030810","journal-title":"Energies"},{"key":"9035_CR5","doi-asserted-by":"publisher","first-page":"103085","DOI":"10.1016\/j.bspc.2021.103085","volume":"71","author":"\u00d6F Ertu\u011frul","year":"2022","unstructured":"Ertu\u011frul \u00d6F, Ak\u0131l MF (2022) Detecting hemorrhage types and bounding box of hemorrhage by deep learning. Biomed Signal Process Control 71:103085. https:\/\/doi.org\/10.1016\/j.bspc.2021.103085","journal-title":"Biomed Signal Process Control"},{"key":"9035_CR6","doi-asserted-by":"publisher","first-page":"114534","DOI":"10.1016\/j.eswa.2020.114534","volume":"171","author":"Y Zhou","year":"2021","unstructured":"Zhou Y, Li D, Huo S, Kung S-Y (2021) Shape autotuning activation function. Expert Syst Appl 171:114534. https:\/\/doi.org\/10.1016\/j.eswa.2020.114534","journal-title":"Expert Syst Appl"},{"key":"9035_CR7","doi-asserted-by":"publisher","first-page":"113977","DOI":"10.1016\/j.eswa.2020.113977","volume":"164","author":"Y Ko\u00e7ak","year":"2021","unstructured":"Ko\u00e7ak Y, \u015eiray G\u00dc (2021) New activation functions for single layer feedforward neural network. Expert Syst Appl 164:113977. https:\/\/doi.org\/10.1016\/j.eswa.2020.113977","journal-title":"Expert Syst Appl"},{"issue":"1","key":"9035_CR8","doi-asserted-by":"publisher","first-page":"323","DOI":"10.3390\/s22010323","volume":"22","author":"IU Khan","year":"2022","unstructured":"Khan IU, Afzal S, Lee JW (2022) Human activity recognition via hybrid deep learning based model. Sensors 22(1):323. https:\/\/doi.org\/10.3390\/s22010323","journal-title":"Sensors"},{"key":"9035_CR9","doi-asserted-by":"publisher","DOI":"10.1016\/j.mlwa.2021.100112","volume":"6","author":"L Parisi","year":"2021","unstructured":"Parisi L, Ma R, RaviChandran N, Lanzillotta M (2021) hyper-sinh: an accurate and reliable function from shallow to deep learning in TensorFlow and Keras. Mach Learn Appl 6:100112. https:\/\/doi.org\/10.1016\/j.mlwa.2021.100112","journal-title":"Mach Learn Appl"},{"key":"9035_CR10","doi-asserted-by":"publisher","first-page":"16283","DOI":"10.1109\/access.2022.3147519","volume":"10","author":"K Yousaf","year":"2022","unstructured":"Yousaf K, Nawaz T (2022) A deep learning-based approach for inappropriate content detection and classification of youtube videos. IEEE Access 10:16283\u201316298. https:\/\/doi.org\/10.1109\/access.2022.3147519","journal-title":"IEEE Access"},{"key":"9035_CR11","doi-asserted-by":"publisher","first-page":"3835","DOI":"10.1109\/TIP.2020.2965299","volume":"29","author":"C Dhiman","year":"2020","unstructured":"Dhiman C, Vishwakarma DK (2020) View-invariant deep architecture for human action recognition using two-stream motion and shape temporal dynamics. IEEE Trans Image Process 29:3835\u20133844. https:\/\/doi.org\/10.1109\/TIP.2020.2965299","journal-title":"IEEE Trans Image Process"},{"key":"9035_CR12","doi-asserted-by":"publisher","unstructured":"Alwassel H, Giancola S, Ghanem B (2021) TSP: temporally-sensitive pretraining of video encoders for localization tasks. In: IEEE\/CVF international conference on computer vision workshops (ICCVW). Montreal, BC, Canada, pp 3166\u20133176. https:\/\/doi.org\/10.1109\/ICCVW54120.2021.00356","DOI":"10.1109\/ICCVW54120.2021.00356"},{"issue":"3","key":"9035_CR13","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3441628","volume":"17","author":"C Dhiman","year":"2021","unstructured":"Dhiman C, Vishwakarma DK, Agarwal P (2021) Part-wise spatio-temporal attention driven CNN-based 3D human action recognition. ACM Trans Multimed Comput Commun Appl 17(3):1\u201324. https:\/\/doi.org\/10.1145\/3441628","journal-title":"ACM Trans Multimed Comput Commun Appl"},{"key":"9035_CR14","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1007\/s11263-008-0168-y","volume":"81","author":"S Klein","year":"2009","unstructured":"Klein S, Pluim JPW, Staring M, Viergever MA (2009) Adaptive stochastic gradient descent optimisation for image registration. Int J Comput Vision 81:227\u2013239. https:\/\/doi.org\/10.1007\/s11263-008-0168-y","journal-title":"Int J Comput Vision"},{"issue":"1","key":"9035_CR15","doi-asserted-by":"publisher","first-page":"224","DOI":"10.1109\/72.655045","volume":"9","author":"GB Huang","year":"1998","unstructured":"Huang GB, Babri HA (1998) Upper bounds on the number of hidden neurons in feedforward networks with arbitrary bounded nonlinear activation functions. IEEE Trans Neural Netw 9(1):224\u2013229. https:\/\/doi.org\/10.1109\/72.655045","journal-title":"IEEE Trans Neural Netw"},{"issue":"1","key":"9035_CR16","doi-asserted-by":"publisher","first-page":"18757","DOI":"10.1038\/s41598-021-96723-8","volume":"11","author":"B Yuen","year":"2021","unstructured":"Yuen B, Hoang MT, Dong X, Lu T (2021) Universal activation function for machine learning. Sci Rep 11(1):18757. https:\/\/doi.org\/10.1038\/s41598-021-96723-8","journal-title":"Sci Rep"},{"issue":"2","key":"9035_CR17","doi-asserted-by":"publisher","first-page":"133","DOI":"10.1038\/nn.2735","volume":"14","author":"E Marder","year":"2011","unstructured":"Marder E, Taylor AL (2011) Multiple models to capture the variability in biological neurons and networks. Nat Neurosci 14(2):133\u2013138. https:\/\/doi.org\/10.1038\/nn.2735","journal-title":"Nat Neurosci"},{"key":"9035_CR18","doi-asserted-by":"publisher","unstructured":"Matsuda S (2016) BPSpike: a backpropagation learning for all parameters in spiking neural networks with multiple layers and multiple spikes. In: IEEE international joint conference on neural networks (IJCNN). Vancouver, BC, Canada, pp 293\u2013298. https:\/\/doi.org\/10.1109\/IJCNN.2016.7727211","DOI":"10.1109\/IJCNN.2016.7727211"},{"key":"9035_CR19","doi-asserted-by":"publisher","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: IEEE conference on computer vision and pattern recognition (CVPR). Las Vegas, NV, USA, pp 770\u2013778. https:\/\/doi.org\/10.1109\/CVPR.2016.90","DOI":"10.1109\/CVPR.2016.90"},{"key":"9035_CR20","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1016\/j.neucom.2020.09.050","volume":"423","author":"X Liang","year":"2021","unstructured":"Liang X, Xu J (2021) Biased ReLU neural networks. Neurocomputing 423:71\u201379. https:\/\/doi.org\/10.1016\/j.neucom.2020.09.050","journal-title":"Neurocomputing"},{"key":"9035_CR21","doi-asserted-by":"publisher","first-page":"195","DOI":"10.1007\/3-540-59497-3_175","volume-title":"From natural artificial neural computation international workshop on artificial neural networks (IWANN)","author":"J Han","year":"1995","unstructured":"Han J, Moraga C (1995) The influence of the sigmoid function parameters on the speed of backpropagation learning. In: Mira J, Sandoval F (eds) From natural artificial neural computation international workshop on artificial neural networks (IWANN). Springer, Heidelberg, pp 195\u2013201. https:\/\/doi.org\/10.1007\/3-540-59497-3_175"},{"issue":"24","key":"9035_CR22","doi-asserted-by":"publisher","first-page":"2300023","DOI":"10.1002\/adma.202300023","volume":"35","author":"J Oh","year":"2023","unstructured":"Oh J, Kim S, Lee C, Cha J-H, Yang SY, Im SG, Park C, Jang BC, Choi S-Y (2023) Preventing vanishing gradient problem of hardware neuromorphic system by implementing imidazole-based memristive ReLU activation neuron. Adv Mater 35(24):2300023. https:\/\/doi.org\/10.1002\/adma.202300023","journal-title":"Adv Mater"},{"issue":"15","key":"9035_CR23","doi-asserted-by":"publisher","first-page":"2167","DOI":"10.1021\/ac00063a042","volume":"65","author":"PB Harrington","year":"1993","unstructured":"Harrington PB (1993) Sigmoid transfer functions in backpropagation neural networks. Anal Chem 65(15):2167\u20132168. https:\/\/doi.org\/10.1021\/ac00063a042","journal-title":"Anal Chem"},{"issue":"3","key":"9035_CR24","doi-asserted-by":"publisher","first-page":"621","DOI":"10.1162\/089976603321192103","volume":"15","author":"RHR Hahnloser","year":"2003","unstructured":"Hahnloser RHR, Seung HS, Slotine J-J (2003) Permitted and forbidden sets in symmetric threshold-linear networks. Neural Comput 15(3):621\u2013638. https:\/\/doi.org\/10.1162\/089976603321192103","journal-title":"Neural Comput"},{"key":"9035_CR25","doi-asserted-by":"publisher","unstructured":"Nair V, Hinton GE (2010) Rectified linear units improve restricted boltzmann machines. In: Proceedings of the 27th international conference on machine learning (ICML). Omnipress, Madison, WI, USA, pp 807\u2013814. https:\/\/doi.org\/10.5555\/3104322.3104425","DOI":"10.5555\/3104322.3104425"},{"key":"9035_CR26","doi-asserted-by":"publisher","unstructured":"Courbariaux M, Bengio Y, David J-P (2015) BinaryConnect: training deep neural networks with binary weights during propagations. In: Proceedings of the 28th international conference on neural information processing systems (NIPS). MIT Press, Cambridge, MA, USA, 2:3123\u20133131. https:\/\/doi.org\/10.5555\/2969442.2969588","DOI":"10.5555\/2969442.2969588"},{"key":"9035_CR27","doi-asserted-by":"publisher","unstructured":"Gulcehre C, Moczulski M, Denil M, Bengio Y (2016) Noisy activation functions. arXiv preprint arXiv:1603.00391v3. https:\/\/doi.org\/10.48550\/arXiv.1603.00391","DOI":"10.48550\/arXiv.1603.00391"},{"key":"9035_CR28","unstructured":"Maas AL, Hannun AY, Ng AY (2013) Rectifier nonlinearities improve neural network acoustic models. In: Proceedings of the 30th international conference on machine learning (ICML) vol 28, Atlanta, Georgia, USA. https:\/\/ai.stanford.edu\/~amaas\/papers\/relu_hybrid_icml2013_final.pdf"},{"key":"9035_CR29","doi-asserted-by":"publisher","unstructured":"Clevert D-A, Unterthiner T, Hochreiter S (2016) Fast and Accurate deep network learning by exponential linear units (ELUs). arXiv preprint arXiv:1511.07289. https:\/\/doi.org\/10.48550\/arXiv.1511.07289","DOI":"10.48550\/arXiv.1511.07289"},{"key":"9035_CR30","doi-asserted-by":"publisher","first-page":"88","DOI":"10.1016\/j.neucom.2019.07.017","volume":"363","author":"X Wang","year":"2019","unstructured":"Wang X, Qin Y, Wang Y, Xiang S, Chen H (2019) ReLTanh: an activation function with vanishing gradient resistance for SAE-based DNNs and its application to rotating machinery fault diagnosis. Neurocomputing 363:88\u201398. https:\/\/doi.org\/10.1016\/j.neucom.2019.07.017","journal-title":"Neurocomputing"},{"key":"9035_CR31","doi-asserted-by":"publisher","first-page":"92","DOI":"10.1016\/j.neucom.2022.06.111","volume":"503","author":"SR Dubey","year":"2022","unstructured":"Dubey SR, Singh SK, Chaudhuri BB (2022) Activation functions in deep learning: a comprehensive survey and benchmark. Neurocomputing 503:92\u2013108. https:\/\/doi.org\/10.1016\/j.neucom.2022.06.111","journal-title":"Neurocomputing"},{"issue":"5","key":"9035_CR32","doi-asserted-by":"publisher","first-page":"3814","DOI":"10.1109\/tie.2018.2856205","volume":"66","author":"Y Qin","year":"2019","unstructured":"Qin Y, Wang X, Zou J (2019) The optimized deep belief networks with improved logistic sigmoid units and their application in fault diagnosis for planetary gearboxes of wind turbines. IEEE Trans Ind Electron 66(5):3814\u20133824. https:\/\/doi.org\/10.1109\/tie.2018.2856205","journal-title":"IEEE Trans Ind Electron"},{"key":"9035_CR33","doi-asserted-by":"publisher","first-page":"106608","DOI":"10.1016\/j.ymssp.2019.106608","volume":"138","author":"Z Ren","year":"2020","unstructured":"Ren Z, Zhu Y, Yan K, Chen K, Kang W, Yue Y, Gao D (2020) A novel model with the ability of few-shot learning and quick updating for intelligent fault diagnosis. Mech Syst Signal Process 138:106608. https:\/\/doi.org\/10.1016\/j.ymssp.2019.106608","journal-title":"Mech Syst Signal Process"},{"key":"9035_CR34","doi-asserted-by":"publisher","unstructured":"Ramachandran P, Zoph B, Le QV (2017) Searching for activation functions. arXiv preprint arXiv: 1710.05941v2. https:\/\/doi.org\/10.48550\/arXiv.1710.05941","DOI":"10.48550\/arXiv.1710.05941"},{"key":"9035_CR35","doi-asserted-by":"publisher","unstructured":"Athlur S, Saran N, Sivathanu M, Ramjee R, Kwatra N (2022) Varuna: scalable, low-cost training of massive deep learning models. In: Proceedings of the seventeenth european conference on computer systems (EuroSys\u201922). Association for computing machinery, New York, NY, USA, pp 472\u2013487. https:\/\/doi.org\/10.1145\/3492321.3519584","DOI":"10.1145\/3492321.3519584"},{"key":"9035_CR36","doi-asserted-by":"publisher","first-page":"105031","DOI":"10.1016\/j.compbiomed.2021.105031","volume":"141","author":"I Pacal","year":"2022","unstructured":"Pacal I, Karaman A, Karaboga D, Akay B, Basturk A, Nalbantoglu U, Coskun S (2022) An efficient real-time colonic polyp detection with YOLO algorithms trained by using negative samples and large datasets. Comput Biol Med 141:105031. https:\/\/doi.org\/10.1016\/j.compbiomed.2021.105031","journal-title":"Comput Biol Med"},{"key":"9035_CR37","doi-asserted-by":"publisher","unstructured":"Sendjasni A, Traparic D, Larabi M-C (2022) Investigating normalization methods for CNN-based image quality assessment. In: IEEE international conference on image processing (ICIP). Bordeaux, France, pp 4113\u20134117. https:\/\/doi.org\/10.1109\/ICIP46576.2022.9897268","DOI":"10.1109\/ICIP46576.2022.9897268"},{"key":"9035_CR38","doi-asserted-by":"publisher","unstructured":"Misra D (2019) Mish: A self regularized non-monotonic activation function. arXiv preprint arXiv:arXiv:1908.08681v3. https:\/\/doi.org\/10.48550\/arXiv.1908.08681","DOI":"10.48550\/arXiv.1908.08681"},{"key":"9035_CR39","doi-asserted-by":"publisher","first-page":"490","DOI":"10.1016\/j.neucom.2021.06.067","volume":"458","author":"H Zhu","year":"2021","unstructured":"Zhu H, Zeng H, Liu J, Zhang X (2021) Logish: a new nonlinear nonmonotonic activation function for convolutional neural network. Neurocomputing 458:490\u2013499. https:\/\/doi.org\/10.1016\/j.neucom.2021.06.067","journal-title":"Neurocomputing"},{"key":"9035_CR40","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1016\/j.neunet.2022.01.012","volume":"148","author":"T Sun","year":"2022","unstructured":"Sun T, Ding S, Guo L (2022) Low-degree term first in ResNet, its variants and the whole neural network family. Neural Netw 148:155\u2013165. https:\/\/doi.org\/10.1016\/j.neunet.2022.01.012","journal-title":"Neural Netw"},{"issue":"9","key":"9035_CR41","doi-asserted-by":"publisher","first-page":"4232","DOI":"10.3390\/app12094232","volume":"12","author":"L Yin","year":"2022","unstructured":"Yin L, Hong P, Zheng G, Chen H, Deng W (2022) A novel image recognition method based on DenseNet and DPRN. Appl Sci 12(9):4232. https:\/\/doi.org\/10.3390\/app12094232","journal-title":"Appl Sci"},{"issue":"4","key":"9035_CR42","doi-asserted-by":"publisher","first-page":"540","DOI":"10.3390\/electronics11040540","volume":"11","author":"X Wang","year":"2022","unstructured":"Wang X, Ren H, Wang A (2022) Smish: a novel activation function for deep learning methods. Electronics 11(4):540. https:\/\/doi.org\/10.3390\/electronics11040540","journal-title":"Electronics"},{"issue":"1","key":"9035_CR43","doi-asserted-by":"publisher","first-page":"100620","DOI":"10.1016\/j.disopt.2020.100620","volume":"44","author":"D Boob","year":"2022","unstructured":"Boob D, Dey SS, Lan G (2022) Complexity of training ReLU neural network. Discret Optim 44(1):100620. https:\/\/doi.org\/10.1016\/j.disopt.2020.100620","journal-title":"Discret Optim"},{"key":"9035_CR44","doi-asserted-by":"publisher","unstructured":"Sharma O (2022) Exploring the statistical properties and developing a non-linear activation function. In: IEEE international conference on automation, computing and renewable systems (ICACRS) pp 1370\u20131375, Pudukkottai, India. https:\/\/doi.org\/10.1109\/ICACRS55517.2022.10029124","DOI":"10.1109\/ICACRS55517.2022.10029124"},{"issue":"2","key":"9035_CR45","doi-asserted-by":"publisher","first-page":"283","DOI":"10.3390\/math10020283","volume":"10","author":"M Asghari","year":"2022","unstructured":"Asghari M, Fathollahi-Fard AM, Mirzapour Al-e-hashem SMJ, Dulebenets MA (2022) Transformation and linearization techniques in optimization: a state-of-the-art survey. Mathematics 10(2):283. https:\/\/doi.org\/10.3390\/math10020283","journal-title":"Mathematics"},{"issue":"3","key":"9035_CR46","doi-asserted-by":"publisher","first-page":"682","DOI":"10.3390\/math11030682","volume":"11","author":"Y Tian","year":"2023","unstructured":"Tian Y, Zhang Y, Zhang H (2023) Recent advances in stochastic gradient descent in deep learning. Mathematics 11(3):682. https:\/\/doi.org\/10.3390\/math11030682","journal-title":"Mathematics"},{"issue":"10","key":"9035_CR47","doi-asserted-by":"publisher","first-page":"7021","DOI":"10.1016\/j.jfranklin.2023.05.007","volume":"360","author":"Z Tan","year":"2023","unstructured":"Tan Z, Chen H (2023) Nonlinear function activated GNN versus ZNN for online solution of general linear matrix equations. J Franklin Inst 360(10):7021\u20137036. https:\/\/doi.org\/10.1016\/j.jfranklin.2023.05.007","journal-title":"J Franklin Inst"},{"key":"9035_CR48","doi-asserted-by":"publisher","unstructured":"Kurtz M, Kopinsky J, Gelashvili R, Matveev A, Carr J, Goin M, Leiserson W, Moore S, Nell B, Shavit N, Alistarh D (2020) Inducing and exploiting activation sparsity for fast neural network inference. In: Proceedings of the 37th international conference on machine learning (ICML). https:\/\/doi.org\/10.5555\/3524938.3525451","DOI":"10.5555\/3524938.3525451"},{"key":"9035_CR49","unstructured":"SciPy User Guide (2022) Gumbel left-skewed (for minimum order statistic) distribution\u2014SciPy v1.7.1 Manual. The SciPy community. https:\/\/docs.scipy.org\/doc\/scipy\/tutorial\/stats\/continuous_gumbel_l.html. Accessed 02 Jan 2022"},{"issue":"3","key":"9035_CR50","doi-asserted-by":"publisher","first-page":"320","DOI":"10.1016\/j.icte.2021.12.012","volume":"9","author":"I Jahan","year":"2023","unstructured":"Jahan I, Ahmed MF, Ali MO, Jang YM (2023) Self-gated rectified linear unit for performance improvement of deep neural networks. ICT Express 9(3):320\u2013325. https:\/\/doi.org\/10.1016\/j.icte.2021.12.012","journal-title":"ICT Express"},{"key":"9035_CR51","doi-asserted-by":"publisher","unstructured":"Sun Y (2021) The role of activation function in image classification. In: International Conference on communications, information system and computer engineering (CISCE), Beijing, China pp 275\u2013278. https:\/\/doi.org\/10.1109\/CISCE52179.2021.9445868","DOI":"10.1109\/CISCE52179.2021.9445868"},{"key":"9035_CR52","doi-asserted-by":"publisher","unstructured":"Mercioni MA, Tat AM, Holban S (2020) Improving the Accuracy of deep neural networks through developing new activation functions. In: IEEE 16th international conference on intelligent computer communication and processing (ICCP), pp 385\u2013391. https:\/\/doi.org\/10.1109\/ICCP51029.2020.9266162","DOI":"10.1109\/ICCP51029.2020.9266162"},{"key":"9035_CR53","doi-asserted-by":"publisher","unstructured":"Hendrycks D, Gimpel K (2016) Gaussian error linear units (GELUs). arXiv preprint arXiv:1606.08415v5. https:\/\/doi.org\/10.48550\/arXiv.1606.08415","DOI":"10.48550\/arXiv.1606.08415"},{"key":"9035_CR54","doi-asserted-by":"publisher","first-page":"103076","DOI":"10.1016\/j.earscirev.2019.103076","volume":"201","author":"AP Piotrowski","year":"2020","unstructured":"Piotrowski AP, Napiorkowski JJ, Piotrowska AE (2020) Impact of deep learning-based dropout on shallow neural networks applied to stream temperature modelling. Earth Sci Rev 201:103076. https:\/\/doi.org\/10.1016\/j.earscirev.2019.103076","journal-title":"Earth Sci Rev"},{"key":"9035_CR55","doi-asserted-by":"publisher","unstructured":"Krueger D, Maharaj T, Kram\u00e1r J, Pezeshki M, Ballas N, Ke NR, Goyal A, Bengio Y, Courville A, Pal C (2017) Zoneout: regularizing RNNs by randomly preserving hidden activations. arXiv preprint arXiv:1606.01305v4. https:\/\/doi.org\/10.48550\/arXiv.1606.01305","DOI":"10.48550\/arXiv.1606.01305"},{"key":"9035_CR56","unstructured":"Keras: Deep Learning for humans (2022) https:\/\/keras.io\/. Accessed 03 Feb 2022"},{"key":"9035_CR57","unstructured":"TensorFlow (2022) https:\/\/www.tensorflow.org\/. Accessed 14 Mar 2022"},{"key":"9035_CR58","unstructured":"Torch Scientific computing for LuaJIT (2022) http:\/\/torch.ch\/. Accessed 20 Apr 2022"},{"key":"9035_CR59","unstructured":"PyTorch (2022) https:\/\/pytorch.org\/. Accessed 21 May 2022"},{"key":"9035_CR60","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/tgrs.2022.3216732","volume":"60","author":"C Xing","year":"2022","unstructured":"Xing C, Zhao J, Duan C, Wang Z, Wang M (2022) Deep encoder with kernel-wise Taylor series for hyperspectral image classification. IEEE Trans Geosci Remote Sens 60:1\u201313. https:\/\/doi.org\/10.1109\/tgrs.2022.3216732","journal-title":"IEEE Trans Geosci Remote Sens"},{"issue":"10","key":"9035_CR61","doi-asserted-by":"publisher","first-page":"5991","DOI":"10.17762\/turcomat.v12i10.5422","volume":"12","author":"B Mahaboob","year":"2021","unstructured":"Mahaboob B, Venkateswararao P, Kumar PSP, Sarma SVM, Reddy SR, Krishna YH (2021) A review article on mathematical aspects of nonlinear models. Turk J Comput Math Educ (TURCOMAT) 12(10):5991\u20136010. https:\/\/doi.org\/10.17762\/turcomat.v12i10.5422","journal-title":"Turk J Comput Math Educ (TURCOMAT)"},{"key":"9035_CR62","doi-asserted-by":"publisher","first-page":"100743","DOI":"10.1016\/j.jmathb.2019.100743","volume":"57","author":"V Sealey","year":"2020","unstructured":"Sealey V, Infante N, Campbell MP, Bolyard J (2020) The generation and use of graphical examples in calculus classrooms: the case of the mean value theorem. J Math Behav 57:100743. https:\/\/doi.org\/10.1016\/j.jmathb.2019.100743","journal-title":"J Math Behav"},{"issue":"1","key":"9035_CR63","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1109\/tc.2020.2986970","volume":"70","author":"V Vyas","year":"2021","unstructured":"Vyas V, Jiang-Wei L, Zhou P, Hu X, Friedman JS (2021) Karnaugh map method for memristive and spintronic asymmetric basis logic functions. IEEE Trans Comput 70(1):128\u2013138. https:\/\/doi.org\/10.1109\/tc.2020.2986970","journal-title":"IEEE Trans Comput"},{"issue":"10","key":"9035_CR64","doi-asserted-by":"publisher","first-page":"1974","DOI":"10.1109\/tcad.2018.2871198","volume":"38","author":"T Yang","year":"2019","unstructured":"Yang T, Wei Y, Tu Z, Zeng H, Kinsy MA, Zheng N, Ren P (2019) Design space exploration of neural network activation function circuits. IEEE Trans Comput Aided Des Integr Circuits Syst 38(10):1974\u20131978. https:\/\/doi.org\/10.1109\/tcad.2018.2871198","journal-title":"IEEE Trans Comput Aided Des Integr Circuits Syst"},{"issue":"5","key":"9035_CR65","doi-asserted-by":"publisher","first-page":"2581","DOI":"10.1109\/tit.2021.3062161","volume":"67","author":"D Elbr\u00e4chter","year":"2021","unstructured":"Elbr\u00e4chter D, Perekrestenko D, Grohs P, B\u00f6lcskei H (2021) Deep neural network approximation theory. IEEE Trans Inf Theory 67(5):2581\u20132623. https:\/\/doi.org\/10.1109\/tit.2021.3062161","journal-title":"IEEE Trans Inf Theory"},{"issue":"2","key":"9035_CR66","doi-asserted-by":"publisher","first-page":"461","DOI":"10.1109\/jas.2020.1003048","volume":"7","author":"AH Khan","year":"2020","unstructured":"Khan AH, Cao X, Li S, Katsikis VN, Liao L (2020) BAS-ADAM: an ADAM based approach to improve the performance of beetle antennae search optimizer. IEEE\/CAA J Autom Sin 7(2):461\u2013471. https:\/\/doi.org\/10.1109\/jas.2020.1003048","journal-title":"IEEE\/CAA J Autom Sin"},{"key":"9035_CR67","doi-asserted-by":"publisher","unstructured":"He K, Zhang X, Ren S, Sun J (2015) Delving deep into rectifiers: surpassing human-level performance on imagenet classification. In: IEEE international conference on computer vision (ICCV) pp 1026\u20131034. https:\/\/doi.org\/10.1109\/ICCV.2015.123","DOI":"10.1109\/ICCV.2015.123"},{"key":"9035_CR68","doi-asserted-by":"publisher","unstructured":"Tan M, Le QV (2019) EfficientNet: rethinking model scaling for convolutional neural networks. arXiv preprint arXiv:1905.11946v5. https:\/\/doi.org\/10.48550\/arXiv.1905.11946","DOI":"10.48550\/arXiv.1905.11946"},{"issue":"4","key":"9035_CR69","doi-asserted-by":"publisher","first-page":"312","DOI":"10.1016\/j.icte.2020.04.010","volume":"6","author":"I Kandel","year":"2020","unstructured":"Kandel I, Castelli M (2020) The effect of batch size on the generalizability of the convolutional neural networks on a histopathology dataset. ICT Express 6(4):312\u2013315. https:\/\/doi.org\/10.1016\/j.icte.2020.04.010","journal-title":"ICT Express"},{"key":"9035_CR70","doi-asserted-by":"publisher","unstructured":"Gao Y, Liu Y, Zhang H, Li Z, Zhu Y, Lin H, Yang M (2020) Estimating GPU memory consumption of deep learning models. In: Proceedings of the 28th ACM joint meeting on European software engineering conference and symposium on the foundations of software engineering (ESEC\/FSE). New York, NY, USA pp 1342\u20131352. https:\/\/doi.org\/10.1145\/3368089.3417050","DOI":"10.1145\/3368089.3417050"},{"issue":"1","key":"9035_CR71","doi-asserted-by":"publisher","first-page":"1929","DOI":"10.5555\/2627435.2670313","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava N, Hinton G, Krizhevsky A, Sutskever I, Salakhutdinov R (2014) Dropout: a simple way to prevent neural networks from overfitting. J Mach Learn Res 15(1):1929\u20131958. https:\/\/doi.org\/10.5555\/2627435.2670313","journal-title":"J Mach Learn Res"},{"key":"9035_CR72","doi-asserted-by":"publisher","unstructured":"Arpit D, Campos V, Bengio Y (2019) How to initialize your network? Robust initialization for WeightNorm and ResNets. In: Proceedings of the 33rd international conference on neural information processing systems. Curran Associates Inc., Red Hook, NY, USA, Article 978, pp 10902\u201310911. https:\/\/doi.org\/10.5555\/3454287.3455265","DOI":"10.5555\/3454287.3455265"},{"key":"9035_CR73","unstructured":"Glorot X, Bengio Y (2010) Understanding the difficulty of training deep feedforward neural networks. In: Proceedings of the thirteenth international conference on artificial intelligence and statistics, PMLR, 9:249\u2013256. https:\/\/proceedings.mlr.press\/v9\/glorot10a\/glorot10a.pdf"},{"key":"9035_CR74","doi-asserted-by":"publisher","unstructured":"Saxe AM, McClelland JL, Ganguli S (2014) Exact solutions to the nonlinear dynamics of learning in deep linear neural networks. arXiv preprint arXiv:1312.6120v3. https:\/\/doi.org\/10.48550\/arXiv.1312.6120","DOI":"10.48550\/arXiv.1312.6120"},{"key":"9035_CR75","doi-asserted-by":"publisher","unstructured":"Smith SL, Kindermans P-J, Ying C, Le QV (2018) Don\u2019t decay the learning rate, increase the batch size. arXiv preprint arXiv:1711.00489v2. https:\/\/doi.org\/10.48550\/arXiv.1711.00489","DOI":"10.48550\/arXiv.1711.00489"},{"key":"9035_CR76","doi-asserted-by":"publisher","unstructured":"Mustika IW, Adi HN, Najib F (2021) Comparison of Keras optimizers for earthquake signal classification based on deep neural networks. In: IEEE 4th international conference on information and communications technology (ICOIACT) pp 304\u2013308. https:\/\/doi.org\/10.1109\/ICOIACT53268.2021.9563990","DOI":"10.1109\/ICOIACT53268.2021.9563990"},{"key":"9035_CR77","doi-asserted-by":"publisher","first-page":"2121","DOI":"10.5555\/1953048.2021068","volume":"12","author":"J Duchi","year":"2011","unstructured":"Duchi J, Hazan E, Singer Y (2011) Adaptive subgradient methods for online learning and stochastic optimization. J Mach Learn Res (JMLR) 12:2121\u20132159. https:\/\/doi.org\/10.5555\/1953048.2021068","journal-title":"J Mach Learn Res (JMLR)"},{"key":"9035_CR78","doi-asserted-by":"publisher","unstructured":"Kingma DP, Ba J (2015) Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980v9. https:\/\/doi.org\/10.48550\/arXiv.1412.6980","DOI":"10.48550\/arXiv.1412.6980"},{"key":"9035_CR79","doi-asserted-by":"publisher","unstructured":"Dogo EM, Afolabi OJ, Nwulu NI, Twala B, Aigbavboa CO (2018) A comparative analysis of gradient descent-based optimization algorithms on convolutional neural networks. In: International conference on computational techniques, electronics and mechanical systems (CTEMS) pp 92\u201399. https:\/\/doi.org\/10.1109\/CTEMS.2018.8769211","DOI":"10.1109\/CTEMS.2018.8769211"},{"key":"9035_CR80","doi-asserted-by":"publisher","unstructured":"Sutskever I, Martens J, Dahl G, Hinton G (2013) On the importance of initialization and momentum in deep learning. In: Proceedings of the 30th international conference on machine learning (ICML) 28:1139\u20131147. https:\/\/doi.org\/10.5555\/3042817.3043064","DOI":"10.5555\/3042817.3043064"},{"key":"9035_CR81","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1016\/j.neunet.2021.02.011","volume":"139","author":"D Xu","year":"2021","unstructured":"Xu D, Zhang S, Zhang H, Mandic DP (2021) Convergence of the RMSProp deep learning method with penalty for nonconvex optimization. Neural Netw 139:17\u201323. https:\/\/doi.org\/10.1016\/j.neunet.2021.02.011","journal-title":"Neural Netw"},{"issue":"11","key":"9035_CR82","doi-asserted-by":"publisher","first-page":"648","DOI":"10.3390\/sym10110648","volume":"10","author":"I Nusrat","year":"2018","unstructured":"Nusrat I, Jang S-B (2018) A comparison of regularization techniques in deep neural networks. Symmetry 10(11):648. https:\/\/doi.org\/10.3390\/sym10110648","journal-title":"Symmetry"},{"issue":"11","key":"9035_CR83","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y Lecun","year":"1998","unstructured":"Lecun Y, Bottou L, Bengio Y, Haffner P (1998) Gradient-based learning applied to document recognition. Proc IEEE 86(11):2278\u20132324. https:\/\/doi.org\/10.1109\/5.726791","journal-title":"Proc IEEE"},{"key":"9035_CR84","unstructured":"CIFAR-10 and CIFAR-100 datasets (2022) https:\/\/www.cs.toronto.edu\/~kriz\/cifar.html. Accessed 29 June 2022"},{"key":"9035_CR85","unstructured":"Netzer Y, Wang T, Coates A, Bissacco A, Wu B, Ng AY (2011) Reading digits in natural images with unsupervised feature learning. In: Neural information processing systems (NIPS) workshop on deep learning and unsupervised feature learning. http:\/\/ufldl.stanford.edu\/housenumbers\/nips2011_housenumbers.pdf"},{"issue":"3","key":"9035_CR86","doi-asserted-by":"publisher","first-page":"616","DOI":"10.1016\/j.injury.2020.09.010","volume":"52","author":"NA Farda","year":"2021","unstructured":"Farda NA, Lai J-Y, Wang J-C, Lee P-Y, Liu J-W, Hsieh I-H (2021) Sanders classification of calcaneal fractures in CT images with deep learning and differential data augmentation techniques. Injury 52(3):616\u2013624. https:\/\/doi.org\/10.1016\/j.injury.2020.09.010","journal-title":"Injury"},{"key":"9035_CR87","doi-asserted-by":"publisher","unstructured":"Xiao H, Rasul K, Vollgraf R (2017) Fashion-MNIST: a novel image dataset for benchmarking machine learning algorithms. arXiv preprint arXiv:1708.07747v2. https:\/\/doi.org\/10.48550\/arXiv.1708.07747","DOI":"10.48550\/arXiv.1708.07747"},{"key":"9035_CR88","doi-asserted-by":"publisher","unstructured":"Noel MM, Trivedi A, Dutta P (2023) Growing cosine unit: a novel oscillatory activation function that can speedup training and reduce parameters in convolutional neural networks. arXiv preprint arXiv:2108.12943v3. https:\/\/doi.org\/10.48550\/arXiv.2108.12943","DOI":"10.48550\/arXiv.2108.12943"},{"key":"9035_CR89","doi-asserted-by":"publisher","first-page":"110","DOI":"10.1016\/j.neucom.2020.11.068","volume":"429","author":"M Zhu","year":"2021","unstructured":"Zhu M, Min W, Wang Q, Zou S, Chen X (2021) PFLU and FPFLU: two novel non-monotonic activation functions in convolutional neural networks. Neurocomputing 429:110\u2013117. https:\/\/doi.org\/10.1016\/j.neucom.2020.11.068","journal-title":"Neurocomputing"},{"key":"9035_CR90","doi-asserted-by":"publisher","unstructured":"Mercioni MA, Holban S (2021) Soft-clipping swish: a novel activation function for deep learning. In: IEEE 15th international symposium on applied computational intelligence and informatics (SACI), Timisoara, Romania. https:\/\/doi.org\/10.1109\/SACI51354.2021.9465622","DOI":"10.1109\/SACI51354.2021.9465622"},{"issue":"2","key":"9035_CR91","doi-asserted-by":"publisher","first-page":"136","DOI":"10.1049\/cvi2.12020","volume":"15","author":"X Liu","year":"2021","unstructured":"Liu X, Di X (2021) TanhExp: a smooth activation function with high convergence speed for lightweight neural networks. IET Comput Vision 15(2):136\u2013150. https:\/\/doi.org\/10.1049\/cvi2.12020","journal-title":"IET Comput Vision"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-023-09035-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-023-09035-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-023-09035-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,3]],"date-time":"2023-11-03T13:08:02Z","timestamp":1699016882000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-023-09035-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9,27]]},"references-count":91,"journal-issue":{"issue":"34","published-print":{"date-parts":[[2023,12]]}},"alternative-id":["9035"],"URL":"https:\/\/doi.org\/10.1007\/s00521-023-09035-5","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,9,27]]},"assertion":[{"value":"22 September 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 September 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 September 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no relevant financial or non-financial interests to disclose.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}