{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T12:46:28Z","timestamp":1774529188152,"version":"3.50.1"},"reference-count":49,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"11","license":[{"start":{"date-parts":[[2020,11,1]],"date-time":"2020-11-01T00:00:00Z","timestamp":1604188800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,11,1]],"date-time":"2020-11-01T00:00:00Z","timestamp":1604188800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,11,1]],"date-time":"2020-11-01T00:00:00Z","timestamp":1604188800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"IIIT Sri City"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2020,11]]},"DOI":"10.1109\/tnnls.2019.2955777","type":"journal-article","created":{"date-parts":[[2019,12,23]],"date-time":"2019-12-23T20:22:58Z","timestamp":1577132578000},"page":"4500-4511","source":"Crossref","is-referenced-by-count":214,"title":["diffGrad: An Optimization Method for Convolutional Neural Networks"],"prefix":"10.1109","volume":"31","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4532-8996","authenticated-orcid":false,"given":"Shiv Ram","family":"Dubey","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8778-8229","authenticated-orcid":false,"given":"Soumendu","family":"Chakraborty","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6580-3977","authenticated-orcid":false,"given":"Swalpa Kumar","family":"Roy","sequence":"additional","affiliation":[]},{"given":"Snehasis","family":"Mukherjee","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8536-4991","authenticated-orcid":false,"given":"Satish Kumar","family":"Singh","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0297-8929","authenticated-orcid":false,"given":"Bidyut Baran","family":"Chaudhuri","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","article-title":"GANs trained by a two time-scale update rule converge to a local Nash equilibrium","author":"heusel","year":"2017","journal-title":"arXiv 1706 08500"},{"key":"ref38","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2014","journal-title":"arXiv 1412 6980"},{"key":"ref33","first-page":"2121","article-title":"Adaptive subgradient methods for online learning and stochastic optimization","volume":"12","author":"duchi","year":"2011","journal-title":"J Mach Learn Res"},{"key":"ref32","first-page":"1139","article-title":"On the importance of initialization and momentum in deep learning","author":"sutskever","year":"2013","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1016\/S0893-6080(98)00116-6"},{"key":"ref30","first-page":"2933","article-title":"Identifying and attacking the saddle point problem in high-dimensional non-convex optimization","author":"dauphin","year":"2014","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref37","author":"hinton","year":"2012","journal-title":"Neural networks for machine learning lecture 6a overview of mini-batch gradient descent"},{"key":"ref36","article-title":"ADADELTA: An adaptive learning rate method","author":"zeiler","year":"2012","journal-title":"arXiv 1212 5701"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"ref34","first-page":"1223","article-title":"Large scale distributed deep networks","author":"dean","year":"2012","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-7908-2604-3_16"},{"key":"ref27","first-page":"1222","article-title":"RCCNet: An efficient convolutional neural network for histological routine colon cancer nuclei classification","author":"basha","year":"2018","journal-title":"Proc 15th Int Conf Control Autom Robot Vis (ICARCV)"},{"key":"ref29","first-page":"823","article-title":"Two problems with back propagation and other steepest descent learning procedures for networks","author":"sutton","year":"1986","journal-title":"Proc 8th Annu Conf Cognit Sci Soc"},{"key":"ref2","first-page":"1097","article-title":"ImageNet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref1","author":"goodfellow","year":"2016","journal-title":"Deep Learning"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.169"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.81"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.91"},{"key":"ref23","first-page":"91","article-title":"Faster R-CNN: Towards real-time object detection with region proposal networks","author":"ren","year":"2015","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00913"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390177"},{"key":"ref40","first-page":"1","article-title":"On the convergence of adam and beyond","author":"reddi","year":"2018","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2016.2582924"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2015.2481325"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2016.2586194"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TIE.2019.2898618"},{"key":"ref16","article-title":"Spontaneous facial micro-expression recognition using 3D spatiotemporal convolutional neural networks","author":"reddy","year":"0","journal-title":"Proc IEEE Int Joint Conf Neural Netw"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2019.8852422"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2019.2918719"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"ref4","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2014","journal-title":"arXiv 1409 1556"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2017.2777183"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2439281"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TIFS.2018.2868230"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2010.939038"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2699184"},{"key":"ref49","first-page":"971","article-title":"Self-normalizing neural networks","author":"klambauer","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2012.2229986"},{"key":"ref46","article-title":"Learning multiple layers of features from tiny images","author":"krizhevsky","year":"2009"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref48","article-title":"Fast and accurate deep network learning by exponential linear units (ELUs)","author":"clevert","year":"2015","journal-title":"arXiv 1511 07289"},{"key":"ref47","first-page":"1","article-title":"Rectifier nonlinearities improve neural network acoustic models","author":"maas","year":"2013","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref42","article-title":"signSGD: Compressed optimisation for non-convex problems","author":"bernstein","year":"2018","journal-title":"arXiv 1802 04434"},{"key":"ref41","article-title":"Predictive local smoothness for stochastic gradient methods","author":"li","year":"2018","journal-title":"arXiv 1805 09386"},{"key":"ref44","first-page":"928","article-title":"Online convex programming and generalized infinitesimal gradient ascent","author":"zinkevich","year":"2003","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref43","article-title":"Nostalgic Adam: Weighting more of the past gradients when designing the adaptive learning rate","author":"huang","year":"2018","journal-title":"arXiv 1805 07557"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/9244673\/08939562.pdf?arnumber=8939562","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,4,27]],"date-time":"2022-04-27T17:19:55Z","timestamp":1651079995000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8939562\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,11]]},"references-count":49,"journal-issue":{"issue":"11"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2019.2955777","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,11]]}}}