{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T05:06:26Z","timestamp":1768971986128,"version":"3.49.0"},"reference-count":47,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2022,6,1]],"date-time":"2022-06-01T00:00:00Z","timestamp":1654041600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,6,1]],"date-time":"2022-06-01T00:00:00Z","timestamp":1654041600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,6,1]],"date-time":"2022-06-01T00:00:00Z","timestamp":1654041600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61625301"],"award-info":[{"award-number":["61625301"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61731018"],"award-info":[{"award-number":["61731018"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Major Scientific Research Project of Zhejiang Lab","award":["2019KB0AC01"],"award-info":[{"award-number":["2019KB0AC01"]}]},{"name":"Major Scientific Research Project of Zhejiang Lab","award":["2019KB0AB02"],"award-info":[{"award-number":["2019KB0AB02"]}]},{"name":"Beijing Academy of Artificial Intelligence, and Qualcomm"},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61802269"],"award-info":[{"award-number":["61802269"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61972132"],"award-info":[{"award-number":["61972132"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61876007"],"award-info":[{"award-number":["61876007"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2022,6,1]]},"DOI":"10.1109\/tpami.2020.3048430","type":"journal-article","created":{"date-parts":[[2020,12,31]],"date-time":"2020-12-31T21:38:59Z","timestamp":1609450739000},"page":"3334-3348","source":"Crossref","is-referenced-by-count":12,"title":["Training Neural Networks by Lifted Proximal Operator Machines"],"prefix":"10.1109","volume":"44","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2956-2846","authenticated-orcid":false,"given":"Jia","family":"Li","sequence":"first","affiliation":[{"name":"Key Laboratory of Machine Perception (MOE), School of Electronics Engineering and Computer Science, Peking University, Beijing, P.R. China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6191-7726","authenticated-orcid":false,"given":"Mingqing","family":"Xiao","sequence":"additional","affiliation":[{"name":"Key Laboratory of Machine Perception (MOE), School of Electronics Engineering and Computer Science, Peking University, Beijing, P.R. China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5076-7897","authenticated-orcid":false,"given":"Cong","family":"Fang","sequence":"additional","affiliation":[{"name":"University of Pennsylvania, Philadelphia, PA, USA"}]},{"given":"Yue","family":"Dai","sequence":"additional","affiliation":[{"name":"College of Software, Beihang University, Beijing, P.R. China"}]},{"given":"Chao","family":"Xu","sequence":"additional","affiliation":[{"name":"Key Laboratory of Machine Perception (MOE), School of Electronics Engineering and Computer Science, Peking University, Beijing, P.R. China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1493-7569","authenticated-orcid":false,"given":"Zhouchen","family":"Lin","sequence":"additional","affiliation":[{"name":"Key Laboratory of Machine Perception (MOE), School of Electronics Engineering and Computer Science, Peking University, Beijing, P.R. China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.5555\/2999134.2999257"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"issue":"Aug","key":"ref3","first-page":"2493","article-title":"Natural language processing (almost) from scratch","volume":"12","author":"Collobert","year":"2011","journal-title":"J. Mach. Learn. Res."},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"ref5","volume-title":"Deep Learning","volume":"1","author":"Goodfellow","year":"2016"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1038\/323533a0"},{"key":"ref7","first-page":"1139","article-title":"On the importance of initialization and momentum in deep learning","volume-title":"Proc. 30th Int. Conf. Mach. Learn.","author":"Sutskever"},{"key":"ref8","first-page":"2121","article-title":"Adaptive subgradient methods for online learning and stochastic optimization","volume":"12","author":"Duchi","year":"2011","journal-title":"J. Mach. Learn. Res."},{"issue":"2","key":"ref9","first-page":"26","article-title":"Lecture 6.5-rmsprop: Divide the gradient by a running average of its recent magnitude","volume":"4","author":"Tieleman","year":"2012","journal-title":"COURSERA: Neural Netw. Mach. Learn."},{"key":"ref10","article-title":"Adam: A method for stochastic optimization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kingma"},{"key":"ref11","article-title":"On the convergence of Adam and beyond","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Reddi"},{"key":"ref12","first-page":"797","article-title":"Escaping from saddle points-online stochastic gradient for tensor decomposition","volume-title":"Proc. Conf. Learn. Theory","author":"Ge"},{"key":"ref13","first-page":"448","article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","author":"Ioffe","year":"2015"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref15","first-page":"265","article-title":"On optimization methods for deep learning","volume-title":"Proc. 28th Int. Conf. Mach. Learn.","author":"Le"},{"key":"ref16","first-page":"4107","article-title":"Binarized neural networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Hubara"},{"key":"ref17","first-page":"2722","article-title":"Training neural networks without gradients: A scalable ADMM approach","volume-title":"Proc. 33rd Int. Conf. Mach. Learn.","author":"Taylor"},{"key":"ref18","first-page":"10","article-title":"Distributed optimization of deeply nested systems","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Carreira-Perpinan"},{"key":"ref19","first-page":"7313","article-title":"Global convergence of block coordinate descent in deep learning","author":"Zeng","year":"2019"},{"key":"ref20","first-page":"612","article-title":"Linearized alternating direction method with adaptive penalty for low-rank representation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Lin"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.165"},{"key":"ref22","first-page":"1721","article-title":"Convergent block coordinate descent for training Tikhonov regularized deep neural networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Zhang"},{"key":"ref23","article-title":"Lifted neural networks","author":"Askari","year":"2018"},{"key":"ref24","first-page":"3362","article-title":"Fenchel lifted networks: A Lagrange relaxation of neural network training","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","volume":"108","author":"Gu"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33014181"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1561\/2400000003"},{"key":"ref27","volume-title":"Introductory Functional Analysis With Applications","volume":"1","author":"Kreyszig","year":"1978"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1137\/080716542"},{"key":"ref29","first-page":"3054","article-title":"A comprehensive linear speedup analysis for asynchronous stochastic parallel optimization from zeroth-order to first-order","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Lian"},{"key":"ref30","first-page":"6182","article-title":"Asynchronous coordinate descent under more realistic assumptions","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Sun"},{"key":"ref31","volume-title":"Parallel and Distributed Computation: Numerical Methods","author":"Bertsekas","year":"1989"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1126\/science.1127647"},{"key":"ref33","first-page":"249","article-title":"Understanding the difficulty of training deep feedforward neural networks","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Glorot"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/5.726791"},{"key":"ref35","article-title":"Learning multiple layers of features from tiny images","author":"Krizhevsky","year":"2009"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref37","first-page":"1675","article-title":"Gradient descent finds global minima of deep neural networks","author":"Du","year":"2019"},{"key":"ref38","first-page":"242","article-title":"A convergence theory for deep learning via over-parameterization","author":"Allen-Zhu","year":"2019"},{"key":"ref39","article-title":"A downsampled variant of ImageNet as an alternative to the CIFAR datasets","author":"Chrabaszcz","year":"2017"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.2118\/18761-MS"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11732"},{"key":"ref42","first-page":"2595","article-title":"Parallelized stochastic gradient descent","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Zinkevich"},{"key":"ref43","first-page":"4120","article-title":"Asynchronous stochastic gradient descent with delay compensation","volume-title":"Proc. 34th Int. Conf. Mach. Learn.","volume":"108","author":"Zheng"},{"key":"ref44","volume-title":"Nonlinear Programming: 2nd Edition","author":"Bertsekas","year":"1999"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1137\/120887795"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.56021\/9781421407944"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4419-8853-9"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/9769881\/09311864.pdf?arnumber=9311864","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,9]],"date-time":"2024-01-09T23:21:33Z","timestamp":1704842493000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9311864\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,1]]},"references-count":47,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2020.3048430","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,6,1]]}}}