{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T18:00:41Z","timestamp":1772906441401,"version":"3.50.1"},"reference-count":50,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,6]]},"DOI":"10.1109\/cvpr52729.2023.01152","type":"proceedings-article","created":{"date-parts":[[2023,8,22]],"date-time":"2023-08-22T17:30:52Z","timestamp":1692725452000},"page":"11970-11979","source":"Crossref","is-referenced-by-count":10,"title":["Transformer-Based Learned Optimization"],"prefix":"10.1109","author":[{"given":"Erik","family":"G\u00e4rtner","sequence":"first","affiliation":[{"name":"Google Research"}]},{"given":"Luke","family":"Metz","sequence":"additional","affiliation":[{"name":"Google Research"}]},{"given":"Mykhaylo","family":"Andriluka","sequence":"additional","affiliation":[{"name":"Google Research"}]},{"given":"C. Daniel","family":"Freeman","sequence":"additional","affiliation":[{"name":"Google Research"}]},{"given":"Cristian","family":"Sminchisescu","sequence":"additional","affiliation":[{"name":"Google Research"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9560935"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01284"},{"key":"ref15","author":"kingma","year":"2014","journal-title":"Adam A method for stochastic optimization"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.248"},{"key":"ref11","author":"fletcher","year":"1987","journal-title":"Practical Methods of Optimization"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/s101070100263"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/s10898-004-1936-z"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00530"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00884"},{"key":"ref18","article-title":"Learning to optimize","author":"li","year":"2017","journal-title":"ICLRE"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01425"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01133"},{"key":"ref45","author":"wu","year":"2018","journal-title":"Understanding short-horizon bias in stochastic meta-optimization"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00708"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00622"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1021\/jp970984n"},{"key":"ref41","first-page":"10553","article-title":"Unbiased gradient estimation in unrolled computation graphs with persistent evolution strategies","author":"vicol","year":"2021","journal-title":"Proceedings of the 38th International Conference on Machine Learning volume 139 of Proceedings of Machine Learning Research"},{"key":"ref44","article-title":"Learned optimizers that scale and generalize","author":"wichrowska","year":"2017","journal-title":"International Conference on Machine Learning"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/5.58337"},{"key":"ref49","author":"zanfir","year":"0","journal-title":"Weakly supervised 3d human pose and shape reconstruction with normalizing flows"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.512"},{"key":"ref7","author":"bengio","year":"1990","journal-title":"Learning a synaptic learning rule"},{"key":"ref9","author":"chen","year":"2021","journal-title":"Learning to optimize A primer and a benchmark"},{"key":"ref4","first-page":"3981","article-title":"Learning to learn by gradient descent by gradient descent","author":"andrychowicz","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref3","author":"amos","year":"2022","journal-title":"Tutorial on amortized optimization for learning to optimize over continuous domains"},{"key":"ref6","first-page":"6","article-title":"On the optimization of a synaptic learning rule","author":"bengio","year":"1992","journal-title":"Preprints Conf Optimality in Artificial and Biological Neural Networks"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/s11081-017-9366-1"},{"key":"ref40","article-title":"Attention is all you need","volume":"30","author":"vaswani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref35","first-page":"744","article-title":"Human body model fitting by learned gradient descent","author":"song","year":"2020","journal-title":"European Conference on Computer Vision"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/3414685.3417877"},{"key":"ref37","author":"surjanovic","year":"0","journal-title":"Virtual Library of Simulation Experiments Test Functions and Datasets"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-4677-1"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1093\/comjnl\/3.3.175"},{"key":"ref30","article-title":"Contact and human dynamics from monocular video","author":"rempe","year":"2020","journal-title":"Proceedings of the European Conference on Computer Vision (ECCV)"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3450626.3459825"},{"key":"ref32","first-page":"4596","article-title":"Adafactor: Adaptive learning rates with sublinear memory cost","author":"shazeer","year":"2018","journal-title":"International Conference on Machine Learning"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/s10898-004-9972-2"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4613-1997-9"},{"key":"ref39","first-page":"501","article-title":"Aist dance video database: Multi-genre, multi-dancer, and multi-camera database for dance information processing","author":"tsuchida","year":"2019","journal-title":"Proceedings of the 20th International Society for Music Information Retrieval Conference ISMIR 2019"},{"key":"ref38","first-page":"26","article-title":"Lecture 6. 5-rmsprop: Divide the gradient by a running average of its recent magnitude","volume":"4","author":"tieleman","year":"2012","journal-title":"COURSERA Neural Networks for Machine Learning"},{"key":"ref24","author":"metz","year":"2022","journal-title":"Practical tradeoffs between memory compute and performance in learned optimizers"},{"key":"ref23","first-page":"8661","article-title":"Learn2hop: Learned optimization on rough landscapes","author":"merchant","year":"2021","journal-title":"International Conference on Machine Learning"},{"key":"ref26","author":"metz","year":"0","journal-title":"Tasks stability archi-tecture and compute Training more effective learned optimizers and using them to train themselves"},{"key":"ref25","author":"metz","year":"2021","journal-title":"Gradients are not all you need"},{"key":"ref20","author":"lucas","year":"2018","journal-title":"Aggregated momentum Stability through passive damping"},{"key":"ref22","first-page":"19910","article-title":"Reverse engineering learned optimizers reveals known and novel mechanisms","volume":"34","author":"maheswaranathan","year":"2021","journal-title":"Advances in neural information processing systems"},{"key":"ref21","first-page":"2113","article-title":"Gradient-based hyperparameter optimization through reversible learning","author":"maclaurin","year":"2015","journal-title":"International Conference on Machine Learning"},{"key":"ref28","first-page":"619","volume":"17","author":"m\u00fchlenbein","year":"1991","journal-title":"The Parallel Genetic Algorithm As Function Optimizer"},{"key":"ref27","first-page":"4556","article-title":"Understanding and correcting pathologies in the training of learned optimizers","author":"metz","year":"2019","journal-title":"International Conference on Machine Learning"},{"key":"ref29","article-title":"Meta-curvature","volume":"32","author":"park","year":"2019","journal-title":"Advances in neural information processing systems"}],"event":{"name":"2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","location":"Vancouver, BC, Canada","start":{"date-parts":[[2023,6,17]]},"end":{"date-parts":[[2023,6,24]]}},"container-title":["2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10203037\/10203050\/10205218.pdf?arnumber=10205218","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,11]],"date-time":"2023-09-11T18:00:34Z","timestamp":1694455234000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10205218\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6]]},"references-count":50,"URL":"https:\/\/doi.org\/10.1109\/cvpr52729.2023.01152","relation":{},"subject":[],"published":{"date-parts":[[2023,6]]}}}