{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:03:55Z","timestamp":1775228635318,"version":"3.50.1"},"reference-count":54,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,6,1]],"date-time":"2022-06-01T00:00:00Z","timestamp":1654041600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,6,1]],"date-time":"2022-06-01T00:00:00Z","timestamp":1654041600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,6]]},"DOI":"10.1109\/cvpr52688.2022.00352","type":"proceedings-article","created":{"date-parts":[[2022,9,27]],"date-time":"2022-09-27T19:56:41Z","timestamp":1664308601000},"page":"3522-3532","source":"Crossref","is-referenced-by-count":127,"title":["Video Frame Interpolation with Transformer"],"prefix":"10.1109","author":[{"given":"Liying","family":"Lu","sequence":"first","affiliation":[{"name":"The Chinese University of Hong Kong"}]},{"given":"Ruizheng","family":"Wu","sequence":"additional","affiliation":[{"name":"SmartMore"}]},{"given":"Huaijia","family":"Lin","sequence":"additional","affiliation":[{"name":"SmartMore"}]},{"given":"Jiangbo","family":"Lu","sequence":"additional","affiliation":[{"name":"SmartMore"}]},{"given":"Jiaya","family":"Jia","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00098"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00250"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00548"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00183"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298747"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00059"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01427"},{"key":"ref36","first-page":"109","article-title":"Bmbc: Bilateral motion estimation with bilateral cost vol-ume for video interpolation","author":"park","year":"2020","journal-title":"Computer Vision-ECCV 2020 16th European Conference"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.37"},{"key":"ref34","first-page":"670","article-title":"Video frame interpolation via adaptive convolution","author":"niklaus","year":"0","journal-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"ref28","first-page":"4905","article-title":"Understanding the effective receptive field in deep convolutional neural networks","author":"luo","year":"0","journal-title":"NeurIPS"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.478"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12276"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00382"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-010-0390-2"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/2980179.2980251"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00536"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6788"},{"key":"ref24","first-page":"41","article-title":"Enhanced quadratic video interpolation","author":"liu","year":"0","journal-title":"European Conference on Computer Vision"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00210"},{"key":"ref26","article-title":"Swin trans-former: Hierarchical vision transformer using shifted windows","author":"liu","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018794"},{"key":"ref50","article-title":"Quadratic video interpolation","author":"xu","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-018-01144-2"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01237"},{"key":"ref53","first-page":"151","article-title":"Non-parametric local transforms for computing visual correspondence","author":"zabih","year":"0","journal-title":"European Conference on Computer Vision"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00583"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01358"},{"key":"ref11","first-page":"3","article-title":"Twins: Revisiting the design of spatial attention in vision transformers","volume":"1","author":"chu","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref40","first-page":"234","article-title":"U-net: Convolutional networks for biomedical image segmentation","author":"ronneberger","year":"0","journal-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention"},{"key":"ref12","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"devlin","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref13","article-title":"Cswin transformer: A general vision trans-former backbone with cross-shaped windows","author":"dong","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref14","article-title":"An image is worth 16&#x00D7;16 words: Transformers for image recognition at scale","author":"dosovitskiy","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref15","first-page":"5515","article-title":"Deepstereo: Learning to predict new views from the world's imagery","author":"flynn","year":"0","journal-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"ref16","first-page":"2859","article-title":"Space-time-aware multiresolution video enhancement","author":"haris","year":"0","journal-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition"},{"key":"ref17","article-title":"Rife: Real-time intermediate flow estimation for video frame interpolation","author":"huang","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref18","first-page":"8981","article-title":"Lite-flownet: A lightweight convolutional neural network for optical flow estimation","author":"hui","year":"0","journal-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00938"},{"key":"ref4","article-title":"Language models are few-shot learners","author":"brown","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref3","article-title":"Memc-net: Motion estimation and motion compensation driven neural network for video interpolation and enhancement","author":"bao","year":"2019","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"ref6","first-page":"213","article-title":"End-to-end object detection with transformers","author":"carion","year":"0","journal-title":"European Conference on Computer Vision"},{"key":"ref5","article-title":"Video super-resolution transformer","author":"cao","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3100714"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01212"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00343"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6693"},{"key":"ref46","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00931"},{"key":"ref48","first-page":"416","article-title":"Video compression through image interpolation","author":"wu","year":"0","journal-title":"Proceedings of the European Conference on Computer Vision (ECCV)"},{"key":"ref47","article-title":"Uformer: A general u-shaped transformer for image restoration","author":"wang","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01422"},{"key":"ref41","first-page":"arxiv","article-title":"Video interpolation via generalized deformable convolution","author":"shi","year":"2020","journal-title":"ArXiv e-prints"},{"key":"ref44","article-title":"Ucf101: A dataset of 101 human actions classes from videos in the wild","author":"soomro","year":"2012","journal-title":"ArXiv Preprint"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01422"}],"event":{"name":"2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","location":"New Orleans, LA, USA","start":{"date-parts":[[2022,6,18]]},"end":{"date-parts":[[2022,6,24]]}},"container-title":["2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9878378\/9878366\/09880064.pdf?arnumber=9880064","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,14]],"date-time":"2022-10-14T20:55:52Z","timestamp":1665780952000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9880064\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6]]},"references-count":54,"URL":"https:\/\/doi.org\/10.1109\/cvpr52688.2022.00352","relation":{},"subject":[],"published":{"date-parts":[[2022,6]]}}}