{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:14:33Z","timestamp":1750220073127,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":24,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,1,6]],"date-time":"2023-01-06T00:00:00Z","timestamp":1672963200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,1,6]]},"DOI":"10.1145\/3582649.3582662","type":"proceedings-article","created":{"date-parts":[[2023,4,7]],"date-time":"2023-04-07T16:23:28Z","timestamp":1680884608000},"page":"204-209","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["SPSTT: Second-Order Propagation Spatial Temporal Transformer Network for Space-Time Video Super-Resolution"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1517-6669","authenticated-orcid":false,"given":"Yaping","family":"Qi","sequence":"first","affiliation":[{"name":"School of Information Science and Engineering, Shandong University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3003-9358","authenticated-orcid":false,"given":"Rui","family":"Su","sequence":"additional","affiliation":[{"name":"School of Information Science and Engineering, Shandong University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7922-3551","authenticated-orcid":false,"given":"Lei","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Information Science and Engineering, Shandong University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8083-3501","authenticated-orcid":false,"given":"Xianye","family":"Ben","sequence":"additional","affiliation":[{"name":"School of Information Science and Engineering, Shandong University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9224-324X","authenticated-orcid":false,"given":"Zheng","family":"Dong","sequence":"additional","affiliation":[{"name":"School of Information Science and Engineering, Shandong University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9783-0873","authenticated-orcid":false,"given":"Hongchao","family":"Zhou","sequence":"additional","affiliation":[{"name":"School of Information Science and Engineering, Shandong University, China"}]}],"member":"320","published-online":{"date-parts":[[2023,4,7]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2005.85"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"Zheng S Lu J Zhao H Rethinking semantic segmentation from a sequence-to-sequence perspective with transformers[C]\/\/Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2021: 6881-6890.  Zheng S Lu J Zhao H Rethinking semantic segmentation from a sequence-to-sequence perspective with transformers[C]\/\/Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2021: 6881-6890.","DOI":"10.1109\/CVPR46437.2021.00681"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Niklaus S Mai L Liu F. Video frame interpolation via adaptive separable convolution[C]\/\/Proceedings of the IEEE International Conference on Computer Vision. 2017: 261-270.  Niklaus S Mai L Liu F. Video frame interpolation via adaptive separable convolution[C]\/\/Proceedings of the IEEE International Conference on Computer Vision. 2017: 261-270.","DOI":"10.1109\/ICCV.2017.37"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-018-01144-2"},{"key":"e_1_3_2_1_5_1","first-page":"30","article-title":"Attention is all you need[J]","author":"Vaswani A","year":"2017","unstructured":"Vaswani A , Shazier N , Parmar N , Attention is all you need[J] . Advances in Neural Information Processing Systems , 2017 , 30 . Vaswani A, Shazier N, Parmar N, Attention is all you need[J]. Advances in Neural Information Processing Systems, 2017, 30.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_6_1","volume-title":"Yu K","author":"Wang X","year":"2019","unstructured":"Wang X , Chan K C K , Yu K , Edvard : Video restoration with enhanced deformable convolutional networks[C]\/\/Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops . 2019 : 1954-1963. Wang X, Chan K C K, Yu K, Edvard: Video restoration with enhanced deformable convolutional networks[C]\/\/Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops. 2019: 1954-1963."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Haris M Shakhnarovich G Ukita N. Recurrent back-projection network for video super-resolution[C]\/\/Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2019: 3897-3906.  Haris M Shakhnarovich G Ukita N. Recurrent back-projection network for video super-resolution[C]\/\/Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2019: 3897-3906.","DOI":"10.1109\/CVPR.2019.00402"},{"key":"e_1_3_2_1_8_1","volume-title":"Fu Y","author":"Tian Y","year":"2020","unstructured":"Tian Y , Zhang Y , Fu Y , Tdan : Temporally-deformable alignment network for video super-resolution[C]\/\/Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition . 2020 : 3360-3369. Tian Y, Zhang Y, Fu Y, Tdan: Temporally-deformable alignment network for video super-resolution[C]\/\/Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2020: 3360-3369."},{"key":"e_1_3_2_1_9_1","volume-title":"Very deep convolutional networks for large-scale image recognition[J]. arXiv preprint arXiv:1409.1556","author":"Simonyan K","year":"2014","unstructured":"Simonyan K , Zisserman A. Very deep convolutional networks for large-scale image recognition[J]. arXiv preprint arXiv:1409.1556 , 2014 . Simonyan K, Zisserman A. Very deep convolutional networks for large-scale image recognition[J]. arXiv preprint arXiv:1409.1556, 2014."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"Jiang H Sun D Jampani V Super slo-mo: High-quality estimation of multiple intermediate frames for video interpolation[C]\/\/Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2018: 9000-9008.  Jiang H Sun D Jampani V Super slo-mo: High-quality estimation of multiple intermediate frames for video interpolation[C]\/\/Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2018: 9000-9008.","DOI":"10.1109\/CVPR.2018.00938"},{"key":"e_1_3_2_1_11_1","volume-title":"Ding T","author":"Geng Z","year":"2022","unstructured":"Geng Z , Liang L , Ding T , RSTT : Real-time Spatial Temporal Transformer for Space-Time Video Super-Resolution[C]\/\/Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition . 2022 : 17441-17451. Geng Z, Liang L, Ding T, RSTT: Real-time Spatial Temporal Transformer for Space-Time Video Super-Resolution[C]\/\/Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2022: 17441-17451."},{"key":"e_1_3_2_1_12_1","volume-title":"An image is worth 16x16 words: Transformers for image recognition at scale[J]. arXiv preprint arXiv:2010.11929","author":"Dosovitskiy A","year":"2020","unstructured":"Dosovitskiy A , Beyer L , Kolesnikov A , An image is worth 16x16 words: Transformers for image recognition at scale[J]. arXiv preprint arXiv:2010.11929 , 2020 . Dosovitskiy A, Beyer L, Kolesnikov A, An image is worth 16x16 words: Transformers for image recognition at scale[J]. arXiv preprint arXiv:2010.11929, 2020."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"Bao W Lai W S Ma C Depth-aware video frame interpolation[C]\/\/Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2019: 3703-3712.  Bao W Lai W S Ma C Depth-aware video frame interpolation[C]\/\/Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2019: 3703-3712.","DOI":"10.1109\/CVPR.2019.00382"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"Zhang Y Li K Li K Image super-resolution using very deep residual channel attention networks[C]\/\/Proceedings of the European Conference on Computer Vision (ECCV). 2018: 286-301.  Zhang Y Li K Li K Image super-resolution using very deep residual channel attention networks[C]\/\/Proceedings of the European Conference on Computer Vision (ECCV). 2018: 286-301.","DOI":"10.1007\/978-3-030-01234-2_18"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Shechtman E Caspi Y Irani M. Increasing space-time resolution in video[C]\/\/ Proceedings of the European Conference on Computer Vision(ECCV).2002: 753-768.  Shechtman E Caspi Y Irani M. Increasing space-time resolution in video[C]\/\/ Proceedings of the European Conference on Computer Vision(ECCV).2002: 753-768.","DOI":"10.1007\/3-540-47969-4_50"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.1984.4767596"},{"key":"e_1_3_2_1_17_1","volume-title":"Spatiotemporal video upscaling using motion-assisted steering kernel (mask) regression[M]\/\/High-Quality Visual Experience","author":"Takeda H","year":"2010","unstructured":"Takeda H , Beek P , Milanfar P. Spatiotemporal video upscaling using motion-assisted steering kernel (mask) regression[M]\/\/High-Quality Visual Experience . Springer , Berlin, Heidelberg , 2010 : 245-274. Takeda H, Beek P, Milanfar P. Spatiotemporal video upscaling using motion-assisted steering kernel (mask) regression[M]\/\/High-Quality Visual Experience. Springer, Berlin, Heidelberg, 2010: 245-274."},{"key":"e_1_3_2_1_18_1","volume-title":"Ba J. Adam: A method for stochastic optimization[J]. arXiv preprint arXiv:1412.6980","author":"Kingma D P","year":"2014","unstructured":"Kingma D P , Ba J. Adam: A method for stochastic optimization[J]. arXiv preprint arXiv:1412.6980 , 2014 . Kingma D P, Ba J. Adam: A method for stochastic optimization[J]. arXiv preprint arXiv:1412.6980, 2014."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.7763\/IJCTE.2014.V6.902"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.7763\/IJCTE.2009.V1.39"},{"key":"e_1_3_2_1_21_1","volume-title":"Very deep convolutional networks for large-scale image recognition[J]. arXiv preprint arXiv:1409.1556","author":"Simonyan K","year":"2014","unstructured":"Simonyan K , Zisserman A. Very deep convolutional networks for large-scale image recognition[J]. arXiv preprint arXiv:1409.1556 , 2014 . Simonyan K, Zisserman A. Very deep convolutional networks for large-scale image recognition[J]. arXiv preprint arXiv:1409.1556, 2014."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","unstructured":"He K Zhang X Ren S Deep residual learning for image recognition[C]\/\/Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2016: 770-778.  He K Zhang X Ren S Deep residual learning for image recognition[C]\/\/Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2016: 770-778.","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"crossref","unstructured":"Liu Z Lin Y Cao Y Swin transformer: Hierarchical vision transformer using shifted windows[C]\/\/Proceedings of the IEEE\/CVF International Conference on Computer Vision. 2021: 10012-10022.  Liu Z Lin Y Cao Y Swin transformer: Hierarchical vision transformer using shifted windows[C]\/\/Proceedings of the IEEE\/CVF International Conference on Computer Vision. 2021: 10012-10022.","DOI":"10.1109\/ICCV48922.2021.00986"},{"volume-title":"PMLR","author":"Chen M","key":"e_1_3_2_1_24_1","unstructured":"Chen M , Radford A , Child R , Generative pretraining from pixels[C]\/\/International Conference on Machine Learning . PMLR , 2020: 1691-1703. Chen M, Radford A, Child R, Generative pretraining from pixels[C]\/\/International Conference on Machine Learning. PMLR, 2020: 1691-1703."}],"event":{"name":"ICIGP 2023: 2023 The 6th International Conference on Image and Graphics Processing","acronym":"ICIGP 2023","location":"Chongqing China"},"container-title":["Proceedings of the 2023 6th International Conference on Image and Graphics Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3582649.3582662","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3582649.3582662","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T18:09:14Z","timestamp":1750183754000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3582649.3582662"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,1,6]]},"references-count":24,"alternative-id":["10.1145\/3582649.3582662","10.1145\/3582649"],"URL":"https:\/\/doi.org\/10.1145\/3582649.3582662","relation":{},"subject":[],"published":{"date-parts":[[2023,1,6]]},"assertion":[{"value":"2023-04-07","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}