{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T16:13:09Z","timestamp":1774627989149,"version":"3.50.1"},"reference-count":82,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2026,1,29]],"date-time":"2026-01-29T00:00:00Z","timestamp":1769644800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2026,1,29]],"date-time":"2026-01-29T00:00:00Z","timestamp":1769644800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/100002418","name":"Intel Corporation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100002418","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-026-21138-4","type":"journal-article","created":{"date-parts":[[2026,1,29]],"date-time":"2026-01-29T14:39:59Z","timestamp":1769697599000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Towards efficient real-time video motion transfer via generative time series modeling"],"prefix":"10.1007","volume":"85","author":[{"given":"Tasmiah","family":"Haque","sequence":"first","affiliation":[]},{"given":"Md Asif Bin","family":"Syed","sequence":"additional","affiliation":[]},{"given":"Byungheon","family":"Jeong","sequence":"additional","affiliation":[]},{"given":"Xue","family":"Bai","sequence":"additional","affiliation":[]},{"given":"Sumit","family":"Mohan","sequence":"additional","affiliation":[]},{"given":"Somdyuti","family":"Paul","sequence":"additional","affiliation":[]},{"given":"Imtiaz","family":"Ahmed","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3821-8112","authenticated-orcid":false,"given":"Srinjoy","family":"Das","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,29]]},"reference":[{"key":"21138_CR1","doi-asserted-by":"crossref","unstructured":"Chan C, Ginosar S, Zhou T, Efros AA (2019) Everybody dance now. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp 5933\u20135942","DOI":"10.1109\/ICCV.2019.00603"},{"key":"21138_CR2","doi-asserted-by":"crossref","unstructured":"Wang TC, Mallya A, Liu MY (2021) One-shot free-view neural talking-head synthesis for video conferencing. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 10039\u201310049","DOI":"10.1109\/CVPR46437.2021.00991"},{"issue":"3","key":"21138_CR3","doi-asserted-by":"publisher","first-page":"29506","DOI":"10.2196\/29506","volume":"24","author":"HC Yang","year":"2022","unstructured":"Yang HC, Rahmanti AR, Huang CW, Li YC (2022) How can research on artificial empathy be enhanced by applying deepfakes? J Med Internet Res 24(3):29506","journal-title":"J Med Internet Res"},{"issue":"6","key":"21138_CR4","doi-asserted-by":"publisher","first-page":"2806","DOI":"10.1109\/TPAMI.2020.3045007","volume":"44","author":"S Oprea","year":"2020","unstructured":"Oprea S, Martinez-Gonzalez P, Garcia-Garcia A, Castro-Vargas JA, Orts-Escolano S, Garcia-Rodriguez J, Argyros A (2020) A review on deep learning techniques for video prediction. IEEE Trans Pattern Anal Mach Intell 44(6):2806\u20132826","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"21138_CR5","doi-asserted-by":"crossref","unstructured":"Siarohin A, Lathuili\u00e8re S, Tulyakov S, Ricci E, Sebe N (2019) Animating arbitrary objects via deep motion transfer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 2377\u20132386","DOI":"10.1109\/CVPR.2019.00248"},{"key":"21138_CR6","unstructured":"Siarohin A, Lathuili\u00e8re S, Tulyakov S, Ricci E, Sebe N (2019) First order motion model for image animation. Adv Neural Inf Process Syst 32"},{"key":"21138_CR7","doi-asserted-by":"crossref","unstructured":"Pondaven A, Siarohin A, Tulyakov S, Torr P, Pizzati F (2024) Video motion transfer with diffusion transformers. arXiv:2412.07776","DOI":"10.1109\/CVPR52734.2025.02133"},{"key":"21138_CR8","unstructured":"Chung J, Kastner K, Dinh L, Goel K, Courville AC, Bengio Y (2015) A recurrent latent variable model for sequential data. In: Advances in Neural Information Processing Systems (NeurIPS), vol 28"},{"key":"21138_CR9","unstructured":"Rasul K, Sheikh AS, Schuster I, Bergmann U, Vollgraf R (2020) Multivariate probabilistic time series forecasting via conditioned normalizing flows. arXiv:2002.06103"},{"key":"21138_CR10","doi-asserted-by":"crossref","unstructured":"Bai X, Haque T, Mohan S, Cai Y, Jeong B, Halasz A, Das S (2024) Enhancing bandwidth efficiency for video motion transfer applications using deep learning based keypoint prediction. In: International Conference on Engineering Applications of Neural Networks. Springer, Cham, pp 134\u2013151","DOI":"10.1007\/978-3-031-62495-7_11"},{"key":"21138_CR11","doi-asserted-by":"crossref","unstructured":"Zhao J, Zhang H (2022) Thin-plate spline motion model for image animation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 3657\u20133666","DOI":"10.1109\/CVPR52688.2022.00364"},{"key":"21138_CR12","unstructured":"Luc P, Clark A, Dieleman S, Casas DD, Doron Y, Cassirer A, Simonyan K (2020) Transformation-based adversarial video prediction on large-scale data. arXiv:2003.04035"},{"key":"21138_CR13","unstructured":"Mathieu M, Couprie C, LeCun Y (2015) Deep multi-scale video prediction beyond mean square error. arXiv preprint. arXiv:1511.05440"},{"key":"21138_CR14","unstructured":"Shi X, Chen Z, Wang H, Yeung D-Y, Wong W-K, Woo W-C (2015) Convolutional lstm network: A machine learning approach for precipitation nowcasting. In: Advances in Neural Information Processing Systems (NeurIPS), vol 28"},{"key":"21138_CR15","unstructured":"Wang Y, Jiang L, Yang MH, Li LJ, Long M, Fei-Fei L (2018) Eidetic 3d lstm: A model for video prediction and beyond. In: International Conference on Learning Representations (ICLR)"},{"key":"21138_CR16","unstructured":"Villar-Corrales A, Karapetyan A, Boltres A, Behnke S (2022) Mspred: Video prediction at multiple spatio-temporal scales with hierarchical recurrent networks. arXiv:2203.09303"},{"key":"21138_CR17","doi-asserted-by":"crossref","unstructured":"Gao Z, Tan C, Wu L, Li SZ (2022) Simvp: Simpler yet better video prediction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 3170\u20133180","DOI":"10.1109\/CVPR52688.2022.00317"},{"key":"21138_CR18","doi-asserted-by":"crossref","unstructured":"Shi Z, Xu X, Liu X, Chen J, Yang M-H (2022) Video frame interpolation transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 17482\u201317491","DOI":"10.1109\/CVPR52688.2022.01696"},{"key":"21138_CR19","doi-asserted-by":"publisher","first-page":"104612","DOI":"10.1016\/j.imavis.2022.104612","volume":"130","author":"X Ye","year":"2023","unstructured":"Ye X, Bilodeau G-A (2023) Video prediction by efficient transformers. Image Vis Comput 130:104612","journal-title":"Image Vis Comput"},{"key":"21138_CR20","doi-asserted-by":"crossref","unstructured":"Lu L, Wu R, Lin H, Lu J, Jia J (2022) Video frame interpolation with transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 3532\u20133542","DOI":"10.1109\/CVPR52688.2022.00352"},{"key":"21138_CR21","unstructured":"Ming R, Huang Z, Ju Z, Hu J, Peng L, Zhou S (2024) A survey on video prediction: From deterministic to generative approaches. arXiv:2401.14718"},{"key":"21138_CR22","doi-asserted-by":"crossref","unstructured":"Xu Z, Wang Y, Long M, Wang J, Kliss M (2018) Predcnn: Predictive learning with cascade convolutions. In: Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI), pp 2940\u20132947","DOI":"10.24963\/ijcai.2018\/408"},{"key":"21138_CR23","doi-asserted-by":"crossref","unstructured":"Reda FA, Liu G, Shih KJ, Kirby R, Barker J, Tarjan D, Tao A, Catanzaro B (2018) Sdc-net: Video prediction using spatially-displaced convolution. In: Proceedings of the European Conference on Computer Vision (ECCV), pp 718\u2013733","DOI":"10.1007\/978-3-030-01234-2_44"},{"key":"21138_CR24","doi-asserted-by":"crossref","unstructured":"Aigner S, K\u00f6rner M (2018) Futuregan: Anticipating the future frames of video sequences using spatio-temporal 3d convolutions in progressively growing gans. arXiv:1810.01325","DOI":"10.5194\/isprs-archives-XLII-2-W16-3-2019"},{"key":"21138_CR25","unstructured":"Jang Y, Kim G, Song Y (2018) Video prediction with appearance and motion conditions. In: Proceedings of the International Conference on Machine Learning (ICML)"},{"key":"21138_CR26","unstructured":"Pottorff R, Nielsen J, Wingate D (2019) Video extrapolation with an invertible linear embedding. arXiv:1903.00133"},{"key":"21138_CR27","doi-asserted-by":"crossref","unstructured":"Lopez R, Huang TS (1995) Head pose computation for very low bit-rate video coding. In: International Conference on Computer Analysis of Images and Patterns. Springer, Berlin, Heidelberg, pp 440\u2013447","DOI":"10.1007\/3-540-60268-2_327"},{"issue":"14","key":"21138_CR28","doi-asserted-by":"publisher","first-page":"1031","DOI":"10.1016\/S0262-8856(99)00005-0","volume":"17","author":"I Koufakis","year":"1999","unstructured":"Koufakis I, Buxton BF (1999) Very low bit rate face video compression using linear combination of 2d face views and principal components analysis. Image Vis Comput 17(14):1031\u20131051","journal-title":"Image Vis Comput"},{"key":"21138_CR29","doi-asserted-by":"crossref","unstructured":"Walker J, Marino K, Gupta A, Hebert M (2017) The pose knows: Video forecasting by generating pose futures. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV), pp 3332\u20133341","DOI":"10.1109\/ICCV.2017.361"},{"key":"21138_CR30","unstructured":"Zhou Y, Luo C, Sun X, Zha ZJ, Zeng W (2021) Vae2: Preventing posterior collapse of variational video predictions in the wild. arXiv:2101.12050"},{"key":"21138_CR31","doi-asserted-by":"crossref","unstructured":"Tang J, Hu H, Zhou Q, Shan H, Tian C, Quek TQ (2019) Pose guided global and local gan for appearance preserving human video prediction. In: 2019 IEEE International Conference on Image Processing (ICIP), pp 614\u2013618. IEEE","DOI":"10.1109\/ICIP.2019.8803792"},{"key":"21138_CR32","unstructured":"Villegas R, Erhan D, Lee H et al (2018) Hierarchical long-term video prediction without supervision. In: International Conference on Machine Learning, pp 6038\u20136046. PMLR"},{"key":"21138_CR33","unstructured":"Villegas R, Yang J, Zou Y, Sohn S, Lin X, Lee H (2017) Learning to generate long-term future via hierarchical prediction. In: International Conference on Machine Learning, pp 3560\u20133569. PMLR"},{"key":"21138_CR34","unstructured":"Ranzato M, Szlam A, Bruna J, Mathieu M, Collobert R, Chopra S (2014) Video (language) modeling: A baseline for generative models of natural videos. arXiv:1412.6604"},{"key":"21138_CR35","doi-asserted-by":"crossref","unstructured":"Terwilliger A, Brazil G, Liu X (2019) Recurrent flow-guided semantic forecasting. In: 2019 IEEE Winter Conference on Applications of Computer Vision (WACV), pp 1703\u20131712. IEEE","DOI":"10.1109\/WACV.2019.00186"},{"issue":"8","key":"21138_CR36","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory. Neural Comput 9(8):1735\u20131780","journal-title":"Neural Comput"},{"key":"21138_CR37","unstructured":"Minderer M, Sun C, Villegas R, Cole F, Murphy KP, Lee H (2019) Unsupervised learning of object structure and dynamics from videos. In: Advances in Neural Information Processing Systems (NeurIPS), vol 32"},{"key":"21138_CR38","doi-asserted-by":"crossref","unstructured":"Esser P, Rombach R, Ommer B (2021) Taming transformers for high-resolution image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","DOI":"10.1109\/CVPR46437.2021.01268"},{"key":"21138_CR39","unstructured":"Walker J, Razavi A, Oord A (2021) Predicting video with vqvae. arXiv:2103.01950"},{"key":"21138_CR40","unstructured":"Shrivastava G, Shrivastava A (2021) Diverse video generation using a gaussian process trigger. arXiv:2107.04619"},{"key":"21138_CR41","unstructured":"Goroshin R, Mathieu MF, LeCun Y (2015) Learning to linearize under uncertainty. In: Advances in Neural Information Processing Systems (NeurIPS), vol 28"},{"key":"21138_CR42","doi-asserted-by":"crossref","unstructured":"Hu A, Cotter F, Mohan N, Gurau C, Kendall A (2020) Probabilistic future prediction for video scene understanding. In: European Conference on Computer Vision, pp 767\u2013785. Springer","DOI":"10.1007\/978-3-030-58517-4_45"},{"key":"21138_CR43","unstructured":"Fragkiadaki K, Huang J, Alemi A, Vijayanarasimhan S, Ricco S, Sukthankar R (2017) Motion prediction under multimodality with conditional stochastic networks. arXiv:1705.02082"},{"key":"21138_CR44","unstructured":"Babaeizadeh M, Finn C, Erhan D, Campbell RH, Levine S (2017) Stochastic variational video prediction. arXiv:1710.11252"},{"key":"21138_CR45","unstructured":"Denton E, Fergus R (2018) Stochastic video generation with a learned prior. In: International Conference on Machine Learning, pp 1174\u20131183. PMLR"},{"key":"21138_CR46","unstructured":"Xue T, Wu J, Bouman K, Freeman B (2016) Visual dynamics: Probabilistic future frame synthesis via cross convolutional networks. In: Advances in Neural Information Processing Systems (NeurIPS), vol 29"},{"key":"21138_CR47","unstructured":"Yang D, Hong S, Jang Y, Zhao T, Lee H (2019) Diversity-sensitive conditional generative adversarial networks. arXiv:1901.09024"},{"key":"21138_CR48","unstructured":"S\u00f8nderby CK, Raiko T, Maal\u00f8e L, S\u00f8nderby SK, Winther O (2016) Ladder variational autoencoders. In: Advances in Neural Information Processing Systems (NeurIPS), vol 29"},{"key":"21138_CR49","unstructured":"Lee AX, Zhang R, Ebert F, Abbeel P, Finn C, Levine S (2018) Stochastic adversarial video prediction. arXiv:1804.01523"},{"key":"21138_CR50","unstructured":"Gur S, Benaim S, Wolf L (2020) Hierarchical patch vae-gan: Generating diverse videos from a single sample. In: Advances in Neural Information Processing Systems (NeurIPS), vol 33, pp 16761\u201316772"},{"key":"21138_CR51","unstructured":"Kumar M, Babaeizadeh M, Erhan D, Finn C, Levine S, Dinh L, Kingma D (2019) Videoflow: A conditional flow-based model for stochastic video generation. arXiv:1903.01434"},{"key":"21138_CR52","unstructured":"Ma YJ, Inala JP, Jayaraman D, Bastani O (2020) Diverse sampling for normalizing flow based trajectory forecasting (7(8)). arXiv:2011.15084"},{"key":"21138_CR53","doi-asserted-by":"crossref","unstructured":"Zakharov E, Shysheya A, Burkov E, Lempitsky V (2019) Few-shot adversarial learning of realistic neural talking head models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp 9459\u20139468","DOI":"10.1109\/ICCV.2019.00955"},{"key":"21138_CR54","unstructured":"Guo J, Zhang D, Liu X, Zhong Z, Zhang Y, Wan P, Zhang D (2024) Liveportrait: Efficient portrait animation with stitching and retargeting control. arXiv:2407.03168"},{"key":"21138_CR55","doi-asserted-by":"crossref","unstructured":"Wang H, Liu F, Zhou Q, Yi R, Tan X, Ma L (2024) Continuous piecewise-affine based motion model for image animation. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol 38, pp 5427\u20135435","DOI":"10.1609\/aaai.v38i6.28351"},{"key":"21138_CR56","unstructured":"Bommasani Rea (2021) On the opportunities and risks of foundation models. arXiv:2108.07258"},{"key":"21138_CR57","doi-asserted-by":"crossref","unstructured":"Kirillov A, Mintun E, Ravi N, Mao H, Rolland C, Gustafson L, Xiao T, Whitehead S, Berg AC, Lo W-Y et al (2023) Segment anything. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 4015\u20134026","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"21138_CR58","doi-asserted-by":"crossref","unstructured":"Tumanyan N, Bar-Tal O, Bagon S, Dekel T (2022) Splicing vit features for semantic appearance transfer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 10748\u201310757","DOI":"10.1109\/CVPR52688.2022.01048"},{"key":"21138_CR59","unstructured":"Yesiltepe H, Meral THS, Dunlop C, Yanardag P (2024) Motionshop: Zero-shot motion transfer in video diffusion models with mixture of score guidance. arXiv:2412.05355"},{"key":"21138_CR60","unstructured":"Meral THS, Yesiltepe H, Dunlop C, Yanardag P (2024) Motionflow: Attention-driven motion transfer in video diffusion models. arXiv:2412.05275"},{"key":"21138_CR61","doi-asserted-by":"crossref","unstructured":"Thewlis J, Bilen H, Vedaldi A (2017) Unsupervised learning of object landmarks by factorized spatial embeddings. In: Proceedings of the IEEE International Conference on Computer Vision, pp 5916\u20135925","DOI":"10.1109\/ICCV.2017.348"},{"key":"21138_CR62","doi-asserted-by":"crossref","unstructured":"Zhang Y, Guo Y, Jin Y, Luo Y, He Z, Lee H (2018) Unsupervised discovery of object landmarks as structural representations. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 2694\u20132703","DOI":"10.1109\/CVPR.2018.00285"},{"key":"21138_CR63","unstructured":"Cho K, Van\u00a0Merri\u00ebnboer B, Gulcehre C, Bahdanau D, Bougares F, Schwenk H, Bengio Y (2014) Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv:1406.1078"},{"key":"21138_CR64","unstructured":"Kingma DP (2013) Auto-encoding variational bayes. arXiv:1312.6114"},{"key":"21138_CR65","unstructured":"Rezende D, Mohamed S (2015) Variational inference with normalizing flows. In: International Conference on Machine Learning, pp 1530\u20131538. PMLR"},{"key":"21138_CR66","unstructured":"Rezende DJ, Mohamed S, Wierstra D (2014) Stochastic backpropagation and approximate inference in deep generative models. In: Proceedings of the 31st International Conference on Machine Learning (ICML)"},{"key":"21138_CR67","doi-asserted-by":"crossref","unstructured":"Ullah S, Xu Z, Wang H, Menzel S, Sendhoff B, B\u00e4ck T (2020) Exploring clinical time series forecasting with meta-features in variational recurrent models. In: 2020 International Joint Conference on Neural Networks (IJCNN), pp 1\u20139. IEEE","DOI":"10.1109\/IJCNN48605.2020.9207254"},{"issue":"57","key":"21138_CR68","first-page":"1","volume":"22","author":"G Papamakarios","year":"2021","unstructured":"Papamakarios G, Nalisnick E, Rezende DJ, Mohamed S, Lakshminarayanan B (2021) Normalizing flows for probabilistic modeling and inference. J Mach Learn Res 22(57):1\u201364","journal-title":"J Mach Learn Res"},{"key":"21138_CR69","unstructured":"Dinh L, Sohl-Dickstein J, Bengio S (2016) Density estimation using real nvp. arXiv:1605.08803"},{"key":"21138_CR70","unstructured":"Luo GY, Favero GM, Luo ZH, Jolicoeur-Martineau A, Pal C (2024) Beyond fvd: Enhanced evaluation metrics for video generation quality. arXiv:2410.05203"},{"key":"21138_CR71","unstructured":"Unterthiner T, Oord A, Heusel M, Ramsauer H, Nessler B, Hochreiter S (2018) Towards accurate generative models of video: A new metric & challenges. NeurIPS Workshop on Challenges and Opportunities for Deep Learning in Autonomous Driving. arXiv:1812.01717 [cs.LG]"},{"issue":"1","key":"21138_CR72","first-page":"723","volume":"13","author":"A Gretton","year":"2012","unstructured":"Gretton A, Borgwardt KM, Rasch MJ, Sch\u00f6lkopf B, Smola A (2012) A kernel two-sample test. J Mach Learn Res 13(1):723\u2013773","journal-title":"J Mach Learn Res"},{"key":"21138_CR73","unstructured":"Chen J, Cao J, Lin D, Kitani K, Pang J (2024) Mixed gaussian flow for diverse trajectory prediction. arXiv:2402.12238"},{"issue":"4","key":"21138_CR74","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang Z, Bovik AC, Sheikh HR, Simoncelli EP (2004) Image quality assessment: From error visibility to structural similarity. IEEE Trans Image Process 13(4):600\u2013612. https:\/\/doi.org\/10.1109\/TIP.2003.819861","journal-title":"IEEE Trans Image Process"},{"key":"21138_CR75","doi-asserted-by":"crossref","unstructured":"Kotevski Z, Mitrevski P (2009) Experimental comparison of psnr and ssim metrics for video quality estimation. In: International Conference on ICT Innovations, pp 357\u2013366. Springer, Berlin, Heidelberg","DOI":"10.1007\/978-3-642-10781-8_37"},{"issue":"16","key":"21138_CR76","first-page":"23","volume":"12","author":"F Ebert","year":"2017","unstructured":"Ebert F, Finn C, Lee AX, Levine S (2017) Self-supervised visual planning with temporal skip connections. CoRL 12(16):23","journal-title":"CoRL"},{"key":"21138_CR77","doi-asserted-by":"crossref","unstructured":"Nagrani A, Chung JS, Zisserman A (2017) Voxceleb: a large-scale speaker identification dataset. arXiv:1706.08612","DOI":"10.21437\/Interspeech.2017-950"},{"key":"21138_CR78","doi-asserted-by":"crossref","unstructured":"Rehman A, Zeng K, Wang Z (2015) Display device-adapted video quality-of-experience assessment. In: Human Vision and Electronic Imaging XX, vol 9394, pp 27\u201337. SPIE","DOI":"10.1117\/12.2077917"},{"key":"21138_CR79","unstructured":"Facebook AI Research (FAIR): Fvcore: A Light-weight Core Library for Computer Vision. Apache-2.0 license. Common utilities used by Detectron2, PySlowFast, etc"},{"key":"21138_CR80","doi-asserted-by":"crossref","unstructured":"Gao Y, He Y, Li X, Zhao B, Lin H, Liang Y, Zhong J, Zhang H, Wang J, Zeng Y et al (2024) An empirical study on low gpu utilization of deep learning jobs. In: Proceedings of the IEEE\/ACM 46th International Conference on Software Engineering, pp 1\u201313","DOI":"10.1145\/3597503.3639232"},{"key":"21138_CR81","unstructured":"Zoom Video Communications, Inc (n.d.) Zoom system requirements: Windows, macOS, Linux. https:\/\/support.zoom.com\/hc\/en\/article?id=zm_kb&sysparm_article=KB0060748. Zoom Support article. Accessed 13 Oct 2025"},{"key":"21138_CR82","unstructured":"Cisco Webex (2023) What are the Minimum Bandwidth Requirements for Sending and Receiving Video in Cisco Webex Meetings? https:\/\/help.webex.com\/en-us\/article\/WBX22158\/What-are-the-Minimum-Bandwidth-Requirements-for-Sending-and-Receiving-Video-inCisco-Webex-Meetings. Webex Help Center article. Last updated Dec 25, 2023. Accessed 13 Oct 2025"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-026-21138-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-026-21138-4","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-026-21138-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,29]],"date-time":"2026-01-29T14:40:24Z","timestamp":1769697624000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-026-21138-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1,29]]},"references-count":82,"journal-issue":{"issue":"2","published-online":{"date-parts":[[2026,2]]}},"alternative-id":["21138"],"URL":"https:\/\/doi.org\/10.1007\/s11042-026-21138-4","relation":{},"ISSN":["1573-7721"],"issn-type":[{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1,29]]},"assertion":[{"value":"10 June 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 December 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 December 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 January 2026","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of Interest Statement"}}],"article-number":"51"}}