{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,28]],"date-time":"2026-06-28T08:00:39Z","timestamp":1782633639878,"version":"3.54.5"},"reference-count":30,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2025,2,13]],"date-time":"2025-02-13T00:00:00Z","timestamp":1739404800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,2,13]],"date-time":"2025-02-13T00:00:00Z","timestamp":1739404800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SIViP"],"published-print":{"date-parts":[[2025,4]]},"DOI":"10.1007\/s11760-025-03835-z","type":"journal-article","created":{"date-parts":[[2025,2,13]],"date-time":"2025-02-13T10:26:01Z","timestamp":1739442361000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Enhancing interpretability in film shot analysis through continuous shot integration and saliency maps"],"prefix":"10.1007","volume":"19","author":[{"given":"Fengtian","family":"Lu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yuzhi","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Feng","family":"Tian","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,2,13]]},"reference":[{"issue":"28","key":"3835_CR1","doi-asserted-by":"crossref","first-page":"40289","DOI":"10.1007\/s11042-022-13111-8","volume":"81","author":"Z Chen","year":"2022","unstructured":"Chen, Z., Zhang, Y., Zhang, S., Yang, C.: Study on location bias of cnn for shot scale classification. Multimedia Tools and Applications 81(28), 40289\u201340309 (2022)","journal-title":"Multimedia Tools and Applications"},{"key":"3835_CR2","doi-asserted-by":"crossref","unstructured":"Chen, Zeyu, Zhang, Yana, Zhang, Lianyi, Yang, Cheng: Ro-textcnn based mul-move-net for camera motion classification. In: 2021 IEEE\/ACIS 20th International Fall Conference on Computer and Information Science (ICIS Fall), pp. 182\u2013186. IEEE (2021)","DOI":"10.1109\/ICISFall51598.2021.9627386"},{"key":"3835_CR3","doi-asserted-by":"crossref","unstructured":"Rao, Anyi, Wang, Jiaze, Xu, Linning, Jiang, Xuekun, Huang, Qingqiu, Zhou, Bolei, Lin, Dahua: A unified framework for shot type classification based on subject centric lens. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XI 16, pp. 17\u201334. Springer (2020)","DOI":"10.1007\/978-3-030-58621-8_2"},{"key":"3835_CR4","doi-asserted-by":"crossref","unstructured":"Zhang, Qinglong, Rao, Lu, Yang, Yubin: Group-cam: Group score-weighted visual explanations for deep convolutional networks. arXiv preprint arXiv:2103.13859 (2021)","DOI":"10.1109\/CVPRW50498.2020.00020"},{"key":"3835_CR5","unstructured":"Petsiuk, Vitali, Das, Abir, Saenko, Kate: Rise: Randomized input sampling for explanation of black-box models. arXiv preprint arXiv:1806.07421 (2018)"},{"key":"3835_CR6","doi-asserted-by":"crossref","unstructured":"Selvaraju, Ramprasaath\u00a0R., Cogswell, Michael, Das, Abhishek, Vedantam, Ramakrishna, Parikh, Devi, Batra, Dhruv: Grad-cam: Visual explanations from deep networks via gradient-based localization. In: Proceedings of the IEEE international conference on computer vision, pp. 618\u2013626 (2017)","DOI":"10.1109\/ICCV.2017.74"},{"key":"3835_CR7","doi-asserted-by":"crossref","unstructured":"Kapishnikov, Andrei, Bolukbasi, Tolga, Vi\u00e9gas, Fernanda, Terry, Michael: Xrai: Better attributions through regions. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4948\u20134957 (2019)","DOI":"10.1109\/ICCV.2019.00505"},{"key":"3835_CR8","unstructured":"Devlin, Jacob, Chang, Ming-Wei, Lee, Kenton, Toutanova, Kristina: Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"3835_CR9","volume-title":"Karthik Narasimhan","author":"A Radford","year":"2018","unstructured":"Radford, A.: Karthik Narasimhan. Ilya Sutskever, et al. Improving language understanding by generative pre-training, Tim Salimans (2018)"},{"key":"3835_CR10","unstructured":"Fengtian, L., Li, Y., Tian, F.: Exploring challenge and explainable shot type classification using sam-guided approaches, pp. 1\u201310. Signal, Image and Video Processing (2024)"},{"key":"3835_CR11","doi-asserted-by":"crossref","unstructured":"He, Kaiming, Chen, Xinlei, Xie, Saining, Li, Yanghao, Doll\u00e1r, Piotr, Girshick, Ross: Masked autoencoders are scalable vision learners. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 16000\u201316009 (2022)","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"3835_CR12","first-page":"38204","volume":"35","author":"Y Tatsunami","year":"2022","unstructured":"Tatsunami, Y., Taki, M.: Sequencer: Deep lstm for image classification. Adv. Neural. Inf. Process. Syst. 35, 38204\u201338217 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"8","key":"3835_CR13","doi-asserted-by":"crossref","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"issue":"10","key":"3835_CR14","doi-asserted-by":"crossref","first-page":"1682","DOI":"10.1109\/TCSVT.2014.2345933","volume":"24","author":"MA Hasan","year":"2014","unstructured":"Hasan, M.A., Xu, M., He, X., Xu, C.: Camhid: Camera motion histogram descriptor and its application to cinematographic shot classification. IEEE Trans. Circuits Syst. Video Technol. 24(10), 1682\u20131695 (2014)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"3835_CR15","doi-asserted-by":"crossref","unstructured":"Prasertsakul, Pawin, Kondo, Toshiaki, Iida, Hiroyuki: Video shot classification using 2d motion histogram. In: 2017 14th International conference on electrical engineering\/electronics, computer, telecommunications and information technology (ECTI-CON), pp. 202\u2013205. IEEE (2017)","DOI":"10.1109\/ECTICon.2017.8096208"},{"key":"3835_CR16","unstructured":"Krizhevsky, Alex, Sutskever, Ilya, Hinton, Geoffrey\u00a0E.: Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems, 25 (2012)"},{"key":"3835_CR17","doi-asserted-by":"crossref","unstructured":"Argaw, Dawit\u00a0Mureja, Heilbron, Fabian\u00a0Caba, Lee, Joon-Young, Woodson, Markus, Kweon, In\u00a0So: The anatomy of video editing: a dataset and benchmark suite for ai-assisted video editing. In: European Conference on Computer Vision, pp. 201\u2013218. Springer (2022)","DOI":"10.1007\/978-3-031-20074-8_12"},{"issue":"11","key":"3835_CR18","doi-asserted-by":"crossref","first-page":"2673","DOI":"10.1109\/78.650093","volume":"45","author":"M Schuster","year":"1997","unstructured":"Schuster, M., Paliwal, K.K.: Bidirectional recurrent neural networks. IEEE Trans. Signal Process. 45(11), 2673\u20132681 (1997)","journal-title":"IEEE Trans. Signal Process."},{"key":"3835_CR19","doi-asserted-by":"crossref","unstructured":"Liu, Ze, Lin, Yutong, Cao, Yue, Hu, Han, Wei, Yixuan, Zhang, Zheng, Lin, Stephen, Guo, Baining: Swin transformer: Hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"3835_CR20","doi-asserted-by":"crossref","unstructured":"Carreira, Joao, Zisserman, Andrew: Quo vadis, action recognition? a new model and the kinetics dataset. In: proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6299\u20136308 (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"3835_CR21","doi-asserted-by":"crossref","unstructured":"Wang, Limin, Xiong, Yuanjun, Wang, Zhe, Qiao, Yu, Lin, Dahua, Tang, Xiaoou, Van\u00a0Gool, Luc: Temporal segment networks: Towards good practices for deep action recognition. In European conference on computer vision, pp. 20\u201336. Springer (2016)","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"3835_CR22","doi-asserted-by":"crossref","unstructured":"Tran, Du, Bourdev, Lubomir, Fergus, Rob, Torresani, Lorenzo, Paluri, Manohar: Learning spatiotemporal features with 3d convolutional networks. In: Proceedings of the IEEE international conference on computer vision, pp. 4489\u20134497 (2015)","DOI":"10.1109\/ICCV.2015.510"},{"key":"3835_CR23","doi-asserted-by":"crossref","unstructured":"Tran, Du, Wang, Heng, Torresani, Lorenzo, Ray, Jamie, LeCun, Yann, Paluri, Manohar: A closer look at spatiotemporal convolutions for action recognition. In: Proceedings of the IEEE conference on Computer Vision and Pattern Recognition, pp. 6450\u20136459 (2018)","DOI":"10.1109\/CVPR.2018.00675"},{"key":"3835_CR24","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, Christoph: X3d: Expanding architectures for efficient video recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 203\u2013213 (2020)","DOI":"10.1109\/CVPR42600.2020.00028"},{"key":"3835_CR25","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, Christoph, Fan, Haoqi, Malik, Jitendra, He, Kaiming: Slowfast networks for video recognition. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 6202\u20136211 (2019)","DOI":"10.1109\/ICCV.2019.00630"},{"key":"3835_CR26","doi-asserted-by":"crossref","unstructured":"Arnab, Anurag, Dehghani, Mostafa, Heigold, Georg, Sun, Chen, Lu\u010di\u0107, Mario, Schmid, Cordelia: Vivit: A video vision transformer. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 6836\u20136846 (2021)","DOI":"10.1109\/ICCV48922.2021.00676"},{"key":"3835_CR27","doi-asserted-by":"crossref","unstructured":"Fan, Haoqi, Xiong, Bo, Mangalam, Karttikeya, Li, Yanghao, Yan, Zhicheng, Malik, Jitendra, Feichtenhofer, Christoph: Multiscale vision transformers. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 6824\u20136835 (2021)","DOI":"10.1109\/ICCV48922.2021.00675"},{"key":"3835_CR28","unstructured":"Vaswani, Ashish, Shazeer, Noam, Parmar, Niki, Uszkoreit, Jakob, Jones, Llion, Gomez, Aidan\u00a0N., Kaiser, \u0141ukasz, Polosukhin, Illia: Attention is all you need. Advances in neural information processing systems, 30, (2017)"},{"key":"3835_CR29","unstructured":"Kay, Will, Carreira, Joao, Simonyan, Karen, Zhang, Brian, Hillier, Chloe, Vijayanarasimhan, Sudheendra, Viola, Fabio, Green, Tim, Back, Trevor, Natsev, Paul, et\u00a0al.: The kinetics human action video dataset. arXiv preprint arXiv:1705.06950 (2017)"},{"key":"3835_CR30","unstructured":"Dosovitskiy, Alexey, Beyer, Lucas, Kolesnikov, Alexander, Weissenborn, Dirk, Zhai, Xiaohua, Unterthiner, Thomas, Dehghani, Mostafa, Minderer, Matthias, Heigold, Georg, Gelly, Sylvain et\u00a0al.: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"}],"container-title":["Signal, Image and Video Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-03835-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11760-025-03835-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-03835-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,2]],"date-time":"2025-04-02T01:18:46Z","timestamp":1743556726000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11760-025-03835-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,13]]},"references-count":30,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2025,4]]}},"alternative-id":["3835"],"URL":"https:\/\/doi.org\/10.1007\/s11760-025-03835-z","relation":{},"ISSN":["1863-1703","1863-1711"],"issn-type":[{"value":"1863-1703","type":"print"},{"value":"1863-1711","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,2,13]]},"assertion":[{"value":"1 March 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 November 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 January 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 February 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}}],"article-number":"286"}}