{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,3,19]],"date-time":"2024-03-19T20:41:43Z","timestamp":1710880903014},"reference-count":31,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2024,1,5]],"date-time":"2024-01-05T00:00:00Z","timestamp":1704412800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,5]],"date-time":"2024-01-05T00:00:00Z","timestamp":1704412800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SIViP"],"published-print":{"date-parts":[[2024,4]]},"DOI":"10.1007\/s11760-023-02928-x","type":"journal-article","created":{"date-parts":[[2024,1,5]],"date-time":"2024-01-05T20:01:47Z","timestamp":1704484907000},"page":"2533-2542","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Exploring challenge and explainable shot type classification using SAM-guided approaches"],"prefix":"10.1007","volume":"18","author":[{"given":"Fengtian","family":"Lu","sequence":"first","affiliation":[]},{"given":"Yuzhi","family":"Li","sequence":"additional","affiliation":[]},{"given":"Feng","family":"Tian","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,1,5]]},"reference":[{"issue":"28","key":"2928_CR1","doi-asserted-by":"publisher","first-page":"40289","DOI":"10.1007\/s11042-022-13111-8","volume":"81","author":"Zeyu Chen","year":"2022","unstructured":"Chen, Zeyu, Zhang, Yana, Zhang, Suya, Yang, Cheng: Study on location bias of CNN for shot scale classification. Multimed. Tools Appl. 81(28), 40289\u201340309 (2022)","journal-title":"Multimed. Tools Appl."},{"key":"2928_CR2","doi-asserted-by":"crossref","unstructured":"Chen, Z., Zhang, Y., Zhang, L., Yang, C.: RO-TextCNN based MUL-MOVE-net for camera motion classification. In: 2021 IEEE\/ACIS 20th International Fall Conference on Computer and Information Science (ICIS Fall), pp. 182\u2013186. IEEE (2021)","DOI":"10.1109\/ICISFall51598.2021.9627386"},{"key":"2928_CR3","doi-asserted-by":"crossref","unstructured":"Rao, A., Wang, J., Xu, L., Jiang, X., Huang, Q., Zhou, B., Lin, D.: A unified framework for shot type classification based on subject centric lens. In: Computer Vision\u2014ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XI 16, pp. 17\u201334. Springer, Berlin (2020)","DOI":"10.1007\/978-3-030-58621-8_2"},{"key":"2928_CR4","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: European Conference on Computer Vision, pp. 213\u2013229. Springer, Berlin (2020)","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"2928_CR5","doi-asserted-by":"crossref","unstructured":"Kirillov, A., Mintun, E., Ravi, N., Mao, H., Rolland, C., Gustafson, L., Xiao, T., Whitehead, S., Berg, A.C., Lo, W.-Y., et al.: Segment anything. arXiv preprint arXiv:2304.02643 (2023)","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"2928_CR6","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Lawrence Z., C.: Microsoft coco: common objects in context. In: Computer Vision\u2014ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6\u201312, 2014, Proceedings, Part V 13, pp. 740\u2013755. Springer, Berlin (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"2928_CR7","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"2928_CR8","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., et al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"2928_CR9","doi-asserted-by":"crossref","unstructured":"Sun, C., Shrivastava, A., Singh, S., Gupta, A.: Revisiting unreasonable effectiveness of data in deep learning era. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 843\u2013852 (2017)","DOI":"10.1109\/ICCV.2017.97"},{"key":"2928_CR10","unstructured":"Touvron, H., Cord, M., Douze, M., Massa, F., Sablayrolles, A., J\u00e9gou, H.: Training data-efficient image transformers & distillation through attention. In: International Conference on Machine Learning, pp. 10347\u201310357. PMLR (2021)"},{"key":"2928_CR11","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.-J., Li, K., Fei-Fei, L.: Imagenet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255. IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"2928_CR12","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. Adv. Neural Inf. Process. Syst. 28 (2015)"},{"key":"2928_CR13","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"2928_CR14","unstructured":"Paszke, A., Gross, S., Massa, F., Lerer, A., Bradbury, J., Chanan, G., Killeen, T., Lin, Z., Gimelshein, N., Antiga, L., et al.: Pytorch: an imperative style, high-performance deep learning library. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"2928_CR15","doi-asserted-by":"crossref","unstructured":"Ma, J., Wang, B.: Segment anything in medical images. arXiv preprint arXiv:2304.12306 (2023)","DOI":"10.1038\/s41467-024-44824-z"},{"key":"2928_CR16","unstructured":"Cen, J., Wu, Y., Wang, K., Li, X., Yang, J., Pei, Y., Kong, L., Liu, Z., Chen, Q.: SAD: Segment any RGBD. arXiv preprint arXiv:2305.14207 (2023)"},{"key":"2928_CR17","unstructured":"Chen, T., Zhu, L., Ding, C., Cao, R., Wang, Y., Li, Z., Sun, L., Mao, P., Zang, Y.: Sam fails to segment anything?\u2014SAM-adapter: adapting SAM in underperformed scenes: camouflage, shadow, medical image segmentation, and more"},{"key":"2928_CR18","doi-asserted-by":"crossref","unstructured":"Cheng, H.K., Oh, S.W., Price, B., Schwing, A., Lee, J.-Y.: Tracking anything with decoupled video segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1316\u20131326 (2023)","DOI":"10.1109\/ICCV51070.2023.00127"},{"key":"2928_CR19","doi-asserted-by":"crossref","unstructured":"Hui, T.-W., Tang, X., Loy, C.C.: Liteflownet: a lightweight convolutional neural network for optical flow estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8981\u20138989 (2018)","DOI":"10.1109\/CVPR.2018.00936"},{"key":"2928_CR20","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"2928_CR21","doi-asserted-by":"crossref","unstructured":"Tran, D., Wang, H., Torresani, L., Ray, J., LeCun, Y., Paluri, M.: A closer look at spatiotemporal convolutions for action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6450\u20136459 (2018)","DOI":"10.1109\/CVPR.2018.00675"},{"key":"2928_CR22","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C.: X3d: Expanding architectures for efficient video recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 203\u2013213 (2020)","DOI":"10.1109\/CVPR42600.2020.00028"},{"key":"2928_CR23","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Fan, H., Malik, J., He, K.: Slowfast networks for video recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6202\u20136211 (2019)","DOI":"10.1109\/ICCV.2019.00630"},{"key":"2928_CR24","doi-asserted-by":"crossref","unstructured":"Arnab, A., Dehghani, M., Heigold, G., Sun, C., Lu\u010di\u0107, M., Schmid, C.: Vivit: A video vision transformer. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6836\u20136846 (2021)","DOI":"10.1109\/ICCV48922.2021.00676"},{"key":"2928_CR25","unstructured":"Kay, W., Carreira, J., Simonyan, K., Zhang, B., Hillier, C., Vijayanarasimhan, S., Viola, F., Green, T., Back, T., Natsev, P., et al.: The kinetics human action video dataset. arXiv preprint arXiv:1705.06950 (2017)"},{"key":"2928_CR26","doi-asserted-by":"crossref","unstructured":"Li, L., Zhang, X., Hu, W., Li, W., Zhu, P.: Soccer video shot classification based on color characterization using dominant sets clustering. In: Advances in Multimedia Information Processing-PCM 2009: 10th Pacific Rim Conference on Multimedia, Bangkok, Thailand, December 15\u201318, 2009 Proceedings 10, pp. 923\u2013929. Springer, Berlin (2009)","DOI":"10.1007\/978-3-642-10467-1_83"},{"issue":"10","key":"2928_CR27","doi-asserted-by":"publisher","first-page":"1682","DOI":"10.1109\/TCSVT.2014.2345933","volume":"24","author":"Muhammad Abul Hasan","year":"2014","unstructured":"Hasan, Muhammad Abul, Min, Xu., He, Xiangjian, Changsheng, Xu.: CAMHID: camera motion histogram descriptor and its application to cinematographic shot classification. IEEE Trans. Circuits Syst. Video Technol. 24(10), 1682\u20131695 (2014)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"2928_CR28","doi-asserted-by":"crossref","unstructured":"Prasertsakul, P., Kondo, T., Iida, H.: Video shot classification using 2d motion histogram. In: 2017 14th International Conference on Electrical Engineering\/Electronics, Computer, Telecommunications and Information Technology (ECTI-CON), pp. 202\u2013205. IEEE (2017)","DOI":"10.1109\/ECTICon.2017.8096208"},{"key":"2928_CR29","doi-asserted-by":"crossref","unstructured":"Vadis, Q., Carreira, J., Zisserman, A.: Action recognition? A new model and the kinetics dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6299\u20136308 (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"2928_CR30","doi-asserted-by":"crossref","unstructured":"Wang, L., Xiong, Y., Wang, Z., Qiao, Y., Lin, D., Tang, X., Van Gool, L.: Temporal segment networks: towards good practices for deep action recognition. In: European Conference on Computer Vision, pp. 20\u201336. Springer, Berlin (2016)","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"2928_CR31","doi-asserted-by":"crossref","unstructured":"Tran, D., Bourdev, L., Fergus, R., Torresani, L., Paluri, M.: Learning spatiotemporal features with 3d convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4489\u20134497 (2015)","DOI":"10.1109\/ICCV.2015.510"}],"container-title":["Signal, Image and Video Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-023-02928-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11760-023-02928-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-023-02928-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,19]],"date-time":"2024-03-19T20:25:33Z","timestamp":1710879933000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11760-023-02928-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,1,5]]},"references-count":31,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2024,4]]}},"alternative-id":["2928"],"URL":"https:\/\/doi.org\/10.1007\/s11760-023-02928-x","relation":{},"ISSN":["1863-1703","1863-1711"],"issn-type":[{"value":"1863-1703","type":"print"},{"value":"1863-1711","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,1,5]]},"assertion":[{"value":"10 November 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 November 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 November 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 January 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}}]}}