{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T19:03:23Z","timestamp":1777921403999,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":15,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3685517","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:49Z","timestamp":1729925989000},"page":"11218-11221","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":46,"title":["TransNet V2: An Effective Deep Network Architecture for Fast Shot Transition Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6911-5517","authenticated-orcid":false,"given":"Tom\u00e1s","family":"Soucek","sequence":"first","affiliation":[{"name":"SIRET, Department of Software Engineering, Faculty of Mathematics and Physics, Charles University, Prague, Czech Republic"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3558-4144","authenticated-orcid":false,"given":"Jakub","family":"Lokoc","sequence":"additional","affiliation":[{"name":"SIRET, Department of Software Engineering, Faculty of Mathematics and Physics, Charles University, Prague, Czech Republic"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"TRECVID 2017: Evaluating Ad-hoc and Instance Video Search, Events Detection, Video Captioning and Hyperlinking. In Proceedings of TRECVID 2017. NIST, USA.","author":"Awad G.","unstructured":"G. Awad, A. Butt, J. Fiscus, M. Michel, D. Joy, W. Kraaij, A. Smeaton, G. Qu\u00e9not, M. Eskevich, E. Ordelman, G. Jones, and B. Huet. 2017. TRECVID 2017: Evaluating Ad-hoc and Instance Video Search, Events Detection, Video Captioning and Hyperlinking. In Proceedings of TRECVID 2017. NIST, USA."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/2733373.2806316"},{"key":"e_1_3_2_1_3_1","volume-title":"Computer Analysis of Images and Patterns,","author":"Baraldi Lorenzo","unstructured":"Lorenzo Baraldi, Costantino Grana, and Rita Cucchiara. 2015. Shot and Scene Detection via Hierarchical Clustering for Re-using Broadcast Video. In Computer Analysis of Images and Patterns,, George Azzopardi and Nicolai Petkov (Eds.). Springer International Publishing, Cham, 801--811."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2008.08.015"},{"key":"e_1_3_2_1_5_1","volume-title":"Ridiculously Fast Shot Boundary Detection with Fully Convolutional Neural Networks. CoRR","author":"Gygli Michael","year":"2017","unstructured":"Michael Gygli. 2017. Ridiculously Fast Shot Boundary Detection with Fully Convolutional Neural Networks. CoRR, Vol. abs\/1705.08214 (2017). arxiv: 1705.08214 http:\/\/arxiv.org\/abs\/1705.08214"},{"key":"e_1_3_2_1_6_1","volume-title":"Fast and Accurate Shot Boundary Detection through Spatio-temporal Convolutional Neural Networks. CoRR","author":"Hassanien Ahmed","year":"2017","unstructured":"Ahmed Hassanien, Mohamed A. Elgharib, Ahmed Selim, Mohamed Hefeeda, and Wojciech Matusik. 2017. Large-scale, Fast and Accurate Shot Boundary Detection through Spatio-temporal Convolutional Neural Networks. CoRR, Vol. abs\/1705.03281 (2017). arxiv: 1705.03281 http:\/\/arxiv.org\/abs\/1705.03281"},{"key":"e_1_3_2_1_7_1","volume-title":"Large-scale Video Classification with Convolutional Neural Networks. In The IEEE Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Karpathy Andrej","year":"2014","unstructured":"Andrej Karpathy, George Toderici, Sanketh Shetty, Thomas Leung, Rahul Sukthankar, and Li Fei-Fei. 2014. Large-scale Video Classification with Convolutional Neural Networks. In The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3351046"},{"key":"e_1_3_2_1_9_1","volume-title":"Interactive Video Retrieval in the Age of Deep Learning - Detailed Evaluation of VBS 2019","author":"Rossetto L.","year":"2020","unstructured":"L. Rossetto, R. Gasser, J. Lokoc, W. Bailer, K. Schoeffmann, B. Muenzer, T. Soucek, P. A. Nguyen, P. Bolettieri, A. Leibetseder, and S. Vrochidis. 2020. Interactive Video Retrieval in the Age of Deep Learning - Detailed Evaluation of VBS 2019. IEEE Transactions on Multimedia (2020), 1--1."},{"key":"e_1_3_2_1_10_1","volume-title":"Butt","author":"Rossetto Luca","year":"2019","unstructured":"Luca Rossetto, Heiko Schuldt, George Awad, and Asad A. Butt. 2019. V3C -- A Research Video Collection. In MultiMedia Modeling. Springer International Publishing, Cham, 349--360."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2009.03.011"},{"key":"e_1_3_2_1_12_1","unstructured":"Tom\u00e1vs Souvcek. 2020. Deep learning based approaches for shot transition detection and known-item search in video. (2020)."},{"key":"e_1_3_2_1_13_1","volume-title":"Fast Video Shot Transition Localization with Deep Structured Models. CoRR","author":"Tang Shitao","year":"2018","unstructured":"Shitao Tang, Litong Feng, Zhanghui Kuang, Yimin Chen, and Wei Zhang. 2018. Fast Video Shot Transition Localization with Deep Structured Models. CoRR, Vol. abs\/1808.04234 (2018). arxiv: 1808.04234 http:\/\/arxiv.org\/abs\/1808.04234"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"e_1_3_2_1_15_1","volume-title":"Rethinking Spatiotemporal Feature Learning: Speed-Accuracy Trade-offs in Video Classification. In The European Conference on Computer Vision (ECCV).","author":"Xie Saining","year":"2018","unstructured":"Saining Xie, Chen Sun, Jonathan Huang, Zhuowen Tu, and Kevin Murphy. 2018. Rethinking Spatiotemporal Feature Learning: Speed-Accuracy Trade-offs in Video Classification. In The European Conference on Computer Vision (ECCV)."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3685517","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3685517","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:28Z","timestamp":1750295848000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3685517"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":15,"alternative-id":["10.1145\/3664647.3685517","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3685517","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}