{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,6]],"date-time":"2026-06-06T16:17:57Z","timestamp":1780762677893,"version":"3.54.1"},"publisher-location":"Cham","reference-count":30,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030208868","type":"print"},{"value":"9783030208875","type":"electronic"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-20887-5_36","type":"book-chapter","created":{"date-parts":[[2019,5,27]],"date-time":"2019-05-27T12:03:59Z","timestamp":1558958639000},"page":"577-592","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":27,"title":["Fast Video Shot Transition Localization with Deep Structured Models"],"prefix":"10.1007","author":[{"given":"Shitao","family":"Tang","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Litong","family":"Feng","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhanghui","family":"Kuang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yimin","family":"Chen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wei","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2019,5,28]]},"reference":[{"key":"36_CR1","doi-asserted-by":"crossref","unstructured":"Apostolidis, E., Mezaris, V.: Fast shot segmentation combining global and local visual descriptors. In: 2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6583\u20136587. IEEE (2014)","DOI":"10.1109\/ICASSP.2014.6854873"},{"key":"36_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"801","DOI":"10.1007\/978-3-319-23192-1_67","volume-title":"Computer Analysis of Images and Patterns","author":"L Baraldi","year":"2015","unstructured":"Baraldi, L., Grana, C., Cucchiara, R.: Shot and scene detection via hierarchical clustering for re-using broadcast video. In: Azzopardi, G., Petkov, N. (eds.) CAIP 2015, Part I. LNCS, vol. 9256, pp. 801\u2013811. Springer, Cham (2015). \n                      https:\/\/doi.org\/10.1007\/978-3-319-23192-1_67"},{"key":"36_CR3","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? A new model and the kinetics dataset. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4724\u20134733. IEEE (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"36_CR4","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2009, pp. 248\u2013255. IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"issue":"12","key":"36_CR5","doi-asserted-by":"publisher","first-page":"5187","DOI":"10.1109\/TIP.2014.2362652","volume":"23","author":"S Domnic","year":"2014","unstructured":"Domnic, S.: Walsh-Hadamard transform kernel-based feature vector for shot boundary detection. IEEE Trans. Image Process. 23(12), 5187\u20135197 (2014)","journal-title":"IEEE Trans. Image Process."},{"key":"36_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"768","DOI":"10.1007\/978-3-319-46487-9_47","volume-title":"Computer Vision \u2013 ECCV 2016","author":"V Escorcia","year":"2016","unstructured":"Escorcia, V., Caba Heilbron, F., Niebles, J.C., Ghanem, B.: DAPs: deep action proposals for action understanding. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016, Part III. LNCS, vol. 9907, pp. 768\u2013784. Springer, Cham (2016). \n                      https:\/\/doi.org\/10.1007\/978-3-319-46487-9_47"},{"key":"36_CR7","unstructured":"Gygli, M.: Ridiculously fast shot boundary detection with fully convolutional neural networks (2017). arXiv preprint: \n                      arXiv:1705.08214"},{"key":"36_CR8","unstructured":"Hara, K., Kataoka, H., Satoh, Y.: Can spatiotemporal 3D CNNs retrace the history of 2D CNNs and ImageNet? (2017). arXiv preprint: \n                      arXiv:1711.09577"},{"key":"36_CR9","unstructured":"Hassanien, A., Elgharib, M., Selim, A., Hefeeda, M., Matusik, W.: Large-scale, fast and accurate shot boundary detection through spatio-temporal convolutional neural networks (2017). arXiv preprint: \n                      arXiv:1705.03281"},{"key":"36_CR10","unstructured":"Huang, Q., Xiong, Y., Xiong, Y., Zhang, Y., Lin, D.: From trailers to storylines: an efficient way to learn from movies (2018). arXiv preprint: \n                      arXiv:1806.05341"},{"key":"36_CR11","unstructured":"Iandola, F.N., Han, S., Moskewicz, M.W., Ashraf, K., Dally, W.J., Keutzer, K.: SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and \n                      \n                        \n                      \n                      $$<$$\n                    0.5 MB model size (2016). arXiv preprint: \n                      arXiv:1602.07360"},{"key":"36_CR12","unstructured":"Kawai, Y., Sumiyoshi, H., Yagi, N.: Shot boundary detection at TRECVID 2007. In: TRECVID (2007)"},{"key":"36_CR13","unstructured":"Kay, W., et al.: The kinetics human action video dataset (2017). arXiv preprint: \n                      arXiv:1705.06950"},{"key":"36_CR14","doi-asserted-by":"crossref","unstructured":"Lin, T., Zhao, X., Shou, Z.: Single shot temporal action detection. In: Proceedings of the 2017 ACM on Multimedia Conference, pp. 988\u2013996. ACM (2017)","DOI":"10.1145\/3123266.3123343"},{"key":"36_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1007\/978-3-319-46448-0_2","volume-title":"Computer Vision \u2013 ECCV 2016","author":"W Liu","year":"2016","unstructured":"Liu, W., et al.: SSD: single shot MultiBox detector. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016, Part I. LNCS, vol. 9905, pp. 21\u201337. Springer, Cham (2016). \n                      https:\/\/doi.org\/10.1007\/978-3-319-46448-0_2"},{"key":"36_CR16","unstructured":"Liu, Z., Gibbon, D., Zavesky, E., Shahraray, B., Haffner, P.: At&t research at TRECVID 2007. In: Proceedings of TRECVID Workshop, pp. 19\u201326 (2007)"},{"issue":"12","key":"36_CR17","doi-asserted-by":"publisher","first-page":"5136","DOI":"10.1109\/TIP.2013.2282081","volume":"22","author":"ZM Lu","year":"2013","unstructured":"Lu, Z.M., Shi, Y.: Fast video shot boundary detection based on svd and pattern matching. IEEE Trans. Image Process. 22(12), 5136\u20135145 (2013)","journal-title":"IEEE Trans. Image Process."},{"key":"36_CR18","unstructured":"M\u00fchling, M., Ewerth, R., Stadelmann, T., Z\u00f6fel, C., Shi, B., Freisleben, B.: University of Marburg at TRECVID 2007: shot boundary detection and high level feature extraction. In: TRECVID (2007)"},{"key":"36_CR19","doi-asserted-by":"crossref","unstructured":"Qiu, Z., Yao, T., Mei, T.: Learning spatio-temporal representation with pseudo-3D residual networks. In: 2017 IEEE International Conference on Computer Vision (ICCV), pp. 5534\u20135542. IEEE (2017)","DOI":"10.1109\/ICCV.2017.590"},{"key":"36_CR20","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: Advances in Neural Information Processing Systems (NIPS) (2015)"},{"key":"36_CR21","doi-asserted-by":"crossref","unstructured":"Song, Y., Redi, M., Vallmitjana, J., Jaimes, A.: To click or not to click: automatic selection of beautiful thumbnails from videos. In: Proceedings of the 25th ACM International on Conference on Information and Knowledge Management, pp. 659\u2013668. ACM (2016)","DOI":"10.1145\/2983323.2983349"},{"key":"36_CR22","unstructured":"Wang, J., et al.: Learning fine-grained image similarity with deep ranking (2014). arXiv preprint: \n                      arXiv:1404.4661"},{"key":"36_CR23","doi-asserted-by":"crossref","unstructured":"Wang, L., Xiong, Y., Lin, D., Van Gool, L.: UntrimmedNets for weakly supervised action recognition and detection. In: IEEE Conference on Computer Vision and Pattern Recognition, vol. 2 (2017)","DOI":"10.1109\/CVPR.2017.678"},{"key":"36_CR24","doi-asserted-by":"crossref","unstructured":"Xu, H., Das, A., Saenko, K.: R-C3D: region convolutional 3D network for temporal activity detection. In: The IEEE International Conference on Computer Vision (ICCV), vol. 6, p. 8 (2017)","DOI":"10.1109\/ICCV.2017.617"},{"key":"36_CR25","doi-asserted-by":"crossref","unstructured":"Yuan, J., Li, J., Lin, F., Zhang, B.: A unified shot boundary detection framework based on graph partition model. In: Proceedings of the 13th Annual ACM International Conference on Multimedia, pp. 539\u2013542. ACM (2005)","DOI":"10.1145\/1101149.1101271"},{"issue":"2","key":"36_CR26","doi-asserted-by":"publisher","first-page":"168","DOI":"10.1109\/TCSVT.2006.888023","volume":"17","author":"J Yuan","year":"2007","unstructured":"Yuan, J., et al.: A formal study of shot boundary detection. IEEE Trans. Circ. Syst. Video Technol. 17(2), 168\u2013186 (2007)","journal-title":"IEEE Trans. Circ. Syst. Video Technol."},{"key":"36_CR27","doi-asserted-by":"crossref","unstructured":"Yusoff, Y., Christmas, W.J., Kittler, J.: Video shot cut detection using adaptive thresholding. In: BMVC, pp. 1\u201310 (2000)","DOI":"10.5244\/C.14.37"},{"key":"36_CR28","doi-asserted-by":"crossref","unstructured":"Zagoruyko, S., Komodakis, N.: Learning to compare image patches via convolutional neural networks. In: 2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4353\u20134361. IEEE (2015)","DOI":"10.1109\/CVPR.2015.7299064"},{"key":"36_CR29","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"766","DOI":"10.1007\/978-3-319-46478-7_47","volume-title":"Computer Vision \u2013 ECCV 2016","author":"K Zhang","year":"2016","unstructured":"Zhang, K., Chao, W.-L., Sha, F., Grauman, K.: Video summarization with long short-term memory. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016, Part VII. LNCS, vol. 9911, pp. 766\u2013782. Springer, Cham (2016). \n                      https:\/\/doi.org\/10.1007\/978-3-319-46478-7_47"},{"key":"36_CR30","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Xiong, Y., Wang, L., Wu, Z., Tang, X., Lin, D.: Temporal action detection with structured segment networks. In: The IEEE International Conference on Computer Vision (ICCV), vol. 8 (2017)","DOI":"10.1109\/ICCV.2017.317"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ACCV 2018"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-20887-5_36","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,27]],"date-time":"2019-05-27T12:23:54Z","timestamp":1558959834000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-20887-5_36"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030208868","9783030208875"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-20887-5_36","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"28 May 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ACCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asian Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Perth, WA","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Australia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 December 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 December 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"accv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/accv2018.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"979","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"274","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"2.7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}}]}}