{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,17]],"date-time":"2026-01-17T19:41:47Z","timestamp":1768678907831,"version":"3.49.0"},"publisher-location":"Cham","reference-count":60,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031730207","type":"print"},{"value":"9783031730214","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T00:00:00Z","timestamp":1732147200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T00:00:00Z","timestamp":1732147200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73021-4_18","type":"book-chapter","created":{"date-parts":[[2024,11,20]],"date-time":"2024-11-20T09:19:25Z","timestamp":1732094365000},"page":"300-317","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Contextual Correspondence Matters: Bidirectional Graph Matching for\u00a0Video Summarization"],"prefix":"10.1007","author":[{"given":"Yunzuo","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Yameng","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,21]]},"reference":[{"key":"18_CR1","doi-asserted-by":"crossref","unstructured":"Apostolidis, E., Balaouras, G., Mezaris, V., Patras, I.: Combining global and local attention with positional encoding for video summarization. In: 2021 IEEE International Symposium on Multimedia, pp. 226\u2013234 (2021)","DOI":"10.1109\/ISM52913.2021.00045"},{"issue":"6","key":"18_CR2","doi-asserted-by":"publisher","first-page":"1048","DOI":"10.1109\/TPAMI.2009.28","volume":"31","author":"TS Caetano","year":"2009","unstructured":"Caetano, T.S., McAuley, J.J., Cheng, L., Le, Q.V., Smola, A.J.: Learning graph matching. IEEE Trans. Pattern Anal. Mach. Intell. 31(6), 1048\u20131058 (2009)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"18_CR3","doi-asserted-by":"crossref","unstructured":"Cai, S., Zuo, W., Davis, L.S., Zhang, L.: Weakly-supervised video summarization using variational encoder-decoder and web prior. In: Proceedings of the European Conference on Computer Vision, pp. 184\u2013200 (2018)","DOI":"10.1007\/978-3-030-01264-9_12"},{"issue":"4","key":"18_CR4","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3499027","volume":"18","author":"Y Cheng","year":"2022","unstructured":"Cheng, Y., Zhu, X., Qian, J., Wen, F., Liu, P.: Cross-modal graph matching network for image-text retrieval. ACM Trans. Multimed. Comput. Commun. Appl. 18(4), 1\u201323 (2022)","journal-title":"ACM Trans. Multimed. Comput. Commun. Appl."},{"key":"18_CR5","doi-asserted-by":"crossref","unstructured":"Chu, W.S., Song, Y., Jaimes, A.: Video co-summarization: video summarization by visual co-occurrence. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3584\u20133592 (2015)","DOI":"10.1109\/CVPR.2015.7298981"},{"key":"18_CR6","doi-asserted-by":"crossref","unstructured":"De\u00a0Avila, S.E.F., Lopes, A.P.B., da\u00a0Luz, A., Jr., de\u00a0Albuquerque\u00a0Ara\u00fajo, A.: VSUMM: a mechanism designed to produce static video summaries and a novel evaluation method. Pattern Recogn. Lett. 32(1), 56\u201368 (2011)","DOI":"10.1016\/j.patrec.2010.08.004"},{"key":"18_CR7","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255 (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"18_CR8","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1016\/j.patrec.2020.12.016","volume":"143","author":"H Fu","year":"2021","unstructured":"Fu, H., Wang, H.: Self-attention binary neural tree for video summarization. Pattern Recogn. Lett. 143, 19\u201326 (2021)","journal-title":"Pattern Recogn. Lett."},{"key":"18_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"505","DOI":"10.1007\/978-3-319-10584-0_33","volume-title":"Computer Vision \u2013 ECCV 2014","author":"M Gygli","year":"2014","unstructured":"Gygli, M., Grabner, H., Riemenschneider, H., Van Gool, L.: Creating summaries from user videos. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8695, pp. 505\u2013520. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10584-0_33"},{"key":"18_CR10","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"18_CR11","doi-asserted-by":"crossref","unstructured":"He, X., et al.: Unsupervised video summarization with attentive conditional generative adversarial networks. In: Proceedings of the ACM International Conference on Multimedia, pp. 2296\u20132304 (2019)","DOI":"10.1145\/3343031.3351056"},{"issue":"8","key":"18_CR12","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"issue":"2","key":"18_CR13","doi-asserted-by":"publisher","first-page":"577","DOI":"10.1109\/TCSVT.2019.2890899","volume":"30","author":"C Huang","year":"2020","unstructured":"Huang, C., Wang, H.: A novel key-frames selection framework for comprehensive video summarization. IEEE Trans. Circuits Syst. Video Technol. 30(2), 577\u2013589 (2020)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"6","key":"18_CR14","doi-asserted-by":"publisher","first-page":"2654","DOI":"10.1109\/TIP.2018.2889265","volume":"28","author":"S Huang","year":"2018","unstructured":"Huang, S., Li, X., Zhang, Z., Wu, F., Han, J.: User-ranking video summarization with multi-stage spatio-temporal representation. IEEE Trans. Image Process. 28(6), 2654\u20132664 (2018)","journal-title":"IEEE Trans. Image Process."},{"key":"18_CR15","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107567","volume":"109","author":"T Hussain","year":"2021","unstructured":"Hussain, T., Muhammad, K., Ding, W., Lloret, J., Baik, S.W., de Albuquerque, V.H.C.: A comprehensive survey of multi-view video summarization. Pattern Recogn. 109, 107567 (2021)","journal-title":"Pattern Recogn."},{"issue":"6","key":"18_CR16","doi-asserted-by":"publisher","first-page":"1709","DOI":"10.1109\/TCSVT.2019.2904996","volume":"30","author":"Z Ji","year":"2020","unstructured":"Ji, Z., Xiong, K., Pang, Y., Li, X.: Video summarization with attention-based encoder-decoder networks. IEEE Trans. Circuits Syst. Video Technol. 30(6), 1709\u20131717 (2020)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"18_CR17","doi-asserted-by":"crossref","unstructured":"Jung, Y., Cho, D., Kim, D., Woo, S., Kweon, I.S.: Discriminative feature learning for unsupervised video summarization. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a033, pp. 8537\u20138544 (2019)","DOI":"10.1609\/aaai.v33i01.33018537"},{"key":"18_CR18","doi-asserted-by":"crossref","unstructured":"Li, H., Ke, Q., Gong, M., Drummond, T.: Progressive video summarization via multimodal self-supervised learning. In: Proceedings of the IEEE Winter Conference on Applications of Computer Vision, pp. 5584\u20135593 (2023)","DOI":"10.1109\/WACV56688.2023.00554"},{"issue":"3","key":"18_CR19","first-page":"3904","volume":"45","author":"H Li","year":"2023","unstructured":"Li, H., Ke, Q., Gong, M., Zhang, R.: Video joint modelling based on hierarchical transformer for co-summarization. IEEE Trans. Pattern Anal. Mach. Intell. 45(3), 3904\u20133917 (2023)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"18_CR20","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107677","volume":"111","author":"P Li","year":"2021","unstructured":"Li, P., Ye, Q., Zhang, L., Yuan, L., Xu, X., Shao, L.: Exploring global diverse attention via pairwise temporal relation for video summarization. Pattern Recogn. 111, 107677 (2021)","journal-title":"Pattern Recogn."},{"key":"18_CR21","doi-asserted-by":"crossref","unstructured":"Li, W., Liu, X., Yuan, Y.: Sigma: semantic-complete graph matching for domain adaptive object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5291\u20135300 (2022)","DOI":"10.1109\/CVPR52688.2022.00522"},{"issue":"8","key":"18_CR22","doi-asserted-by":"publisher","first-page":"3652","DOI":"10.1109\/TIP.2017.2695887","volume":"26","author":"X Li","year":"2017","unstructured":"Li, X., Zhao, B., Lu, X.: A general framework for edited video and raw video summarization. IEEE Trans. Image Process. 26(8), 3652\u20133664 (2017)","journal-title":"IEEE Trans. Image Process."},{"key":"18_CR23","doi-asserted-by":"crossref","unstructured":"Li, Y., Merialdo, B.: Multi-video summarization based on video-MMR. In: 11th International Workshop on Image Analysis for Multimedia Interactive Services WIAMIS 2010, pp.\u00a01\u20134 (2010)","DOI":"10.1109\/CBMI.2010.5529899"},{"key":"18_CR24","unstructured":"Li, Y., Gu, C., Dullien, T., Vinyals, O., Kohli, P.: Graph matching networks for learning the similarity of graph structured objects. In: International Conference on Machine Learning, pp. 3835\u20133845 (2019)"},{"issue":"4","key":"18_CR25","doi-asserted-by":"publisher","first-page":"1507","DOI":"10.1109\/TCSVT.2022.3214538","volume":"33","author":"M Liu","year":"2022","unstructured":"Liu, M., Jin, S., Yao, C., Lin, C., Zhao, Y.: Temporal consistency learning of inter-frames for video super-resolution. IEEE Trans. Circuits Syst. Video Technol. 33(4), 1507\u20131520 (2022)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"18_CR26","doi-asserted-by":"publisher","first-page":"1573","DOI":"10.1109\/TIP.2022.3143699","volume":"31","author":"T Liu","year":"2022","unstructured":"Liu, T., Meng, Q., Huang, J.J., Vlontzos, A., Rueckert, D., Kainz, B.: Video summarization through reinforcement learning with a 3D spatio-temporal U-net. IEEE Trans. Image Process. 31, 1573\u20131586 (2022)","journal-title":"IEEE Trans. Image Process."},{"key":"18_CR27","doi-asserted-by":"crossref","unstructured":"Liu, Y., Li, S., Wu, Y., Chen, C.W., Shan, Y., Qie, X.: UMT: unified multi-modal transformers for joint video moment retrieval and highlight detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3042\u20133051 (2022)","DOI":"10.1109\/CVPR52688.2022.00305"},{"key":"18_CR28","doi-asserted-by":"publisher","first-page":"319","DOI":"10.1016\/j.ins.2017.12.020","volume":"432","author":"I Mademlis","year":"2018","unstructured":"Mademlis, I., Tefas, A., Pitas, I.: A salient dictionary learning framework for activity video summarization via key-frame extraction. Inf. Sci. 432, 319\u2013331 (2018)","journal-title":"Inf. Sci."},{"key":"18_CR29","doi-asserted-by":"crossref","unstructured":"Mahasseni, B., Lam, M., Todorovic, S.: Unsupervised video summarization with adversarial LSTM networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 202\u2013211 (2017)","DOI":"10.1109\/CVPR.2017.318"},{"key":"18_CR30","doi-asserted-by":"crossref","unstructured":"Meng, J., Wang, S., Wang, H., Yuan, J., Tan, Y.P.: Video summarization via multi-view representative selection. In: Proceedings of the IEEE International Conference on Computer Vision Workshops, pp. 1189\u20131198 (2017)","DOI":"10.1109\/ICCVW.2017.144"},{"key":"18_CR31","doi-asserted-by":"crossref","unstructured":"Moon, W., Hyun, S., Park, S., Park, D., Heo, J.P.: Query-dependent video representation for moment retrieval and highlight detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 23023\u201323033 (2023)","DOI":"10.1109\/CVPR52729.2023.02205"},{"key":"18_CR32","doi-asserted-by":"crossref","unstructured":"Otani, M., Nakashima, Y., Rahtu, E., Heikkila, J.: Rethinking the evaluation of video summaries. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7596\u20137604 (2019)","DOI":"10.1109\/CVPR.2019.00778"},{"key":"18_CR33","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"647","DOI":"10.1007\/978-3-030-58595-2_39","volume-title":"Computer Vision \u2013 ECCV 2020","author":"J Park","year":"2020","unstructured":"Park, J., Lee, J., Kim, I.-J., Sohn, K.: SumGraph: video summarization via recursive graph modeling. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12370, pp. 647\u2013663. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58595-2_39"},{"key":"18_CR34","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"540","DOI":"10.1007\/978-3-319-10599-4_35","volume-title":"Computer Vision \u2013 ECCV 2014","author":"D Potapov","year":"2014","unstructured":"Potapov, D., Douze, M., Harchaoui, Z., Schmid, C.: Category-specific video summarization. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8694, pp. 540\u2013555. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10599-4_35"},{"key":"18_CR35","doi-asserted-by":"crossref","unstructured":"Rochan, M., Ye, L., Wang, Y.: Video summarization using fully convolutional sequence networks. In: Proceedings of the European Conference on Computer Vision, pp. 347\u2013363 (2018)","DOI":"10.1007\/978-3-030-01258-8_22"},{"issue":"1","key":"18_CR36","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1109\/TNN.2008.2005605","volume":"20","author":"F Scarselli","year":"2008","unstructured":"Scarselli, F., Gori, M., Tsoi, A.C., Hagenbuchner, M., Monfardini, G.: The graph neural network model. IEEE Trans. Neural Networks 20(1), 61\u201380 (2008)","journal-title":"IEEE Trans. Neural Networks"},{"key":"18_CR37","doi-asserted-by":"crossref","unstructured":"Soldan, M., Xu, M., Qu, S., Tegner, J., Ghanem, B.: VLG-net: video-language graph matching network for video grounding. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3224\u20133234 (2021)","DOI":"10.1109\/ICCVW54120.2021.00361"},{"key":"18_CR38","doi-asserted-by":"crossref","unstructured":"Song, Y., Vallmitjana, J., Stent, A., Jaimes, A.: TVSUM: summarizing web videos using titles. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5179\u20135187 (2015)","DOI":"10.1109\/CVPR.2015.7299154"},{"key":"18_CR39","doi-asserted-by":"crossref","unstructured":"Szegedy, C., et al.: Going deeper with convolutions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2015)","DOI":"10.1109\/CVPR.2015.7298594"},{"issue":"5","key":"18_CR40","doi-asserted-by":"publisher","first-page":"1423","DOI":"10.1109\/TCSVT.2018.2830102","volume":"29","author":"Z Tu","year":"2019","unstructured":"Tu, Z., Xie, W., Dauwels, J., Li, B., Yuan, J.: Semantic cues enhanced multimodality multistream CNN for action recognition. IEEE Trans. Circuits Syst. Video Technol. 29(5), 1423\u20131437 (2019)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"18_CR41","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol.\u00a030 (2017)"},{"key":"18_CR42","doi-asserted-by":"crossref","unstructured":"Wang, S., Wang, R., Yao, Z., Shan, S., Chen, X.: Cross-modal scene graph matching for relationship-aware image-text retrieval. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 1508\u20131517 (2020)","DOI":"10.1109\/WACV45572.2020.9093614"},{"issue":"5","key":"18_CR43","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3326362","volume":"38","author":"Y Wang","year":"2019","unstructured":"Wang, Y., Sun, Y., Liu, Z., Sarma, S.E., Bronstein, M.M., Solomon, J.M.: Dynamic graph CNN for learning on point clouds. ACM Trans. Graph. 38(5), 1\u201312 (2019)","journal-title":"ACM Trans. Graph."},{"key":"18_CR44","unstructured":"Wu, F., Fan, A., Baevski, A., Dauphin, Y.N., Auli, M.: Pay less attention with lightweight and dynamic convolutions. arXiv preprint arXiv:1901.10430 (2019)"},{"key":"18_CR45","doi-asserted-by":"crossref","unstructured":"Xu, M., Wang, H., Ni, B., Zhu, R., Sun, Z., Wang, C.: Cross-category video highlight detection via set-based learning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7970\u20137979 (2021)","DOI":"10.1109\/ICCV48922.2021.00787"},{"key":"18_CR46","doi-asserted-by":"crossref","unstructured":"Zhang, K., Chao, W.L., Sha, F., Grauman, K.: Summary transfer: exemplar-based subset selection for video summarization. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1059\u20131067 (2016)","DOI":"10.1109\/CVPR.2016.120"},{"key":"18_CR47","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"766","DOI":"10.1007\/978-3-319-46478-7_47","volume-title":"Computer Vision \u2013 ECCV 2016","author":"K Zhang","year":"2016","unstructured":"Zhang, K., Chao, W.-L., Sha, F., Grauman, K.: Video summarization with long short-term memory. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9911, pp. 766\u2013782. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46478-7_47"},{"key":"18_CR48","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Liu, Y., Kang, W., Zheng, Y.: Mar-net: motion-assisted reconstruction network for unsupervised video summarization. IEEE Signal Process. Lett. (2023)","DOI":"10.1109\/LSP.2023.3313091"},{"key":"18_CR49","doi-asserted-by":"publisher","first-page":"2587","DOI":"10.1109\/LSP.2022.3227525","volume":"29","author":"Y Zhang","year":"2022","unstructured":"Zhang, Y., Liu, Y., Zhu, P., Kang, W.: Joint reinforcement and contrastive learning for unsupervised video summarization. IEEE Signal Process. Lett. 29, 2587\u20132591 (2022)","journal-title":"IEEE Signal Process. Lett."},{"key":"18_CR50","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Song, Z., Li, W.: Enhancement multi-module network for few-shot leaky cable fixture detection in railway tunnel. Signal Process. Image Commun. 116943 (2023)","DOI":"10.1016\/j.image.2023.116943"},{"issue":"6","key":"18_CR51","doi-asserted-by":"publisher","first-page":"1340","DOI":"10.1109\/TCSVT.2016.2539638","volume":"27","author":"Y Zhang","year":"2017","unstructured":"Zhang, Y., Tao, R., Wang, Y.: Motion-state-adaptive video summarization via spatiotemporal analysis. IEEE Trans. Circuits Syst. Video Technol. 27(6), 1340\u20131352 (2017)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"18_CR52","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2022.119467","volume":"216","author":"Y Zhang","year":"2023","unstructured":"Zhang, Y., Zhang, J., Liu, R., Zhu, P., Liu, Y.: Key frame extraction based on quaternion Fourier transform with multiple features fusion. Expert Syst. Appl. 216, 119467 (2023)","journal-title":"Expert Syst. Appl."},{"issue":"8","key":"18_CR53","doi-asserted-by":"publisher","first-page":"5181","DOI":"10.1109\/TNNLS.2021.3119969","volume":"34","author":"B Zhao","year":"2023","unstructured":"Zhao, B., Gong, M., Li, X.: Audiovisual video summarization. IEEE Trans. Neural Netw. Learn. Syst. 34(8), 5181\u20135188 (2023)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"issue":"5","key":"18_CR54","first-page":"2793","volume":"44","author":"B Zhao","year":"2022","unstructured":"Zhao, B., Li, H., Lu, X., Li, X.: Reconstructive sequence-graph network for video summarization. IEEE Trans. Pattern Anal. Mach. Intell. 44(5), 2793\u20132801 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"18_CR55","doi-asserted-by":"crossref","unstructured":"Zhao, B., Li, X., Lu, X.: Hierarchical recurrent neural network for video summarization. In: Proceedings of the ACM International Conference on Multimedia, pp. 863\u2013871 (2017)","DOI":"10.1145\/3123266.3123328"},{"key":"18_CR56","doi-asserted-by":"publisher","first-page":"360","DOI":"10.1016\/j.neucom.2021.10.039","volume":"468","author":"B Zhao","year":"2022","unstructured":"Zhao, B., Gong, M., Li, X.: Hierarchical multimodal transformer to summarize videos. Neurocomputing 468, 360\u2013369 (2022)","journal-title":"Neurocomputing"},{"key":"18_CR57","doi-asserted-by":"crossref","unstructured":"Zhao, B., Li, X., Lu, X.: HSA-RNN: hierarchical structure-adaptive RNN for video summarization. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7405\u20137414 (2018)","DOI":"10.1109\/CVPR.2018.00773"},{"key":"18_CR58","doi-asserted-by":"crossref","unstructured":"Zhou, K., Qiao, Y., Xiang, T.: Deep reinforcement learning for unsupervised video summarization with diversity-representativeness reward. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a032 (2018)","DOI":"10.1609\/aaai.v32i1.12255"},{"key":"18_CR59","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2021.108312","volume":"122","author":"W Zhu","year":"2022","unstructured":"Zhu, W., Lu, J., Han, Y., Zhou, J.: Learning multiscale hierarchical attention for video summarization. Pattern Recogn. 122, 108312 (2022)","journal-title":"Pattern Recogn."},{"key":"18_CR60","doi-asserted-by":"publisher","first-page":"948","DOI":"10.1109\/TIP.2020.3039886","volume":"30","author":"W Zhu","year":"2020","unstructured":"Zhu, W., Lu, J., Li, J., Zhou, J.: DSNet: a flexible detect-to-summarize network for video summarization. IEEE Trans. Image Process. 30, 948\u2013962 (2020)","journal-title":"IEEE Trans. Image Process."}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73021-4_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,20]],"date-time":"2024-11-20T09:46:33Z","timestamp":1732095993000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73021-4_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,21]]},"ISBN":["9783031730207","9783031730214"],"references-count":60,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73021-4_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,21]]},"assertion":[{"value":"21 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}