{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T03:45:20Z","timestamp":1743047120208,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":25,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819985364"},{"type":"electronic","value":"9789819985371"}],"license":[{"start":{"date-parts":[[2023,12,26]],"date-time":"2023-12-26T00:00:00Z","timestamp":1703548800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,12,26]],"date-time":"2023-12-26T00:00:00Z","timestamp":1703548800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-99-8537-1_13","type":"book-chapter","created":{"date-parts":[[2023,12,25]],"date-time":"2023-12-25T19:02:17Z","timestamp":1703530937000},"page":"158-169","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Multimodal Local Feature Enhancement Network for\u00a0Video Summarization"],"prefix":"10.1007","author":[{"given":"Zhaoyun","family":"Li","sequence":"first","affiliation":[]},{"given":"Xiwei","family":"Ren","sequence":"additional","affiliation":[]},{"given":"Fengyi","family":"Du","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,12,26]]},"reference":[{"key":"13_CR1","unstructured":"Baevski, A., Zhou, Y., Mohamed, A., Auli, M.: wav2vec 2.0: a framework for self-supervised learning of speech representations. Adv. Neural Inf. Process. Syst. 33, 12449\u201312460 (2020)"},{"key":"13_CR2","doi-asserted-by":"crossref","unstructured":"De\u00a0Avila, S.E.F., Lopes, A.P.B., da\u00a0Luz\u00a0Jr, A., de\u00a0Albuquerque\u00a0Ara\u00fajo, A.: VSUMM: a mechanism designed to produce static video summaries and a novel evaluation method. Pattern Recogn. Lett. 32(1), 56\u201368 (2011)","DOI":"10.1016\/j.patrec.2010.08.004"},{"key":"13_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1007\/978-3-030-21074-8_4","volume-title":"Computer Vision \u2013 ACCV 2018 Workshops","author":"J Fajtl","year":"2019","unstructured":"Fajtl, J., Sokeh, H.S., Argyriou, V., Monekosso, D., Remagnino, P.: Summarizing videos with attention. In: Carneiro, G., You, S. (eds.) ACCV 2018. LNCS, vol. 11367, pp. 39\u201354. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-21074-8_4"},{"key":"13_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"505","DOI":"10.1007\/978-3-319-10584-0_33","volume-title":"Computer Vision \u2013 ECCV 2014","author":"M Gygli","year":"2014","unstructured":"Gygli, M., Grabner, H., Riemenschneider, H., Van Gool, L.: Creating summaries from user videos. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8695, pp. 505\u2013520. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10584-0_33"},{"key":"13_CR5","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"270","DOI":"10.1007\/978-3-031-18913-5_21","volume-title":"Pattern Recognition and Computer Vision","author":"P Ji","year":"2022","unstructured":"Ji, P., Yang, B., Zhang, T., Zou, Y.: Consensus-guided keyword targeting for video captioning. In: Yu, S., et al. (eds.) PRCV 2022, Part III. LNCS, vol. 13536, pp. 270\u2013281. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-18913-5_21"},{"key":"13_CR6","doi-asserted-by":"crossref","unstructured":"Li, H., Ke, Q., Gong, M., Drummond, T.: Progressive video summarization via multimodal self-supervised learning. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 5584\u20135593 (2023)","DOI":"10.1109\/WACV56688.2023.00554"},{"issue":"8","key":"13_CR7","doi-asserted-by":"publisher","first-page":"3652","DOI":"10.1109\/TIP.2017.2695887","volume":"26","author":"X Li","year":"2017","unstructured":"Li, X., Zhao, B., Lu, X.: A general framework for edited video and raw video summarization. IEEE Trans. Image Process. 26(8), 3652\u20133664 (2017)","journal-title":"IEEE Trans. Image Process."},{"issue":"6","key":"13_CR8","doi-asserted-by":"publisher","first-page":"1923","DOI":"10.1109\/TCYB.2017.2718579","volume":"48","author":"X Li","year":"2017","unstructured":"Li, X., Zhao, B., Lu, X.: Key frame extraction in the summary space. IEEE Trans. Cybern. 48(6), 1923\u20131934 (2017)","journal-title":"IEEE Trans. Cybern."},{"key":"13_CR9","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.108840","volume":"131","author":"G Liang","year":"2022","unstructured":"Liang, G., Lv, Y., Li, S., Zhang, S., Zhang, Y.: Video summarization with a convolutional attentive adversarial network. Pattern Recogn. 131, 108840 (2022)","journal-title":"Pattern Recogn."},{"key":"13_CR10","first-page":"13988","volume":"34","author":"M Narasimhan","year":"2021","unstructured":"Narasimhan, M., Rohrbach, A., Darrell, T.: Clip-it! language-guided video summarization. Adv. Neural. Inf. Process. Syst. 34, 13988\u201314000 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"13_CR11","doi-asserted-by":"crossref","unstructured":"Otani, M., Nakashima, Y., Rahtu, E., Heikkila, J.: Rethinking the evaluation of video summaries. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7596\u20137604 (2019)","DOI":"10.1109\/CVPR.2019.00778"},{"key":"13_CR12","doi-asserted-by":"crossref","unstructured":"Panayotov, V., Chen, G., Povey, D., Khudanpur, S.: LibriSpeech: an ASR corpus based on public domain audio books. In: 2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5206\u20135210. IEEE (2015)","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"13_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"540","DOI":"10.1007\/978-3-319-10599-4_35","volume-title":"Computer Vision \u2013 ECCV 2014","author":"D Potapov","year":"2014","unstructured":"Potapov, D., Douze, M., Harchaoui, Z., Schmid, C.: Category-specific video summarization. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8694, pp. 540\u2013555. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10599-4_35"},{"key":"13_CR14","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O.: ImageNet large scale visual recognition challenge. Int. J. Comput. Vision 115, 211\u2013252 (2015)","journal-title":"Int. J. Comput. Vision"},{"key":"13_CR15","doi-asserted-by":"crossref","unstructured":"Song, Y., Vallmitjana, J., Stent, A., Jaimes, A.: TVSum: summarizing web videos using titles. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5179\u20135187 (2015)","DOI":"10.1109\/CVPR.2015.7299154"},{"key":"13_CR16","doi-asserted-by":"crossref","unstructured":"Szegedy, C., et al.: Going deeper with convolutions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp.\u00a01\u20139 (2015)","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"13_CR17","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"13_CR18","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.artmed.2015.08.006","volume":"66","author":"S Wang","year":"2016","unstructured":"Wang, S., et al.: Scalable gastroscopic video summarization via similar-inhibition dictionary selection. Artif. Intell. Med. 66, 1\u201313 (2016)","journal-title":"Artif. Intell. Med."},{"key":"13_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"766","DOI":"10.1007\/978-3-319-46478-7_47","volume-title":"Computer Vision \u2013 ECCV 2016","author":"K Zhang","year":"2016","unstructured":"Zhang, K., Chao, W.-L., Sha, F., Grauman, K.: Video summarization with long short-term memory. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9911, pp. 766\u2013782. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46478-7_47"},{"key":"13_CR20","unstructured":"Zhao, B., Gong, M., Li, X.: Audiovisual video summarization. IEEE Trans. Neural Netw. Learn. Syst. (2021)"},{"key":"13_CR21","doi-asserted-by":"publisher","first-page":"360","DOI":"10.1016\/j.neucom.2021.10.039","volume":"468","author":"B Zhao","year":"2022","unstructured":"Zhao, B., Gong, M., Li, X.: Hierarchical multimodal transformer to summarize videos. Neurocomputing 468, 360\u2013369 (2022)","journal-title":"Neurocomputing"},{"key":"13_CR22","doi-asserted-by":"crossref","unstructured":"Zhao, B., Li, X., Lu, X.: Hierarchical recurrent neural network for video summarization. In: Proceedings of the 25th ACM International Conference on Multimedia, pp. 863\u2013871 (2017)","DOI":"10.1145\/3123266.3123328"},{"key":"13_CR23","doi-asserted-by":"crossref","unstructured":"Zhao, B., Li, X., Lu, X.: HSA-RNN: hierarchical structure-adaptive RNN for video summarization. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7405\u20137414 (2018)","DOI":"10.1109\/CVPR.2018.00773"},{"key":"13_CR24","doi-asserted-by":"crossref","unstructured":"Zhou, K., Qiao, Y., Xiang, T.: Deep reinforcement learning for unsupervised video summarization with diversity-representativeness reward. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a032 (2018)","DOI":"10.1609\/aaai.v32i1.12255"},{"key":"13_CR25","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2021.108312","volume":"122","author":"W Zhu","year":"2022","unstructured":"Zhu, W., Lu, J., Han, Y., Zhou, J.: Learning multiscale hierarchical attention for video summarization. Pattern Recogn. 122, 108312 (2022)","journal-title":"Pattern Recogn."}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-99-8537-1_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,6]],"date-time":"2024-11-06T20:48:47Z","timestamp":1730926127000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-99-8537-1_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,26]]},"ISBN":["9789819985364","9789819985371"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-981-99-8537-1_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023,12,26]]},"assertion":[{"value":"26 December 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Xiamen","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 October 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/prcv2023.xmu.edu.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1420","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"532","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"37% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3,78","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3,69","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}