{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,16]],"date-time":"2025-10-16T10:07:25Z","timestamp":1760609245441,"version":"3.37.3"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"45-46","license":[{"start":{"date-parts":[[2020,3,3]],"date-time":"2020-03-03T00:00:00Z","timestamp":1583193600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,3,3]],"date-time":"2020-03-03T00:00:00Z","timestamp":1583193600000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2020,12]]},"DOI":"10.1007\/s11042-020-08690-3","type":"journal-article","created":{"date-parts":[[2020,3,3]],"date-time":"2020-03-03T15:02:52Z","timestamp":1583247772000},"page":"33875-33890","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["Modality correlation-based video summarization"],"prefix":"10.1007","volume":"79","author":[{"given":"Xingrun","family":"Wang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9644-9723","authenticated-orcid":false,"given":"Xiushan","family":"Nie","sequence":"additional","affiliation":[]},{"given":"Xingbo","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Binze","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Yilong","family":"Yin","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,3,3]]},"reference":[{"key":"8690_CR1","doi-asserted-by":"crossref","unstructured":"Aner A, Kender JR (2002) Video summaries through mosaic-based shot and scene clustering. In: European conference on computer vision. Springer, Berlin, pp 388\u2013402","DOI":"10.1007\/3-540-47979-1_26"},{"key":"8690_CR2","doi-asserted-by":"crossref","unstructured":"Chakraborty S, Tickoo O, Iyer R (2015) Adaptive keyframe selection for video summarization. In: 2015 IEEE winter conference on applications of computer vision (WACV). IEEE, pp 702\u2013709","DOI":"10.1109\/WACV.2015.99"},{"issue":"1","key":"8690_CR3","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1109\/TMM.2011.2166951","volume":"14","author":"Y Cong","year":"2012","unstructured":"Cong Y, Yuan J, Luo J (2012) Towards scalable summarization of consumer videos via sparse dictionary selection. IEEE Trans Multimed 14(1):66\u201375","journal-title":"IEEE Trans Multimed"},{"issue":"1","key":"8690_CR4","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1016\/j.patrec.2010.08.004","volume":"32","author":"SEF De Avila","year":"2011","unstructured":"De Avila SEF, Lopes APB, da Luz A Jr, de Albuquerque Ara\u00fajo A (2011) Vsumm: a mechanism designed to produce static video summaries and a novel evaluation method. Pattern Recogn Lett 32(1):56\u201368","journal-title":"Pattern Recogn Lett"},{"key":"8690_CR5","unstructured":"Gong B, Chao WL, Grauman K, Sha F (2014) Diverse sequential subset selection for supervised video summarization. In: Advances in neural information processing systems, pp 2069\u20132077"},{"issue":"4","key":"8690_CR6","doi-asserted-by":"publisher","first-page":"729","DOI":"10.1109\/TCSVT.2012.2214871","volume":"23","author":"G Guan","year":"2013","unstructured":"Guan G, Wang Z, Lu S, Da Deng J, Feng DD (2013) Keypoint-based keyframe selection. IEEE Trans Circ Sys Video Technol 23(4):729\u2013734","journal-title":"IEEE Trans Circ Sys Video Technol"},{"key":"8690_CR7","doi-asserted-by":"crossref","unstructured":"Gygli M, Grabner H, Riemenschneider H, Van Gool L (2014) Creating summaries from user videos. In: European conference on computer vision. Springer, Berlin, pp 505\u2013520","DOI":"10.1007\/978-3-319-10584-0_33"},{"key":"8690_CR8","doi-asserted-by":"crossref","unstructured":"Hadi Y, Essannouni F, Thami ROH (2006) Video summarization by k-medoid clustering. In: Proceedings of the 2006 ACM symposium on applied computing. ACM, pp 1400\u20131401","DOI":"10.1145\/1141277.1141601"},{"issue":"7","key":"8690_CR9","doi-asserted-by":"publisher","first-page":"2723","DOI":"10.1109\/TIP.2013.2256919","volume":"22","author":"J Han","year":"2013","unstructured":"Han J, Ji X, Hu X, Zhu D, Li K, Jiang X, Cui G, Guo L, Liu T (2013) Representing and retrieving video shots in human-centric brain imaging space. IEEE Trans Image Process 22(7):2723\u20132736","journal-title":"IEEE Trans Image Process"},{"key":"8690_CR10","doi-asserted-by":"publisher","unstructured":"Han Y, Zhu L, Cheng Z, Li J, Liu X (2018) Discrete optimal graph clustering[J]. IEEE Trans Cybern. https:\/\/doi.org\/10.1109\/TCYB.2018.2881539","DOI":"10.1109\/TCYB.2018.2881539"},{"issue":"8","key":"8690_CR11","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory. Neural Comput 9(8):1735\u20131780","journal-title":"Neural Comput"},{"key":"8690_CR12","doi-asserted-by":"crossref","unstructured":"Hong R, Tang J, Tan HK, Yan S, Ngo C, Chua TS (2009) Event driven summarization for web videos. In: Proceedings of the first SIGMM workshop on social media. ACM, pp 43\u201348","DOI":"10.1145\/1631144.1631154"},{"key":"8690_CR13","doi-asserted-by":"crossref","unstructured":"Hu T, Li Z, Su W, Mu X, Tang J (2017) Unsupervised video summaries using multiple features and image quality. In: 2017 IEEE third international conference on multimedia big data (BigMM). IEEE, pp 117\u2013120","DOI":"10.1109\/BigMM.2017.19"},{"key":"8690_CR14","doi-asserted-by":"crossref","unstructured":"Kang HW, Matsushita Y, Tang X, Chen XQ (2006) Space-time video montage. In: 2006 IEEE computer society conference on computer vision and pattern recognition (CVPR\u201906), vol 2. IEEE , pp 1331\u20131338","DOI":"10.1109\/CVPR.2006.284"},{"key":"8690_CR15","doi-asserted-by":"crossref","unstructured":"Khosla A, Hamid R, Lin CJ, Sundaresan N (2013) Large-scale video summarization using web-image priors. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2698\u20132705","DOI":"10.1109\/CVPR.2013.348"},{"key":"8690_CR16","unstructured":"Kiros R, Zhu Y, Salakhutdinov RR, Zemel R, Urtasun R, Torralba A, Fidler S (2015) Skip-thought vectors. In: Advances in neural information processing systems, pp 3294\u20133302"},{"issue":"7","key":"8690_CR17","doi-asserted-by":"publisher","first-page":"1212","DOI":"10.1016\/j.jvcir.2013.08.003","volume":"24","author":"SK Kuanar","year":"2013","unstructured":"Kuanar SK, Panda R, Chowdhury AS (2013) Video key frame extraction through dynamic delaunay clustering with a structural constraint. J Vis Commun Image Represent 24(7):1212\u20131227","journal-title":"J Vis Commun Image Represent"},{"key":"8690_CR18","unstructured":"Lee YJ, Ghosh J, Grauman K (2012) Discovering important people and objects for egocentric video summarization. In: 2012 IEEE conference on computer vision and pattern recognition (CVPR). IEEE, pp 1346\u20131353"},{"issue":"5","key":"8690_CR19","doi-asserted-by":"publisher","first-page":"1381","DOI":"10.1109\/TNNLS.2018.2868854","volume":"30","author":"J Li","year":"2018","unstructured":"Li J, Lu K, Huang Z, Zhu L, Shen HT (2018) Heterogeneous domain adaptation through progressive alignment. IEEE Trans Neural Netw Learning Sys 30 (5):1381\u20131391","journal-title":"IEEE Trans Neural Netw Learning Sys"},{"issue":"6","key":"8690_CR20","doi-asserted-by":"publisher","first-page":"2144","DOI":"10.1109\/TCYB.2018.2820174","volume":"49","author":"J Li","year":"2018","unstructured":"Li J, Lu K, Huang Z, Zhu L, Shen HT (2018) Transfer independently together: a generalized framework for domain adaptation. IEEE Trans Cybern 49 (6):2144\u20132155","journal-title":"IEEE Trans Cybern"},{"issue":"11","key":"8690_CR21","doi-asserted-by":"publisher","first-page":"3516","DOI":"10.1109\/TCYB.2016.2565898","volume":"47","author":"J Li","year":"2016","unstructured":"Li J, Wu Y, Zhao J, Lu K (2016) Low-rank discriminant embedding for multiview learning. IEEE Trans Cybern 47(11):3516\u20133529","journal-title":"IEEE Trans Cybern"},{"issue":"8","key":"8690_CR22","doi-asserted-by":"publisher","first-page":"3652","DOI":"10.1109\/TIP.2017.2695887","volume":"26","author":"X Li","year":"2017","unstructured":"Li X, Zhao B, Lu X (2017) A general framework for edited video and raw video summarization. IEEE Trans Image Process 26(8):3652\u20133664","journal-title":"IEEE Trans Image Process"},{"key":"8690_CR23","doi-asserted-by":"crossref","unstructured":"Lin D, Fidler S, Kong C, Urtasun R (2014) Visual semantic search: retrieving videos via complex textual queries. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2657\u20132664","DOI":"10.1109\/CVPR.2014.340"},{"issue":"12","key":"8690_CR24","doi-asserted-by":"publisher","first-page":"2178","DOI":"10.1109\/TPAMI.2010.31","volume":"32","author":"D Liu","year":"2010","unstructured":"Liu D, Hua G, Chen T (2010) A hierarchical visual model for video object summarization. IEEE Trans Pattern Anal Mach Intell 32(12):2178\u20132190","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"8690_CR25","doi-asserted-by":"crossref","unstructured":"Lu Z, Grauman K (2013) Story-driven summarization for egocentric video. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2714\u20132721","DOI":"10.1109\/CVPR.2013.350"},{"key":"8690_CR26","unstructured":"Ma YF, Lu L, Zhang HJ, Li M (2002) A user attention model for video summarization. In: Proceedings of the tenth ACM international conference on multimedia. ACM, pp 533\u2013542"},{"issue":"1-2","key":"8690_CR27","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1023\/A:1013241718521","volume":"16","author":"J Nam","year":"2002","unstructured":"Nam J, Tewfik AH (2002) Event-driven video abstraction and visualization. Multimed Tools Appl 16(1-2):55\u201377","journal-title":"Multimed Tools Appl"},{"issue":"3","key":"8690_CR28","doi-asserted-by":"publisher","first-page":"301","DOI":"10.1016\/0304-4076(88)90048-6","volume":"38","author":"WK Newey","year":"1988","unstructured":"Newey WK (1988) Adaptive estimation of regression models via moment restrictions. J Econ 38(3):301\u2013339","journal-title":"J Econ"},{"key":"8690_CR29","unstructured":"Ngo CW, Ma YF, Zhang HJ (2003) Automatic video summarization by graph modeling. In: Ninth IEEE international conference on computer vision, 2003. Proceedings. IEEE, pp 104\u2013109"},{"issue":"11","key":"8690_CR30","doi-asserted-by":"publisher","first-page":"5585","DOI":"10.1109\/TIP.2018.2852503","volume":"27","author":"Y Peng","year":"2018","unstructured":"Peng Y, Qi J, Yuan Y (2018) Modality-specific cross-modal similarity measurement with recurrent attention network. IEEE Trans Image Process 27 (11):5585\u20135599","journal-title":"IEEE Trans Image Process"},{"key":"8690_CR31","doi-asserted-by":"crossref","unstructured":"Potapov D, Douze M, Harchaoui Z, Schmid C (2014) Category-specific video summarization. In: European conference on computer vision. Springer, Berlin, pp 540\u2013555","DOI":"10.1007\/978-3-319-10599-4_35"},{"key":"8690_CR32","doi-asserted-by":"crossref","unstructured":"Rush AM, Chopra S, Weston J (2015) A neural attention model for abstractive sentence summarization. arXiv:1509.00685","DOI":"10.18653\/v1\/D15-1044"},{"key":"8690_CR33","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. arXiv:1409.1556"},{"issue":"8","key":"8690_CR34","doi-asserted-by":"publisher","first-page":"1997","DOI":"10.1109\/TMM.2013.2271746","volume":"15","author":"J Song","year":"2013","unstructured":"Song J, Yang Y, Huang Z, Shen HT, Luo J (2013) Effective multiple feature hashing for large-scale near-duplicate video retrieval. IEEE Trans Multimed 15 (8):1997\u20132008","journal-title":"IEEE Trans Multimed"},{"key":"8690_CR35","unstructured":"Song Y, Vallmitjana J, Stent A, Jaimes A (2015) Tvsum: summarizing web videos using titles. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 5179\u20135187"},{"key":"8690_CR36","doi-asserted-by":"crossref","unstructured":"Sun K, Zhu J, Lei Z, Hou X, Zhang Q, Duan J, Qiu G (2017) Learning deep semantic attributes for user video summarization. In: 2017 IEEE international conference on multimedia and expo (ICME). IEEE, pp 643\u2013648","DOI":"10.1109\/ICME.2017.8019411"},{"issue":"7","key":"8690_CR37","doi-asserted-by":"publisher","first-page":"3157","DOI":"10.1109\/TIP.2016.2564638","volume":"25","author":"J Tang","year":"2016","unstructured":"Tang J, Wang K, Shao L (2016) Supervised matrix factorization hashing for cross-modal retrieval. IEEE Trans Image Process 25(7):3157\u20133166","journal-title":"IEEE Trans Image Process"},{"issue":"1","key":"8690_CR38","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1145\/1198302.1198305","volume":"3","author":"BT Truong","year":"2007","unstructured":"Truong BT, Venkatesh S (2007) Video abstraction: a systematic review and classification. ACM Trans Multimed Comput Commun Appl (TOMM) 3(1):3","journal-title":"ACM Trans Multimed Comput Commun Appl (TOMM)"},{"issue":"4","key":"8690_CR39","doi-asserted-by":"publisher","first-page":"643","DOI":"10.1016\/S0031-3203(96)00109-4","volume":"30","author":"HJ Zhang","year":"1997","unstructured":"Zhang HJ, Wu J, Zhong D, Smoliar SW (1997) An integrated system for content-based video retrieval and browsing. Pattern Recogn 30(4):643\u2013658","journal-title":"Pattern Recogn"},{"key":"8690_CR40","doi-asserted-by":"crossref","unstructured":"Zhang K, Chao WL, Sha F, Grauman K (2016) Video summarization with long short-term memory. In: European conference on computer vision. Springer, pp 766\u2013782","DOI":"10.1007\/978-3-319-46478-7_47"},{"issue":"11","key":"8690_CR41","doi-asserted-by":"publisher","first-page":"5469","DOI":"10.1109\/TIP.2016.2601493","volume":"25","author":"S Zhang","year":"2016","unstructured":"Zhang S, Zhu Y, Roy-Chowdhury AK (2016) Context-aware surveillance video summarization. IEEE Trans Image Process 25(11):5469\u20135478","journal-title":"IEEE Trans Image Process"},{"issue":"7","key":"8690_CR42","doi-asserted-by":"publisher","first-page":"1774","DOI":"10.1109\/TPAMI.2018.2847335","volume":"41","author":"Z Zhang","year":"2018","unstructured":"Zhang Z, Liu L, Shen F, Shen HT, Shao L (2018) Binary multi-view clustering. IEEE Trans Pattern Anal Mach Intell 41(7):1774\u20131782","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"10","key":"8690_CR43","doi-asserted-by":"publisher","first-page":"4645","DOI":"10.1109\/TNNLS.2017.2772264","volume":"29","author":"Z Zhang","year":"2017","unstructured":"Zhang Z, Shao L, Xu Y, Liu L, Yang J (2017) Marginal representation learning with graph structure self-adaptation. IEEE Trans Neural Netw Learn Sys 29 (10):4645\u20134659","journal-title":"IEEE Trans Neural Netw Learn Sys"},{"issue":"7","key":"8690_CR44","doi-asserted-by":"publisher","first-page":"3111","DOI":"10.1109\/TNNLS.2017.2712801","volume":"29","author":"Z Zhang","year":"2017","unstructured":"Zhang Z, Xu Y, Shao L, Yang J (2017) Discriminative block-diagonal representation learning for image recognition. IEEE Trans Neural Netw Learn Sys 29 (7):3111\u20133125","journal-title":"IEEE Trans Neural Netw Learn Sys"},{"key":"8690_CR45","unstructured":"Zhuang Y, Rui Y, Huang TS, Mehrotra S (1998) Adaptive key frame extraction using unsupervised clustering. In: 1998 international conference on image processing, 1998. ICIP 98. Proceedings, vol 1. IEEE, pp 866\u2013870"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-020-08690-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11042-020-08690-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-020-08690-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,3,3]],"date-time":"2021-03-03T00:30:38Z","timestamp":1614731438000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11042-020-08690-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,3,3]]},"references-count":45,"journal-issue":{"issue":"45-46","published-print":{"date-parts":[[2020,12]]}},"alternative-id":["8690"],"URL":"https:\/\/doi.org\/10.1007\/s11042-020-08690-3","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"type":"print","value":"1380-7501"},{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2020,3,3]]},"assertion":[{"value":"16 February 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 December 2019","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 January 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 March 2020","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}