{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T05:21:21Z","timestamp":1755926481179},"reference-count":81,"publisher":"Springer Science and Business Media LLC","issue":"45-46","license":[{"start":{"date-parts":[[2020,8,15]],"date-time":"2020-08-15T00:00:00Z","timestamp":1597449600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,8,15]],"date-time":"2020-08-15T00:00:00Z","timestamp":1597449600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2020,12]]},"DOI":"10.1007\/s11042-020-09589-9","type":"journal-article","created":{"date-parts":[[2020,8,15]],"date-time":"2020-08-15T12:02:21Z","timestamp":1597492941000},"page":"33519-33546","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["A survey on description and modeling of audiovisual documents"],"prefix":"10.1007","volume":"79","author":[{"given":"Manel","family":"Fourati","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Anis","family":"Jedidi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Faiez","family":"Gargouri","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,8,15]]},"reference":[{"key":"9589_CR1","unstructured":"AE Abduraman, SA Berrani, and B Merialdo (2012). \u201cTV Program Structuring Techniques,\u201d TV Content Anal. Tech. Appl., p. 157"},{"key":"9589_CR2","doi-asserted-by":"crossref","unstructured":"S Antol, A Agrawal, J Lu, M Mitchell, D Batra, C Lawrence Zitnick, and D Parikh (2015). \u201cVqa: Visual question answering,\u201d in Proceedings of the IEEE international conference on computer vision, pp. 2425\u20132433","DOI":"10.1109\/ICCV.2015.279"},{"key":"9589_CR3","unstructured":"D Arthur and S Vassilvitskii (2007), \u201ck-means++: The advantages of careful seeding,\u201d in Proceedings of the eighteenth annual ACM-SIAM symposium on Discrete algorithms, pp. 1027\u20131035"},{"key":"9589_CR4","unstructured":"B Bachimont (1994). \u201cLe Contr\u00f4le Dans les Syst\u00e8mes \u00c0 Base de Connaissances Contribution \u00c0 l\u2019\u00c9pist\u00e9mologie de l'Intelligence Artificielle\u201d"},{"issue":"1","key":"9589_CR5","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/s11042-010-0643-7","volume":"51","author":"L Ballan","year":"2011","unstructured":"Ballan L, Bertini M, Del Bimbo A, Seidenari L, Serra G (2011) Event detection and recognition for semantic annotation of video. Multimed Tools Appl 51(1):279\u2013302","journal-title":"Multimed Tools Appl"},{"key":"9589_CR6","unstructured":"S Banerjee and A Lavie (2005). \u201cMETEOR: An automatic metric for MT evaluation with improved correlation with human judgments,\u201d in Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization, pp. 65\u201372"},{"issue":"1\/2","key":"9589_CR7","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1108\/LR-05-2016-0047","volume":"66","author":"RK Bhardwaj","year":"2017","unstructured":"Bhardwaj RK, Margam M (2017) Metadata framework for online legal information system in indian environment. Libr Rev 66(1\/2):49\u201368","journal-title":"Libr Rev"},{"issue":"1","key":"9589_CR8","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1023\/A:1010933404324","volume":"45","author":"L Breiman","year":"2001","unstructured":"Breiman L (2001) Random forests. Mach Learn 45(1):5\u201332","journal-title":"Mach Learn"},{"issue":"1","key":"9589_CR9","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1016\/j.cviu.2008.07.003","volume":"113","author":"GJ Burghouts","year":"2009","unstructured":"Burghouts GJ, Geusebroek J-M (2009) Performance evaluation of local colour invariants. Comput Vis Image Underst 113(1):48\u201362","journal-title":"Comput Vis Image Underst"},{"issue":"3","key":"9589_CR10","doi-asserted-by":"publisher","first-page":"1777","DOI":"10.1007\/s11042-013-1651-1","volume":"73","author":"M Caillet","year":"2014","unstructured":"Caillet M, Roisin C, Carrive J (2014) Multimedia applications for playing with digitized theater performances. Multimed Tools Appl 73(3):1777\u20131793","journal-title":"Multimed Tools Appl"},{"key":"9589_CR11","unstructured":"X Chang, Y Yang, A Hauptmann, EP Xing, and YL Yu 2015. \u201cSemantic concept discovery for large-scale zero-shot event detection,\u201d in Twenty-fourth international joint conference on artificial intelligence"},{"key":"9589_CR12","unstructured":"M Chen and A Hauptmann (1995). \u201cMosift: recognizing human actions in surveillance videos,\u201d"},{"issue":"1","key":"9589_CR13","doi-asserted-by":"publisher","first-page":"28","DOI":"10.1177\/0165551513507405","volume":"40","author":"MY Chuttur","year":"2014","unstructured":"Chuttur MY (2014) Investigating the effect of definitions and best practice guidelines on errors in Dublin Core metadata records. J Inf Sci 40(1):28\u201337","journal-title":"J Inf Sci"},{"key":"9589_CR14","doi-asserted-by":"crossref","unstructured":"N Dalal, B Triggs, and C Schmid (2006). \u201cHuman detection using oriented histograms of flow and appearance,\u201d in European conference on computer vision, pp. 428\u2013441","DOI":"10.1007\/11744047_33"},{"issue":"2\u20133","key":"9589_CR15","doi-asserted-by":"publisher","first-page":"331","DOI":"10.1007\/s11042-009-0387-4","volume":"46","author":"S Dasiopoulou","year":"2010","unstructured":"Dasiopoulou S, Tzouvaras V, Kompatsiaris I, Strintzis MG (2010) Enquiring MPEG-7 based multimedia ontologies. Multimed Tools Appl 46(2\u20133):331\u2013370","journal-title":"Multimed Tools Appl"},{"key":"9589_CR16","doi-asserted-by":"crossref","unstructured":"Z De Linde and N Kay (2016). The semiotics of subtitling. Routledge","DOI":"10.4324\/9781315538686"},{"issue":"5","key":"9589_CR17","doi-asserted-by":"publisher","first-page":"427","DOI":"10.1007\/s00530-013-0306-4","volume":"19","author":"M Del Fabro","year":"2013","unstructured":"Del Fabro M, B\u00f6sz\u00f6rmenyi L (2013) State-of-the-art and future challenges in video scene detection: a survey. Multimedia Systems 19(5):427\u2013454","journal-title":"Multimedia Systems"},{"issue":"2","key":"9589_CR18","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1007\/s13740-016-0060-9","volume":"5","author":"Y Deldjoo","year":"2016","unstructured":"Deldjoo Y, Elahi M, Cremonesi P, Garzotto F, Piazzolla P, Quadrana M (2016) Content-based video recommendation system based on stylistic visual features. J Data Semant 5(2):99\u2013113","journal-title":"J Data Semant"},{"issue":"4","key":"9589_CR19","doi-asserted-by":"publisher","first-page":"207","DOI":"10.1007\/s13735-018-0155-1","volume":"7","author":"Y Deldjoo","year":"2018","unstructured":"Deldjoo Y, Elahi M, Quadrana M, Cremonesi P (2018) Using visual features based on MPEG-7 and deep learning for movie recommendation. Int J Multimed Inf Retr 7(4):207\u2013219","journal-title":"Int J Multimed Inf Retr"},{"key":"9589_CR20","unstructured":"B Dervin (1992). \u201cFrom the mind\u2019s eye of the user: the sense-making qualitative-quantitative methodology,\u201d Sense-making Methodol. Read"},{"key":"9589_CR21","unstructured":"E Egyed-Zsigmond, Y Pri\u00e9, A Mille, and JM Pinon (2000). \u201cA graph based audio-visual document annotation and browsing system,\u201d in Content-Based Multimedia Information Access-Volume 2, pp. 1381\u20131389"},{"issue":"4","key":"9589_CR22","doi-asserted-by":"publisher","first-page":"1397","DOI":"10.1007\/s11042-014-1955-9","volume":"74","author":"N Elleuch","year":"2015","unstructured":"Elleuch N, Ben Ammar A, Alimi A (2015) A generic framework for semantic video indexing based on visual concepts\/contexts detection. Multimed Tools Appl 74(4):1397\u20131421","journal-title":"Multimed Tools Appl"},{"key":"9589_CR23","doi-asserted-by":"publisher","first-page":"404","DOI":"10.1016\/j.patcog.2019.01.038","volume":"90","author":"Z Fang","year":"2019","unstructured":"Fang Z, Liu J, Li Y, Qiao Y, Lu H (2019) Improving visual question answering using dropout and enhanced question encoder. Pattern Recogn 90:404\u2013414","journal-title":"Pattern Recogn"},{"issue":"2","key":"9589_CR24","doi-asserted-by":"publisher","first-page":"52","DOI":"10.4018\/IJMDEM.2015040104","volume":"6","author":"M Fourati","year":"2015","unstructured":"Fourati M, Jedidi A, Ben Hassin H, Gargouri F (2015) Towards fusion of textual and visual modalities for describing audiovisual documents. Int J Multimed Data Eng Manag 6(2):52\u201370","journal-title":"Int J Multimed Data Eng Manag"},{"key":"9589_CR25","doi-asserted-by":"crossref","unstructured":"Fourati M, Jedidi A, Gargouri F (2015) Topic and Thematic Description for Movies Documents. In: Arik S, Huang T, Lai WK, Liu Q (eds) Neural Information Processing SE - 54, vol. 9492. Springer International Publishing, pp 453\u2013462","DOI":"10.1007\/978-3-319-26561-2_54"},{"key":"9589_CR26","doi-asserted-by":"crossref","unstructured":"Z Gan, C Gan, X He, Y Pu, K Tran, J Gao, L Carin, and L Deng (2017). \u201cSemantic compositional networks for visual captioning,\u201d in Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 5630\u20135639","DOI":"10.1109\/CVPR.2017.127"},{"issue":"9","key":"9589_CR27","doi-asserted-by":"publisher","first-page":"2045","DOI":"10.1109\/TMM.2017.2729019","volume":"19","author":"L Gao","year":"2017","unstructured":"Gao L, Guo Z, Zhang H, Xu X, Shen HT (2017) Video captioning with attention-based LSTM and semantic consistency. IEEE Trans Multimed 19(9):2045\u20132055","journal-title":"IEEE Trans Multimed"},{"key":"9589_CR28","unstructured":"M Gluck (1997). \u201cMaking sense of semiotics: privileging respondents in revealing contextual geographic syntactic and semantic codes,\u201d in Proceedings of an international conference on Information seeking in context, pp. 53\u201366"},{"key":"9589_CR29","unstructured":"A Holzinger, G Searle, A Auinger, and M Ziefle (2011). \u201cInformatics as Semiotics Engineering: Lessons Learned from Design, Development and Evaluation of Ambient Assisted Living Applications for Elderly People BT - Universal Access in Human-Computer Interaction. Context Diversity,\u201d, pp. 183\u2013192"},{"key":"9589_CR30","doi-asserted-by":"crossref","unstructured":"NJ Janwe and KK Bhoyar (2013). \u201cVideo shot boundary detection based on JND color histogram,\u201d in 2013 IEEE Second International Conference on Image Information Processing (ICIIP-2013), pp. 476\u2013480","DOI":"10.1109\/ICIIP.2013.6707637"},{"key":"9589_CR31","unstructured":"Jedidi A (2005) Mod\u00e9lisation g\u00e9n\u00e9rique de documents multim\u00e9dia par des m\u00e9tadonn\u00e9es: m\u00e9canismes d\u2019annotation et d'interrogation. Universit\u00e9 Paul Sabatier-Toulouse III"},{"issue":"1","key":"9589_CR32","doi-asserted-by":"publisher","first-page":"42","DOI":"10.1109\/TMM.2009.2036235","volume":"12","author":"Y-G Jiang","year":"2009","unstructured":"Jiang Y-G, Yang J, Ngo C-W, Hauptmann AG (2009) Representations of keypoint-based semantic concept detection: a comprehensive study. IEEE Trans Multimed 12(1):42\u201353","journal-title":"IEEE Trans Multimed"},{"key":"9589_CR33","doi-asserted-by":"crossref","unstructured":"S Kim, H Hong, and J Nang (2015). \u201cA Gradual Shot Change Detection using Combination of Luminance and Motion Features for Frame Rate Up Conversion,\u201d in 2015 11th International Conference on Signal-Image Technology & Internet-Based Systems (SITIS), pp. 295\u2013299","DOI":"10.1109\/SITIS.2015.33"},{"issue":"2\u20133","key":"9589_CR34","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1007\/s11263-005-1838-7","volume":"64","author":"I Laptev","year":"2005","unstructured":"Laptev I (2005) On space-time interest points. Int J Comput Vis 64(2\u20133):107\u2013123","journal-title":"Int J Comput Vis"},{"issue":"3","key":"9589_CR35","doi-asserted-by":"publisher","first-page":"726","DOI":"10.1109\/TMM.2017.2751140","volume":"20","author":"L Li","year":"2017","unstructured":"Li L, Tang S, Zhang Y, Deng L, Tian Q (2017) Gla: global\u2013local attention for image description. IEEE Trans Multimed 20(3):726\u2013737","journal-title":"IEEE Trans Multimed"},{"key":"9589_CR36","unstructured":"Z Liu (2013). \u201cA semiotic interpretation of sense-making in information seeking,\u201d Libr. Philos. Pract., p. 1"},{"issue":"2","key":"9589_CR37","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Lowe","year":"2004","unstructured":"Lowe DG (2004) Distinctive image features from scale-invariant keypoints. Int J Comput Vis 60(2):91\u2013110","journal-title":"Int J Comput Vis"},{"issue":"12","key":"9589_CR38","doi-asserted-by":"publisher","first-page":"5136","DOI":"10.1109\/TIP.2013.2282081","volume":"22","author":"Z-M Lu","year":"2013","unstructured":"Lu Z-M, Shi Y (2013) Fast video shot boundary detection based on SVD and pattern matching. IEEE Trans Image Process 22(12):5136\u20135145","journal-title":"IEEE Trans Image Process"},{"issue":"7","key":"9589_CR39","doi-asserted-by":"publisher","first-page":"1482","DOI":"10.1109\/TMM.2017.2671447","volume":"19","author":"B Luo","year":"2017","unstructured":"Luo B, Li H, Meng F, Wu Q, Huang C (2017) Video object segmentation via global consistency aware query strategy. IEEE Trans Multimed 19(7):1482\u20131493","journal-title":"IEEE Trans Multimed"},{"key":"9589_CR40","doi-asserted-by":"crossref","unstructured":"I Mademlis, N Nikolaidis, and I Pitas (2015). \u201cStereoscopic video description for key-frame extraction in movie summarization,\u201d in 2015 23rd European Signal Processing Conference (EUSIPCO), pp. 819\u2013823","DOI":"10.1109\/EUSIPCO.2015.7362497"},{"key":"9589_CR41","unstructured":"JP Martin (2005). \u201cDescription s\u00e9miotique de contenus audiovisuels,\u201d Universit\u00e9 de Paris-Sud. Facult\u00e9 des Sciences d\u2019Orsay (Essonne)"},{"key":"9589_CR42","doi-asserted-by":"crossref","unstructured":"P Mickan and E Lopez (2016). Text-based research and teaching: a social semiotic perspective on language in use. Springer","DOI":"10.1057\/978-1-137-59849-3"},{"issue":"1","key":"9589_CR43","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1016\/j.infoandorg.2016.12.001","volume":"27","author":"J Mingers","year":"2017","unstructured":"Mingers J, Willcocks L (2017) An integrative semiotic methodology for IS research. Inf Organ 27(1):17\u201336","journal-title":"Inf Organ"},{"issue":"1","key":"9589_CR44","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1002\/(SICI)1097-4571(199401)45:1<20::AID-ASI3>3.0.CO;2-N","volume":"45","author":"RCT Morris","year":"1994","unstructured":"Morris RCT (1994) Toward a user-centered information service. J Am Soc Inf Sci 45(1):20\u201330","journal-title":"J Am Soc Inf Sci"},{"issue":"3","key":"9589_CR45","doi-asserted-by":"publisher","first-page":"86","DOI":"10.1109\/MMUL.2006.63","volume":"13","author":"M Naphade","year":"2006","unstructured":"Naphade M, Smith JR, Tesic J, Chang S-F, Hsu W, Kennedy L, Hauptmann A, Curtis J (2006) Large-scale concept ontology for multimedia. IEEE Multimed 13(3):86\u201391","journal-title":"IEEE Multimed"},{"issue":"3","key":"9589_CR46","doi-asserted-by":"publisher","first-page":"145","DOI":"10.1023\/A:1011139631724","volume":"42","author":"A Oliva","year":"2001","unstructured":"Oliva A, Torralba A (2001) Modeling the shape of the scene: a holistic representation of the spatial envelope. Int J Comput Vis 42(3):145\u2013175","journal-title":"Int J Comput Vis"},{"key":"9589_CR47","doi-asserted-by":"crossref","unstructured":"F Orlandi, J Debattista, IA Hassan, C Conran, M Latifi, M Nicholson, FA Salim, D Turner, O Conlan, and D O\u2019sullivan (2018). \u201cLeveraging Knowledge Graphs of Movies and Their Content for Web-Scale Analysis,\u201d in 2018 14th International Conference on Signal-Image Technology & Internet-Based Systems (SITIS), pp. 609\u2013616","DOI":"10.1109\/SITIS.2018.00098"},{"key":"9589_CR48","unstructured":"K Papineni, S Roukos, T Ward, and WJ Zhu (2002). \u201cBLEU: a method for automatic evaluation of machine translation,\u201d in Proceedings of the 40th annual meeting on association for computational linguistics, pp. 311\u2013318"},{"issue":"4","key":"9589_CR49","first-page":"38","volume":"64","author":"U Patel","year":"2013","unstructured":"Patel U, Shah P, Panchal P (2013) Shot detection using pixel wise difference with adaptive threshold and color histogram method in compressed and uncompressed video. Int J Comput Appl 64(4):38\u201344","journal-title":"Int J Comput Appl"},{"key":"9589_CR50","unstructured":"Peirce CS (2009) Writings of Charles S. Peirce: A Chronological Edition, Volume 8: 1890\u20131892, vol. 8. Indiana University Press"},{"issue":"5","key":"9589_CR51","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1007\/s00530-008-0140-2","volume":"14","author":"J-P Poli","year":"2008","unstructured":"Poli J-P (2008) An automatic television stream structuring system for television archives holders. Multimedia Systems 14(5):255\u2013275","journal-title":"Multimedia Systems"},{"key":"9589_CR52","unstructured":"S Ren, K He, R Girshick, and J Sun (2015). \u201cFaster r-cnn: Towards real-time object detection with region proposal networks,\u201d in Adv Neural Inf Proces Syst, pp. 91\u201399"},{"key":"9589_CR53","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1016\/j.ins.2014.02.017","volume":"277","author":"AM Rinaldi","year":"2014","unstructured":"Rinaldi AM (2014) A multimedia ontology model based on linguistic properties and audio-visual features. Inf. Sci. (Ny). 277:234\u2013246","journal-title":"Inf. Sci. (Ny)."},{"key":"9589_CR54","unstructured":"LA Rowe, JS Boreczky, and CA Eads (1994). \u201cIndexes for user access to large video databases,\u201d in IS&T\/SPIE 1994 International Symposium on Electronic Imaging: Science and Technology, pp. 150\u2013161"},{"key":"9589_CR55","doi-asserted-by":"crossref","unstructured":"S\u00e1nchez-Nielsen E, Ch\u00e1vez-Guti\u00e9rrez F, Lorenzo-Navarro J (2019) A semantic parliamentary multimedia approach for retrieval of video clips with content understanding. Multimedia Systems:1\u201318","DOI":"10.1007\/s00530-019-00610-2"},{"issue":"18","key":"9589_CR56","doi-asserted-by":"publisher","first-page":"18657","DOI":"10.1007\/s11042-017-4350-5","volume":"76","author":"S Shrivastav","year":"2017","unstructured":"Shrivastav S, Kumar S, Kumar K (2017) Towards an ontology based framework for searching multimedia contents on the web. Multimed Tools Appl 76(18):18657\u201318686","journal-title":"Multimed Tools Appl"},{"key":"9589_CR57","doi-asserted-by":"crossref","unstructured":"LF Sikos (2017). \u201cThe Semantic Gap,\u201d in Description Logics in Multimedia Reasoning, Springer, pp. 51\u201366","DOI":"10.1007\/978-3-319-54066-5_3"},{"key":"9589_CR58","doi-asserted-by":"crossref","unstructured":"LF Sikos (2018). \u201cOntology-based structured video annotation for content-based video retrieval via spatiotemporal reasoning,\u201d in Bridging the Semantic Gap in Image and Video Analysis, Springer, pp. 97\u2013122","DOI":"10.1007\/978-3-319-73891-8_6"},{"key":"9589_CR59","doi-asserted-by":"crossref","unstructured":"LF Sikos and DMW Powers (2015). \u201cKnowledge-driven video information retrieval with LOD: from semi-structured to structured video metadata,\u201d in Proceedings of the Eighth Workshop on Exploiting Semantic Annotations in Information Retrieval, pp. 35\u201337","DOI":"10.1145\/2810133.2810141"},{"issue":"4","key":"9589_CR60","doi-asserted-by":"publisher","first-page":"411","DOI":"10.1016\/j.cviu.2009.03.011","volume":"114","author":"AF Smeaton","year":"2010","unstructured":"Smeaton AF, Over P, Doherty AR (2010) Video shot boundary detection: seven years of TRECVid activity. Comput Vis Image Underst 114(4):411\u2013418","journal-title":"Comput Vis Image Underst"},{"key":"9589_CR61","doi-asserted-by":"publisher","first-page":"175","DOI":"10.1016\/j.patcog.2017.03.021","volume":"75","author":"J Song","year":"2018","unstructured":"Song J, Gao L, Liu L, Zhu X, Sebe N (2018) Quantization-based hashing: a general framework for scalable image and video retrieval. Pattern Recogn 75:175\u2013187","journal-title":"Pattern Recogn"},{"issue":"11","key":"9589_CR62","doi-asserted-by":"publisher","first-page":"4999","DOI":"10.1109\/TIP.2016.2601260","volume":"25","author":"J Song","year":"2016","unstructured":"Song J, Gao L, Nie F, Shen HT, Yan Y, Sebe N (2016) Optimized graph learning using partial tags and multiple features for image and video annotation. IEEE Trans Image Process 25(11):4999\u20135011","journal-title":"IEEE Trans Image Process"},{"key":"9589_CR63","unstructured":"J Song, Y Guo, L Gao, X Li, A Hanjalic, and HT Shen (2018). \u201cFrom deterministic to generative: multimodal stochastic RNNs for video captioning,\u201d IEEE Trans. neural networks Learn. Syst"},{"issue":"7","key":"9589_CR64","doi-asserted-by":"publisher","first-page":"3210","DOI":"10.1109\/TIP.2018.2814344","volume":"27","author":"J Song","year":"2018","unstructured":"Song J, Zhang H, Li X, Gao L, Wang M, Hong R (2018) Self-supervised video hashing with hierarchical binary auto-encoder. IEEE Trans Image Process 27(7):3210\u20133221","journal-title":"IEEE Trans Image Process"},{"key":"9589_CR65","unstructured":"P Stockinger (2003). \u201cLe document audiovisuel,\u201d Hermes, Lavoisier"},{"key":"9589_CR66","unstructured":"P Stockinger (2011). Les archives audiovisuelles : description, indexation et publication. Lavoisier"},{"key":"9589_CR67","doi-asserted-by":"crossref","unstructured":"Stockinger P (2013) Audiovisual archives: digital text and discourse analysis. John Wiley & Sons","DOI":"10.1002\/9781118561980"},{"key":"9589_CR68","doi-asserted-by":"crossref","unstructured":"A Tamrakar, S Ali, Q Yu, J Liu, O Javed, A Divakaran, H Cheng, and H Sawhney (2012). \u201cEvaluation of low-level features and their combinations for complex event detection in open source videos,\u201d in 2012 IEEE Conference on Computer Vision and Pattern Recogn, pp. 3681\u20133688","DOI":"10.1109\/CVPR.2012.6248114"},{"key":"9589_CR69","doi-asserted-by":"crossref","unstructured":"Tang P, Wang C, Wang X, Liu W, Zeng W, Wang J (2019) Object detection in videos by high quality object linking. IEEE Trans. Pattern Anal. Mach. Intell","DOI":"10.1109\/TPAMI.2019.2910529"},{"key":"9589_CR70","doi-asserted-by":"crossref","unstructured":"R Vedantam, C Lawrence Zitnick, and D Parikh (2015). \u201cCider: Consensus-based image description evaluation,\u201d in Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 4566\u20134575","DOI":"10.1109\/CVPR.2015.7299087"},{"issue":"4","key":"9589_CR71","doi-asserted-by":"publisher","first-page":"510","DOI":"10.1109\/LSP.2016.2611485","volume":"24","author":"X Wang","year":"2016","unstructured":"Wang X, Gao L, Song J, Shen H (2016) Beyond frame-level CNN: saliency-aware 3-D CNN with LSTM for video action recognition. IEEE Signal Process Lett 24(4):510\u2013514","journal-title":"IEEE Signal Process Lett"},{"issue":"3","key":"9589_CR72","doi-asserted-by":"publisher","first-page":"634","DOI":"10.1109\/TMM.2017.2749159","volume":"20","author":"X Wang","year":"2017","unstructured":"Wang X, Gao L, Wang P, Sun X, Liu X (2017) Two-stream 3-d convnet fusion for action recognition in videos with arbitrary size and length. IEEE Trans Multimed 20(3):634\u2013644","journal-title":"IEEE Trans Multimed"},{"key":"9589_CR73","doi-asserted-by":"crossref","unstructured":"W Wang, J Shen, and F Porikli (2015). \u201cSaliency-aware geodesic video object segmentation,\u201d in Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 3395\u20133402","DOI":"10.1109\/CVPR.2015.7298961"},{"key":"9589_CR74","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1016\/j.cviu.2017.05.001","volume":"163","author":"Q Wu","year":"2017","unstructured":"Wu Q, Teney D, Wang P, Shen C, Dick A, van den Hengel A (2017) Visual question answering: a survey of methods and datasets. Comput Vis Image Underst 163:21\u201340","journal-title":"Comput Vis Image Underst"},{"issue":"19","key":"9589_CR75","doi-asserted-by":"publisher","first-page":"12155","DOI":"10.1007\/s11042-015-3112-5","volume":"75","author":"Z Xu","year":"2016","unstructured":"Xu Z, Hu C, Mei L (2016) Video structured description technology based intelligence analysis of surveillance videos for public security applications. Multimed Tools Appl 75(19):12155\u201312172","journal-title":"Multimed Tools Appl"},{"key":"9589_CR76","doi-asserted-by":"crossref","unstructured":"Z Xu, F Zhi, C Liang, M Lin, and X Luo (2014). \u201cSemantic annotation of traffic video resources,\u201d in 2014 IEEE 13th International Conference on Cognitive Informatics and Cognitive Computing, pp. 323\u2013328","DOI":"10.1109\/ICCI-CC.2014.6921478"},{"issue":"2","key":"9589_CR77","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1080\/19386389.2011.570654","volume":"11","author":"CM Yasser","year":"2011","unstructured":"Yasser CM (2011) An analysis of problems in metadata records. J Libr Metadata 11(2):51\u201362","journal-title":"J Libr Metadata"},{"key":"9589_CR78","doi-asserted-by":"crossref","unstructured":"G Ye, Y Li, H Xu, D Liu, and SF Chang (2015). \u201cEventnet: A large scale structured concept library for complex event detection in video,\u201d in Proceedings of the 23rd ACM international conference on Multimedia, pp. 471\u2013480","DOI":"10.1145\/2733373.2806221"},{"key":"9589_CR79","doi-asserted-by":"crossref","unstructured":"Q You, H Jin, Z Wang, C Fang, and J Luo (2016). \u201cImage captioning with semantic attention,\u201d in Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 4651\u20134659","DOI":"10.1109\/CVPR.2016.503"},{"key":"9589_CR80","unstructured":"W Zhou, H Li, and Q Tian (2017). \u201cRecent advance in content-based image retrieval: A literature survey,\u201d arXiv Prepr. arXiv1706.06064"},{"issue":"10","key":"9589_CR81","doi-asserted-by":"publisher","first-page":"5645","DOI":"10.1007\/s11042-015-2531-7","volume":"75","author":"T Zlitni","year":"2016","unstructured":"Zlitni T, Bouaziz B, Mahdi W (2016) Automatic topics segmentation for TV news video using prior knowledge. Multimed Tools Appl 75(10):5645\u20135672","journal-title":"Multimed Tools Appl"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-020-09589-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-020-09589-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-020-09589-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,7]],"date-time":"2022-11-07T06:18:36Z","timestamp":1667801916000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-020-09589-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,8,15]]},"references-count":81,"journal-issue":{"issue":"45-46","published-print":{"date-parts":[[2020,12]]}},"alternative-id":["9589"],"URL":"https:\/\/doi.org\/10.1007\/s11042-020-09589-9","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,8,15]]},"assertion":[{"value":"26 June 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 May 2020","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 August 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 August 2020","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}