{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T15:21:19Z","timestamp":1773415279321,"version":"3.50.1"},"publisher-location":"Cham","reference-count":48,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030166564","type":"print"},{"value":"9783030166571","type":"electronic"}],"license":[{"start":{"date-parts":[[2019,4,12]],"date-time":"2019-04-12T00:00:00Z","timestamp":1555027200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-16657-1_99","type":"book-chapter","created":{"date-parts":[[2019,4,11]],"date-time":"2019-04-11T20:04:30Z","timestamp":1555013070000},"page":"1060-1069","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["A State-of-Art Review on Automatic Video Annotation Techniques"],"prefix":"10.1007","author":[{"given":"Krunal","family":"Randive","sequence":"first","affiliation":[]},{"given":"R.","family":"Mohan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,4,12]]},"reference":[{"key":"99_CR1","unstructured":"Feng, S.L., Manmatha, R., Lavrenko, V.: Multiple Bernoulli relevance models for image and video annotation. In: IEEE Computer Society Conference on Computer Vision and Pattern Recognition, pp. 1002\u20131009 (2004)"},{"key":"99_CR2","doi-asserted-by":"crossref","unstructured":"Jeon, J., Lavrenko, V., Manmatha, R.: Automatic image annotation and retrieval using cross-media relevance models. In: Proceedings of the 26th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 119\u2013126 (2003)","DOI":"10.1145\/860435.860459"},{"key":"99_CR3","doi-asserted-by":"crossref","unstructured":"Liu, J., Wang, B., Li, M., et al.: Dual cross-media relevance model for image annotation. In: Proceedings of the 15th International Conference on Multimedia, pp. 605\u2013614 (2007)","DOI":"10.1145\/1291233.1291380"},{"issue":"5","key":"99_CR4","doi-asserted-by":"publisher","first-page":"2259","DOI":"10.1109\/TIP.2016.2542021","volume":"25","author":"J Ni\u00f1o-Casta\u00f1eda","year":"2016","unstructured":"Ni\u00f1o-Casta\u00f1eda, J., Fr\u00edas-Vel\u00e1zquez, A., Bo, N.B., Slembrouck, M., Guan, J., Debard, G., Vanrumste, B., Tuytelaars, T., Philips, W.: Scalable semi-automatic annotation for multi-camera person tracking. IEEE Trans. Image Process. 25(5), 2259\u20132274 (2016)","journal-title":"IEEE Trans. Image Process."},{"issue":"3","key":"99_CR5","doi-asserted-by":"publisher","first-page":"465","DOI":"10.1109\/TMM.2009.2012919","volume":"11","author":"M Wang","year":"2009","unstructured":"Wang, M., Hua, X.S., Tang, J., Hong, R.: Beyond distance measurement: constructing neighborhood similarity for video annotation. IEEE Trans. Multimed. 11(3), 465\u2013476 (2009)","journal-title":"IEEE Trans. Multimed."},{"key":"99_CR6","doi-asserted-by":"crossref","unstructured":"Wang, C., Zhang, L., Zhang, H.J.: Learning to reduce the semantic gap in web image retrieval and annotation. In: Proceedings of the 31st Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 355\u2013362 (2008)","DOI":"10.1145\/1390334.1390396"},{"issue":"2","key":"99_CR7","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1109\/TMM.2016.2614426","volume":"19","author":"CL Chou","year":"2017","unstructured":"Chou, C.L., Chen, H.T., Lee, S.Y.: Multimodal video-to-near-scene annotation. IEEE Trans. Multimed. 19(2), 354\u2013366 (2017)","journal-title":"IEEE Trans. Multimed."},{"key":"99_CR8","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1016\/j.neucom.2016.09.087","volume":"228","author":"S Xia","year":"2017","unstructured":"Xia, S., Chen, P., Zhang, J., Li, X., Wang, B.: Utilization of rotation-invariant uniform LBP histogram distribution and statistics of connected regions in automatic image annotation based on multi-label learning. Neurocomputing 228, 11\u201318 (2017)","journal-title":"Neurocomputing"},{"key":"99_CR9","doi-asserted-by":"crossref","unstructured":"Qi, G.J., Hua, X.S., Rui, Y., Tang, J., Mei, T., Zhang, H.J.: Correlative multi-label video annotation. In: Proceedings of the 15th ACM International Conference on Multimedia, pp. 17\u201326 (2007)","DOI":"10.1145\/1291233.1291245"},{"key":"99_CR10","doi-asserted-by":"crossref","unstructured":"Jain, S.D., Grauman, K.: Click carving: segmenting objects in video with point clicks (2016). arXiv preprint: arXiv:1607.01115","DOI":"10.1609\/hcomp.v4i1.13288"},{"issue":"5","key":"99_CR11","doi-asserted-by":"publisher","first-page":"6111","DOI":"10.1007\/s11042-016-3253-1","volume":"76","author":"H Song","year":"2017","unstructured":"Song, H., Wu, X., Liang, W., Jia, Y.: Recognizing key segments of videos for video annotation by learning from web image sets. Multimed. Tools Appl. 76(5), 6111\u20136126 (2017)","journal-title":"Multimed. Tools Appl."},{"key":"99_CR12","doi-asserted-by":"crossref","unstructured":"Sch\u00f6ning, J., Faion, P., Heidemann, G., Krumnack, U.: Providing video annotations in multimedia containers for visualization and research. In: 2017 IEEE Winter Conference on Applications of Computer Vision (WACV), pp. 650\u2013659 (2017)","DOI":"10.1109\/WACV.2017.78"},{"key":"99_CR13","doi-asserted-by":"crossref","unstructured":"Shah, R., Zimmermann, R.: Tag recommendation and ranking. In: Multimodal Analysis of User-Generated Multimedia Content, pp. 101\u2013138 (2017)","DOI":"10.1007\/978-3-319-61807-4_4"},{"key":"99_CR14","doi-asserted-by":"crossref","unstructured":"Moxley, E., Mei, T., Hua, X.S., Ma, W.Y., Manjunath, B.S.: Automatic video annotation through search and mining. In: 2008 IEEE International Conference on Multimedia and Expo, pp. 685\u2013688 (2008)","DOI":"10.1109\/ICME.2008.4607527"},{"issue":"5","key":"99_CR15","doi-asserted-by":"publisher","first-page":"733","DOI":"10.1109\/TCSVT.2009.2017400","volume":"19","author":"M Wang","year":"2009","unstructured":"Wang, M., Hua, X.S., Hong, R., Tang, J., Qi, G.J., Song, Y.: Unified video annotation via multigraph learning. IEEE Trans. Circ. Syst. Video Technol. 19(5), 733\u2013746 (2009)","journal-title":"IEEE Trans. Circ. Syst. Video Technol."},{"key":"99_CR16","doi-asserted-by":"crossref","unstructured":"Sch\u00f6ning, J., Faion, P., Heidemann, G.: Pixel-wise ground truth annotation in videos. In: ICPRAM, vol. 6, p. 11 (2016)","DOI":"10.1145\/2815833.2816947"},{"issue":"11","key":"99_CR17","doi-asserted-by":"publisher","first-page":"4999","DOI":"10.1109\/TIP.2016.2601260","volume":"25","author":"J Song","year":"2016","unstructured":"Song, J., Gao, L., Nie, F., Shen, H.T., Yan, Y., Sebe, N.: Optimized graph learning using partial tags and multiple features for image and video annotation. IEEE Trans. Image Process. 25(11), 4999\u20135011 (2016)","journal-title":"IEEE Trans. Image Process."},{"key":"99_CR18","doi-asserted-by":"crossref","unstructured":"Gao, L., Song, J., Nie, F., Yan, Y., Sebe, N., Tao Shen, H.: Optimal graph learning with partial tags and multiple features for image and video annotation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4371\u20134379 (2015)","DOI":"10.1109\/CVPR.2015.7299066"},{"issue":"9","key":"99_CR19","doi-asserted-by":"publisher","first-page":"1746","DOI":"10.1109\/TCSVT.2015.2475815","volume":"26","author":"X Qian","year":"2016","unstructured":"Qian, X., Liu, X., Ma, X., Lu, D., Xu, C.: What is happening in the video?\u2014Annotate video by sentence. IEEE Trans. Circ. Syst. Video Technol. 26(9), 1746\u20131757 (2016)","journal-title":"IEEE Trans. Circ. Syst. Video Technol."},{"key":"99_CR20","doi-asserted-by":"crossref","unstructured":"Sikos, L.F.: Ontology-based structured video annotation for content-based video retrieval via spatiotemporal reasoning. In: Bridging the Semantic Gap in Image and Video Analysis, pp. 97\u2013122. Springer, Cham (2018)","DOI":"10.1007\/978-3-319-73891-8_6"},{"issue":"4","key":"99_CR21","doi-asserted-by":"publisher","first-page":"80","DOI":"10.1109\/MMUL.2010.4","volume":"17","author":"L Ballan","year":"2010","unstructured":"Ballan, L., Bertini, M., Del Bimbo, A., Serra, G.: Video annotation and retrieval using ontologies and rule learning. IEEE Multimed. 17(4), 80\u201388 (2010)","journal-title":"IEEE Multimed."},{"issue":"2","key":"99_CR22","doi-asserted-by":"publisher","first-page":"1167","DOI":"10.1007\/s11042-013-1363-6","volume":"72","author":"A Altadmri","year":"2014","unstructured":"Altadmri, A., Ahmed, A.: A framework for automatic semantic video annotation. Multimed. Tools Appl. 72(2), 1167\u20131191 (2014)","journal-title":"Multimed. Tools Appl."},{"issue":"12","key":"99_CR23","doi-asserted-by":"publisher","first-page":"14437","DOI":"10.1007\/s11042-016-3705-7","volume":"76","author":"LF Sikos","year":"2017","unstructured":"Sikos, L.F.: RDF-powered semantic video annotation tools with concept mapping to linked data for next-generation video indexing: a comprehensive review. Multimed. Tools Appl. 76(12), 14437\u201314460 (2017)","journal-title":"Multimed. Tools Appl."},{"key":"99_CR24","doi-asserted-by":"crossref","unstructured":"Bloehdorn, S., Petridis, K., Saathoff, C., Simou, N., Tzouvaras, V., Avrithis, Y., Handschuh, S., Kompatsiaris, Y., Staab, S., Strintzis, M.G.: Semantic annotation of images and videos for multimedia analysis. In: European Semantic Web Conference, pp. 592\u2013607 (2005)","DOI":"10.1007\/11431053_40"},{"issue":"10","key":"99_CR25","doi-asserted-by":"publisher","first-page":"5719","DOI":"10.1007\/s11042-015-2537-1","volume":"75","author":"M Zarka","year":"2016","unstructured":"Zarka, M., Ammar, A.B., Alimi, A.M.: Fuzzy reasoning framework to improve semantic video interpretation. Multimed. Tools Appl. 75(10), 5719\u20135750 (2016)","journal-title":"Multimed. Tools Appl."},{"issue":"1","key":"99_CR26","first-page":"909","volume":"2","author":"K Khurana","year":"2013","unstructured":"Khurana, K., Chandak, M.B.: Study of various video annotation techniques. Int. J. Adv. Res. Comput. Commun. Eng. 2(1), 909\u2013914 (2013)","journal-title":"Int. J. Adv. Res. Comput. Commun. Eng."},{"issue":"1","key":"99_CR27","doi-asserted-by":"publisher","first-page":"246","DOI":"10.1016\/j.eswa.2014.07.046","volume":"42","author":"TH Duong","year":"2015","unstructured":"Duong, T.H., Nguyen, N.T., Truong, H.B., Nguyen, V.H.: A collaborative algorithm for semantic video annotation using a consensus-based social network analysis. Expert Syst. Appl. 42(1), 246\u2013258 (2015)","journal-title":"Expert Syst. Appl."},{"key":"99_CR28","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1016\/j.patrec.2016.09.014","volume":"96","author":"Y Wang","year":"2017","unstructured":"Wang, Y., Luo, Z., Jodoin, P.M.: Interactive deep learning method for segmenting moving objects. Pattern Recogn. Lett. 96, 66\u201375 (2017)","journal-title":"Pattern Recogn. Lett."},{"key":"99_CR29","doi-asserted-by":"crossref","unstructured":"Yao, L., Torabi, A., Cho, K., Ballas, N., Pal, C., Larochelle, H., Courville, A.: Describing videos by exploiting temporal structure. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4507\u20134515 (2015)","DOI":"10.1109\/ICCV.2015.512"},{"key":"99_CR30","doi-asserted-by":"crossref","unstructured":"Wu, Z., Yao, T., Fu, Y., Jiang, Y.G.: Deep learning for video classification and captioning (2016). arXiv preprint: arXiv:1609.06782","DOI":"10.1145\/3122865.3122867"},{"key":"99_CR31","doi-asserted-by":"crossref","unstructured":"Yu, S., Cai, H., Liu, A.: Multi-semantic video annotation with semantic network. In: 2016 International Conference on Cyberworlds (CW), pp. 239\u2013242, September 2016","DOI":"10.1109\/CW.2016.49"},{"key":"99_CR32","doi-asserted-by":"crossref","unstructured":"Koller, O., Ney, H., Bowden, R.: Deep hand: how to train a CNN on 1 million hand images when your data is continuous and weakly labelled. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3793\u20133802 (2016)","DOI":"10.1109\/CVPR.2016.412"},{"issue":"11","key":"99_CR33","doi-asserted-by":"publisher","first-page":"2196","DOI":"10.1109\/TMM.2016.2614227","volume":"18","author":"H Liao","year":"2016","unstructured":"Liao, H., Chen, L., Song, Y., Ming, H.: Visualization-based active learning for video annotation. IEEE Trans. Multimed. 18(11), 2196\u20132205 (2016)","journal-title":"IEEE Trans. Multimed."},{"key":"99_CR34","doi-asserted-by":"publisher","first-page":"761","DOI":"10.1016\/j.sigpro.2015.01.001","volume":"120","author":"Y Liu","year":"2016","unstructured":"Liu, Y., Feng, X., Zhou, Z.: Multimodal video classification with stacked contractive autoencoders. Signal Process. 120, 761\u2013766 (2016)","journal-title":"Signal Process."},{"key":"99_CR35","doi-asserted-by":"crossref","unstructured":"Maharaj, T., Ballas, N., Rohrbach, A., Courville, A.C., Pal, C.J.: A dataset and exploration of models for understanding video data through fill-in-the-blank question-answering. In: CVPR, pp. 7359\u20137368 (2017)","DOI":"10.1109\/CVPR.2017.778"},{"key":"99_CR36","doi-asserted-by":"crossref","unstructured":"Pan, P., Xu, Z., Yang, Y., Wu, F., Zhuang, Y.: Hierarchical recurrent neural encoder for video representation with application to captioning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1029\u20131038 (2016)","DOI":"10.1109\/CVPR.2016.117"},{"key":"99_CR37","unstructured":"Zhang, C., Tian, Y.: Automatic video description generation via LSTM with joint two-stream encoding. In: 2016 23rd International Conference on Pattern Recognition (ICPR), pp. 2924\u20132929 (2016)"},{"key":"99_CR38","unstructured":"Torabi, A., Tandon, N., Sigal, L.: Learning language-visual embedding for movie understanding with natural-language (2016). arXiv preprint: arXiv:1609.08124"},{"key":"99_CR39","doi-asserted-by":"crossref","unstructured":"Song, J., Guo, Z., Gao, L., Liu, W., Zhang, D., Shen, H.T.: Hierarchical LSTM with adjusted temporal attention for video captioning (2017). arXiv preprint: arXiv:1706.01231","DOI":"10.24963\/ijcai.2017\/381"},{"key":"99_CR40","doi-asserted-by":"crossref","unstructured":"Jiang, H., Lu, Y., Xue, J.: Automatic soccer video event detection based on a deep neural network combined CNN and RNN. In: 2016 IEEE 28th International Conference on Tools with Artificial Intelligence (ICTAI), pp. 490\u2013494 (2016)","DOI":"10.1109\/ICTAI.2016.0081"},{"key":"99_CR41","doi-asserted-by":"crossref","unstructured":"Karayil, T., Blandfort, P., Borth, D., Dengel, A.: Generating affective captions using concept and syntax transition networks. In: Proceedings of the 2016 ACM on Multimedia Conference, pp. 1111\u20131115 (2016)","DOI":"10.1145\/2911996.2930060"},{"key":"99_CR42","doi-asserted-by":"crossref","unstructured":"Ashangani, K., Wickramasinghe, K.U., De Silva, D.W.N., Gamwara, V.M., Nugaliyadde, A., Mallawarachchi, Y.: Semantic video search by automatic video annotation using TensorFlow. In: Manufacturing & Industrial Engineering Symposium (MIES), pp. 1\u20134 (2016)","DOI":"10.1109\/MIES.2016.7779985"},{"key":"99_CR43","doi-asserted-by":"crossref","unstructured":"Pan, Y., Mei, T., Yao, T., Li, H., Rui, Y.: Jointly modeling embedding and translation to bridge video and language. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4594\u20134602 (2016)","DOI":"10.1109\/CVPR.2016.497"},{"key":"99_CR44","doi-asserted-by":"crossref","unstructured":"Pan, Y., Yao, T., Li, H., Mei, T.: Video captioning with transferred semantic attributes. In: CVPR, vol. 2, p. 3 (2017)","DOI":"10.1109\/CVPR.2017.111"},{"key":"99_CR45","doi-asserted-by":"crossref","unstructured":"Xue, Y., Song, Y., Li, C., Chiang, A.T., Ning, X.: Automatic video annotation system for archival sports video. In: 2017 IEEE Winter Applications of Computer Vision Workshops (WACVW), pp. 23\u201328 (2017)","DOI":"10.1109\/WACVW.2017.11"},{"issue":"2","key":"99_CR46","doi-asserted-by":"publisher","first-page":"894","DOI":"10.1109\/TASE.2015.2418223","volume":"13","author":"L Zhang","year":"2016","unstructured":"Zhang, L., Hong, R., Nie, L., Hong, C.: A biologically inspired automatic system for media quality assessment. IEEE Trans. Autom. Sci. Eng. 13(2), 894\u2013902 (2016)","journal-title":"IEEE Trans. Autom. Sci. Eng."},{"issue":"2","key":"99_CR47","doi-asserted-by":"publisher","first-page":"553","DOI":"10.1007\/s00464-017-5878-1","volume":"32","author":"C Loukas","year":"2018","unstructured":"Loukas, C.: Video content analysis of surgical procedures. Surg. Endosc. 32(2), 553\u2013568 (2018)","journal-title":"Surg. Endosc."},{"key":"99_CR48","doi-asserted-by":"crossref","unstructured":"Hudelist, M.A., Husslein, H., M\u00fcnzer, B., Kletz, S., Schoeffmann, K.: A tool to support surgical quality assessment. In: 2017 IEEE Third International Conference on Multimedia Big Data (BigMM), pp. 238\u2013239 (2017)","DOI":"10.1109\/BigMM.2017.45"}],"container-title":["Advances in Intelligent Systems and Computing","Intelligent Systems Design and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-16657-1_99","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,15]],"date-time":"2024-02-15T07:13:13Z","timestamp":1707981193000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-16657-1_99"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,4,12]]},"ISBN":["9783030166564","9783030166571"],"references-count":48,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-16657-1_99","relation":{},"ISSN":["2194-5357","2194-5365"],"issn-type":[{"value":"2194-5357","type":"print"},{"value":"2194-5365","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,4,12]]},"assertion":[{"value":"12 April 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ISDA 2018","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Systems Design and Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vellore","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"India","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 December 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 December 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"isda2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.mirlabs.net\/isda18\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}