{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T13:36:11Z","timestamp":1776173771121,"version":"3.50.1"},"publisher-location":"Cham","reference-count":53,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030585259","type":"print"},{"value":"9783030585266","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-58526-6_12","type":"book-chapter","created":{"date-parts":[[2020,10,6]],"date-time":"2020-10-06T21:03:07Z","timestamp":1602018187000},"page":"191-207","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Recurrent Image Annotation with Explicit Inter-label Dependencies"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6958-5017","authenticated-orcid":false,"given":"Ayushi","family":"Dutta","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2317-2641","authenticated-orcid":false,"given":"Yashaswi","family":"Verma","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6767-7057","authenticated-orcid":false,"given":"C. V.","family":"Jawahar","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,10,7]]},"reference":[{"issue":"9","key":"12_CR1","doi-asserted-by":"publisher","first-page":"1757","DOI":"10.1016\/j.patcog.2004.03.009","volume":"39","author":"MR Boutell","year":"2004","unstructured":"Boutell, M.R., Luo, J., Shen, X., Brown, C.M.: Learning multi-label scene classification. Pattern Recogn. 39(9), 1757\u20131771 (2004)","journal-title":"Pattern Recogn."},{"key":"12_CR2","doi-asserted-by":"crossref","unstructured":"Bucak, S.S., Jin, R., Jain, A.K.: Multi-label learning with incomplete class assignments. In: CVPR (2011)","DOI":"10.1109\/CVPR.2011.5995734"},{"issue":"3","key":"12_CR3","doi-asserted-by":"publisher","first-page":"394","DOI":"10.1109\/TPAMI.2007.61","volume":"29","author":"G Carneiro","year":"2007","unstructured":"Carneiro, G., Chan, A.B., Moreno, P.J., Vasconcelos, N.: Supervised learning of semantic classes for image annotation and retrieval. IEEE Trans. Pattern Anal. Mach. Intell. 29(3), 394\u2013410 (2007)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"12_CR4","doi-asserted-by":"crossref","unstructured":"Chen, S.F., Chen, Y.C., Yeh, C.K., Wang, Y.C.F.: Order-free RNN with visual attention for multi-label classification. In: AAAI (2018)","DOI":"10.1609\/aaai.v32i1.12230"},{"key":"12_CR5","doi-asserted-by":"crossref","unstructured":"Chen, T., Wang, Z., Li, G., Lin, L.: Recurrent attentional reinforcement learning for multi-label image recognition. In: AAAI. pp. 6730\u20136737 (2018)","DOI":"10.1609\/aaai.v32i1.12281"},{"key":"12_CR6","doi-asserted-by":"crossref","unstructured":"Chua, T.S, Tang, J., Hong, R., Li, H., Luo, Z., Zheng, Y.: Nus-wide: a real-world web image database from national university of Singapore. In: In CIVR (2009)","DOI":"10.1145\/1646396.1646452"},{"key":"12_CR7","doi-asserted-by":"crossref","unstructured":"Escalante, H.J., H\u00e9rnadez, C.A., Sucar, L.E., Montes, M.: Late fusion of heterogeneous methods for multimedia image retrieval. In: MIR (2008)","DOI":"10.1145\/1460096.1460125"},{"key":"12_CR8","doi-asserted-by":"crossref","unstructured":"Fang, H., et al.: From captions to visual concepts and back. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1473\u20131482 (2015)","DOI":"10.1109\/CVPR.2015.7298754"},{"key":"12_CR9","unstructured":"Feng, S.L., Manmatha, R., Lavrenko, V.: Multiple Bernoulli relevance models for image and video annotation. In: CVPR (2004)"},{"key":"12_CR10","unstructured":"Frome, A., et al.: Devise: a deep visual-semantic embedding model. In: Neural Information Processing Systems (NIPS) (2013)"},{"key":"12_CR11","unstructured":"Gong, Y., Jia, Y., Leung, T.K., Toshev, A., Ioffe, S.: Deep convolutional ranking for multilabel image annotation. In: ICLR (2014)"},{"key":"12_CR12","doi-asserted-by":"crossref","unstructured":"Guillaumin, M., Mensink, T., Verbeek, J., Schmid, C.: TagProp: discriminative metric learning in nearest neighbour models for image auto-annotation. In: ICCV (2009)","DOI":"10.1109\/ICCV.2009.5459266"},{"key":"12_CR13","doi-asserted-by":"crossref","unstructured":"Guo, H., Zheng, K., Fan, X., Yu, H., Wang, S.: Visual attention consistency under image transforms for multi-label image classification. In: CVPR. pp. 729\u2013739 (2019)","DOI":"10.1109\/CVPR.2019.00082"},{"key":"12_CR14","unstructured":"Hariharan, B., Zelnik-Manor, L., Vishwanathan, S.V.N., Varma, M.: Large scale max-margin multi-label classification with priors. In: ICML (2010)"},{"key":"12_CR15","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.90"},{"issue":"9","key":"12_CR16","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(9), 1735\u20131780 (1997). https:\/\/doi.org\/10.1162\/neco.1997.9.8.1735","journal-title":"Neural Comput."},{"key":"12_CR17","doi-asserted-by":"crossref","unstructured":"Hu, H., Zhou, G.T., Deng, Z., Liao, Z., Mori, G.: Learning structured inference neural networks with label relations. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.323"},{"key":"12_CR18","unstructured":"Jin, J., Nakayama, H.: Annotation order matters: Recurrent image annotator for arbitrary length image tagging. In: ICPR (2016)"},{"key":"12_CR19","doi-asserted-by":"crossref","unstructured":"Joachims, T.: Optimizing search engines using clickthrough data. In: KDD (2002)","DOI":"10.1145\/775047.775067"},{"key":"12_CR20","doi-asserted-by":"crossref","unstructured":"Johnson, J., Ballan, L., Fei-Fei, L.: Love thy neighbors: image annotation by exploiting image metadata. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.525"},{"key":"12_CR21","doi-asserted-by":"crossref","unstructured":"Johnson, J., Karpathy, A., Fei-Fei, L.: Densecap: fully convolutional localization networks for dense captioning. In: CVPR, pp. 4565\u20134574 (2015)","DOI":"10.1109\/CVPR.2016.494"},{"key":"12_CR22","first-page":"1097","volume":"25","author":"A Krizhevsky","year":"2012","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. Adv. Neural Inf. Process. Syst. 25, 1097\u20131105 (2012)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"12_CR23","doi-asserted-by":"crossref","unstructured":"Lan, T., Mori, G.: A max-margin riffled independence model for image tag ranking. In: Computer Vision and Pattern Recognition (CVPR) (2013)","DOI":"10.1109\/CVPR.2013.399"},{"key":"12_CR24","unstructured":"Lavrenko, V., Manmatha, R., Jeon, J.: A model for learning the semantics of pictures. In: NIPS (2003)"},{"issue":"4","key":"12_CR25","first-page":"2294","volume":"31","author":"C Li","year":"2019","unstructured":"Li, C., Liu, C., Duan, L., Gao, P., Zheng, K.: Reconstruction regularized deep metric learning for multi-label image classification. IEEE Trans. Neural Netw. Learn. Syst. 31(4), 2294\u20132303 (2019)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"12_CR26","doi-asserted-by":"crossref","unstructured":"Li, L., Wang, S., Jiang, S., Huang, Q.: Attentive recurrent neural network for weak-supervised multi-label image classification. In: ACM Multimedia, pp. 1092\u20131100 (2018)","DOI":"10.1145\/3240508.3240649"},{"key":"12_CR27","doi-asserted-by":"crossref","unstructured":"Li, Q., Qiao, M., Bian, W., Tao, D.: Conditional graphical lasso for multi-label image classification. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.325"},{"key":"12_CR28","unstructured":"Li, X., Zhao, F., Guo, Y.: Multi-label image classification with a probabilistic label enhancement model. In: Proceedings Uncertainty in Artificial Intelligence (2014)"},{"key":"12_CR29","doi-asserted-by":"crossref","unstructured":"Li, Y., Song, Y., Luo, J.: Improving pairwise ranking for multi-label image classification. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.199"},{"key":"12_CR30","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"TY Lin","year":"2014","unstructured":"Lin, T.Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"12_CR31","doi-asserted-by":"crossref","unstructured":"Liu, F., Xiang, T., Hospedales, T.M., Yang, W., Sun, C.: Semantic regularisation for recurrent image annotation. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.443"},{"key":"12_CR32","doi-asserted-by":"crossref","unstructured":"Liu, Y., Sheng, L., Shao, J., Yan, J., Xiang, S., Pan, C.: Multi-label image classification via knowledge distillation from weakly-supervised detection. In: ACM Multimedia, pp. 700\u2013708 (2018)","DOI":"10.1145\/3240508.3240567"},{"key":"12_CR33","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"316","DOI":"10.1007\/978-3-540-88690-7_24","volume-title":"Computer Vision \u2013 ECCV 2008","author":"A Makadia","year":"2008","unstructured":"Makadia, A., Pavlovic, V., Kumar, S.: A new baseline for image annotation. In: Forsyth, D., Torr, P., Zisserman, A. (eds.) ECCV 2008. LNCS, vol. 5304, pp. 316\u2013329. Springer, Heidelberg (2008). https:\/\/doi.org\/10.1007\/978-3-540-88690-7_24"},{"issue":"11","key":"12_CR34","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1145\/219717.219748","volume":"38","author":"GA Miller","year":"1995","unstructured":"Miller, G.A.: Wordnet: a lexical database for English. Commun. ACM (CACM) 38(11), 39\u201341 (1995)","journal-title":"Commun. ACM (CACM)"},{"key":"12_CR35","doi-asserted-by":"crossref","unstructured":"Murthy, V.N., Maji, S., Manmatha, R.: Automatic image annotation using deep learning representations. In: ICMR (2015)","DOI":"10.1145\/2671188.2749391"},{"key":"12_CR36","doi-asserted-by":"publisher","first-page":"1720","DOI":"10.1109\/TIP.2018.2881928","volume":"28","author":"Y Niu","year":"2017","unstructured":"Niu, Y., Lu, Z., Wen, J.R., Xiang, T., Chang, S.F.: Multi-modal multi-scale deep learning for large-scale image annotation. IEEE Trans. Image Process. 28, 1720\u20131731 (2017)","journal-title":"IEEE Trans. Image Process."},{"key":"12_CR37","doi-asserted-by":"crossref","unstructured":"Rasiwasia, N., et al.: A new approach to cross-modal multimedia retrieval. In: ACM MM (2010)","DOI":"10.1145\/1873951.1873987"},{"issue":"3","key":"12_CR38","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., et al.: Imagenet large scale visual recognition challenge. Int. J. Comput. Vision 115(3), 211\u2013252 (2015)","journal-title":"Int. J. Comput. Vision"},{"key":"12_CR39","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. In: ICLR (2015)"},{"key":"12_CR40","doi-asserted-by":"crossref","unstructured":"Szegedy, C., et al.: Going deeper with convolutions. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"12_CR41","doi-asserted-by":"crossref","unstructured":"Tsai, C.P., Lee, Y.H.: Adversarial learning of label dependency: a novel framework for multi-class classification. ICASSP pp. 3847\u20133851 (2019)","DOI":"10.1109\/ICASSP.2019.8682549"},{"key":"12_CR42","doi-asserted-by":"crossref","unstructured":"Uricchio, T., Ballan, L., Seidenari, L., Bimbo, A.D.: Automatic image annotation via label transfer in the semantic space (2016). CoRR abs\/1605.04770","DOI":"10.1016\/j.patcog.2017.05.019"},{"key":"12_CR43","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"836","DOI":"10.1007\/978-3-642-33712-3_60","volume-title":"Computer Vision \u2013 ECCV 2012","author":"Y Verma","year":"2012","unstructured":"Verma, Y., Jawahar, C.V.: Image annotation using metric learning in semantic neighbourhoods. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012. LNCS, vol. 7574, pp. 836\u2013849. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-33712-3_60"},{"key":"12_CR44","doi-asserted-by":"crossref","unstructured":"Verma, Y., Jawahar, C.V.: Exploring SVM for image annotation in presence of confusing labels. In: BMVC (2013)","DOI":"10.5244\/C.27.25"},{"issue":"1","key":"12_CR45","doi-asserted-by":"publisher","first-page":"126","DOI":"10.1007\/s11263-016-0927-0","volume":"121","author":"Y Verma","year":"2017","unstructured":"Verma, Y., Jawahar, C.V.: Image annotation by propagating labels from semantic neighbourhoods. Int. J. Comput. Vision 121(1), 126\u2013148 (2017)","journal-title":"Int. J. Comput. Vision"},{"key":"12_CR46","doi-asserted-by":"crossref","unstructured":"Vinyals, O., Toshev, A., Bengio, S., Erhan, D.: Show and tell: a neural image caption generator. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"12_CR47","unstructured":"Wang, C., Blei, D., Fei-Fei, L.: Simultaneous image classification and annotation. In: Proceedings CVPR (2009)"},{"key":"12_CR48","doi-asserted-by":"crossref","unstructured":"Wang, J., Yang, Y., Mao, J., Huang, Z., Huang, C., Xu, W.: CNN-RNN: a unified framework for multi-label image classification. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.251"},{"key":"12_CR49","unstructured":"Weston, J., Bengio, S., Usunier, N.: WSABIE: scaling up to large vocabulary image annotation. In: IJCAI (2011)"},{"key":"12_CR50","unstructured":"Yazici, V.O., Gonzalez-Garcia, A., Ramisa, A., Twardowski, B., van de Weijer, J.: Orderless recurrent models for multi-label classification (2019). CoRR abs\/1911.09996"},{"key":"12_CR51","unstructured":"You, R., Guo, Z., Cui, L., Long, X., Bao, Y., Wen, S.: Cross-modality attention with semantic graph embedding for multi-label classification (2019). CoRR abs\/1912.07872"},{"key":"12_CR52","doi-asserted-by":"crossref","unstructured":"Zhu, F., Li, H., Ouyang, W., Yu, N., Wang, X.: Learning spatial regularization with image-level supervisions for multi-label image classification. In: CVPR, pp. 2027\u20132036 (2017)","DOI":"10.1109\/CVPR.2017.219"},{"issue":"2","key":"12_CR53","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1109\/TMM.2007.911822","volume":"10","author":"Y Zhuang","year":"2008","unstructured":"Zhuang, Y., Yang, Y., Wu, F.: Mining semantic correlation of heterogeneous multimedia data for cross-media retrieval. IEEE Trans. Multimedia 10(2), 221\u2013229 (2008)","journal-title":"IEEE Trans. Multimedia"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2020"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-58526-6_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,6]],"date-time":"2024-10-06T00:11:25Z","timestamp":1728173485000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-58526-6_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030585259","9783030585266"],"references-count":53,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-58526-6_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"7 October 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2020.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OpenReview","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5025","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1360","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic. From the ECCV Workshops 249 full papers, 18 short papers, and 21 further contributions were published out of a total of 467 submissions.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}