{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,4]],"date-time":"2026-04-04T15:16:00Z","timestamp":1775315760459,"version":"3.50.1"},"reference-count":36,"publisher":"Springer Science and Business Media LLC","issue":"10","license":[{"start":{"date-parts":[[2018,12,15]],"date-time":"2018-12-15T00:00:00Z","timestamp":1544832000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"the National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["61672202"],"award-info":[{"award-number":["61672202"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2019,10]]},"DOI":"10.1007\/s00371-018-01615-0","type":"journal-article","created":{"date-parts":[[2018,12,15]],"date-time":"2018-12-15T09:31:05Z","timestamp":1544866265000},"page":"1361-1371","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":22,"title":["Multi-label image classification with recurrently learning semantic dependencies"],"prefix":"10.1007","volume":"35","author":[{"given":"Long","family":"Chen","sequence":"first","affiliation":[]},{"given":"Ronggui","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Juan","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Lixia","family":"Xue","sequence":"additional","affiliation":[]},{"given":"Min","family":"Hu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,12,15]]},"reference":[{"key":"1615_CR1","first-page":"1097","volume":"25","author":"A Krizhevsky","year":"2012","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. Adv. Neural Inf. Process. Syst. 25, 1097\u20131105 (2012)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"1615_CR2","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"1615_CR3","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. In: International Conference on Learning Representations (2015)"},{"key":"1615_CR4","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Liu, W., Jia, Y., et al.: Going deeper with convolutions. In: 2015 IEEE Conference on Computer Vision and Pattern Recognition, pp. 1\u20139 (2015)","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"1615_CR5","doi-asserted-by":"crossref","unstructured":"Huang, G., Liu, Z., Maaten, L., van der Weinberger, K.Q.: Densely connected convolutional networks. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 2261\u20132269 (2017)","DOI":"10.1109\/CVPR.2017.243"},{"key":"1615_CR6","doi-asserted-by":"crossref","unstructured":"Hu, J., Shen, L., Sun, G.: Squeeze-and-excitation networks. Comput. Vis. Pattern Recogn. 7132\u20137141 (2018)","DOI":"10.1109\/CVPR.2018.00745"},{"key":"1615_CR7","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Delving deep into rectifiers: surpassing human-level performance on imagenet classification. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1026\u20131034 (2015)","DOI":"10.1109\/ICCV.2015.123"},{"key":"1615_CR8","doi-asserted-by":"crossref","unstructured":"Razavian, A.S., Azizpour, H., Sullivan, J., Carlsson, S.: CNN features off-the-shelf: an astounding baseline for recognition. In: Proceedings of the 2014 IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp. 512\u2013519 (2014)","DOI":"10.1109\/CVPRW.2014.131"},{"key":"1615_CR9","doi-asserted-by":"crossref","unstructured":"Nguyen, T.V.: Salient Object detection via objectness proposals. In: AAAI\u201915 Proceedings of the Twenty-Ninth AAAI Conference on Artificial Intelligence, pp. 4286\u20134287 (2015)","DOI":"10.1609\/aaai.v29i1.9279"},{"key":"1615_CR10","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1007\/s11263-013-0620-5","volume":"104","author":"JRR Uijlings","year":"2013","unstructured":"Uijlings, J.R.R., van de Sande, K.E.A., Gevers, T., Smeulders, A.W.M.: Selective search for object recognition. Int. J. Comput. Vis. 104, 154\u2013171 (2013)","journal-title":"Int. J. Comput. Vis."},{"key":"1615_CR11","doi-asserted-by":"crossref","unstructured":"Zitnick, C.L., Doll\u00e1r, P.: Edge boxes: locating object proposals from edges. In European Conference on Computer Vision, pp. 391\u2013405 (2014)","DOI":"10.1007\/978-3-319-10602-1_26"},{"key":"1615_CR12","doi-asserted-by":"crossref","unstructured":"Arbel\u00e1ez, P.A., Pont-Tuset, J., Barron, J.T., Marqu\u00e9s, F., Malik, J.: Multiscale combinatorial grouping. In: Proceedings of the 2014 IEEE Conference on Computer Vision and Pattern Recognition, pp. 328\u2013335 (2014)","DOI":"10.1109\/CVPR.2014.49"},{"key":"1615_CR13","doi-asserted-by":"publisher","first-page":"1901","DOI":"10.1109\/TPAMI.2015.2491929","volume":"38","author":"Y Wei","year":"2016","unstructured":"Wei, Y., Xia, W., Lin, M., et al.: Hcp: A flexible cnn framework for multi-label image classification. IEEE Trans. Pattern Anal. Mach. Intell. 38, 1901\u20131907 (2016)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1615_CR14","doi-asserted-by":"crossref","unstructured":"Chatfield, K., Simonyan, K., Vedaldi, A., Zisserman, A.: Return of the devil in the details: delving deep into convolutional nets. In: British Machine Vision Conference (2014)","DOI":"10.5244\/C.28.6"},{"key":"1615_CR15","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.-J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255 (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"1615_CR16","unstructured":"Gong, Y., Jia, Y., Leung, T., Toshev, A., Ioffe, S.: Deep convolutional ranking for multilabel image annotation. In: International Conference on Learning Representations (2014)"},{"key":"1615_CR17","unstructured":"van der Gaag, L.C., Feelders, A.J.: Probabilistic Graphical Models. Lecture Notes in Artificial Intelligence (2014). https:\/\/www.springer.com\/cn\/book\/9783319114323"},{"key":"1615_CR18","doi-asserted-by":"crossref","unstructured":"Jin, J., Nakayama, H.: Annotation order matters: recurrent image annotator for arbitrary length image tagging. In: International Conference on Pattern Recognition (ICPR), pp. 2452\u20132457 (2016)","DOI":"10.1109\/ICPR.2016.7900004"},{"key":"1615_CR19","doi-asserted-by":"crossref","unstructured":"Wang, J., Yang, Y., Mao, J., Huang, Z., Huang, C., Xu, W.: CNN\u2013RNN: A unified framework for multi-label image classification. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 2285\u20132294 (2016)","DOI":"10.1109\/CVPR.2016.251"},{"key":"1615_CR20","doi-asserted-by":"publisher","first-page":"2801","DOI":"10.1109\/TMM.2018.2812605","volume":"20","author":"J Zhang","year":"2018","unstructured":"Zhang, J., Wu, Q., Shen, C., Zhang, J., Lu, J.: Multi-label image classification with regional latent semantic dependencies. IEEE Trans. Multimed. 20, 2801\u2013 2813 (2018)","journal-title":"IEEE Trans. Multimed."},{"key":"1615_CR21","doi-asserted-by":"crossref","unstructured":"Chen, Q., Song, Z., Hua, Y., Huang, Z., Yan, S.: Hierarchical matching with side information for image classification. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 3426\u20133433 (2012)","DOI":"10.1109\/CVPR.2012.6248083"},{"key":"1615_CR22","doi-asserted-by":"crossref","unstructured":"Dong, J., Xia, W., Chen, Q., Feng, J., Huang, Z., Yan, S.: Subcategory-aware object classification. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 827\u2013834 (2013)","DOI":"10.1109\/CVPR.2013.112"},{"key":"1615_CR23","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Lowe","year":"2004","unstructured":"Lowe, D.G.: Distinctive image features from scale-invariant keypoints. Int. J. Comput. Vis. 60, 91\u2013110 (2004)","journal-title":"Int. J. Comput. Vis."},{"key":"1615_CR24","doi-asserted-by":"crossref","unstructured":"Dalal, N., Triggs, B.: Histograms of oriented gradients for human detection. In: IEEE Computer Society Conference on Computer Vision and Pattern Recognition, pp. 886\u2013893 (2005)","DOI":"10.1109\/CVPR.2005.177"},{"key":"1615_CR25","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1016\/0031-3203(95)00067-4","volume":"29","author":"T Ojala","year":"1996","unstructured":"Ojala, T., Pietik\u00e4inen, M., Harwood, D.: A comparative study of texture measures with classification based on featured distributions. Pattern Recogn. 29, 51\u201359 (1996)","journal-title":"Pattern Recogn."},{"key":"1615_CR26","doi-asserted-by":"crossref","unstructured":"Li, Y., Song, Y., Luo, J.: Improving pairwise ranking for multi-label image classification. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1837\u20131845 (2017)","DOI":"10.1109\/CVPR.2017.199"},{"key":"1615_CR27","doi-asserted-by":"crossref","unstructured":"Yang, H., Zhou, J.T., Zhang, Y., Gao, B., Wu, J., Cai, J.: Exploit bounding box annotations for multi-label object recognition. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 280\u2013288 (2016)","DOI":"10.1109\/CVPR.2016.37"},{"key":"1615_CR28","doi-asserted-by":"publisher","first-page":"222","DOI":"10.1007\/s11263-013-0636-x","volume":"105","author":"J S\u00e1nchez","year":"2013","unstructured":"S\u00e1nchez, J., Perronnin, F., Mensink, T., Verbeek, J.J.: Image classification with the fisher vector: theory and practice. Int. J. Comput. Vis. 105, 222\u2013245 (2013)","journal-title":"Int. J. Comput. Vis."},{"key":"1615_CR29","doi-asserted-by":"crossref","unstructured":"Wang, Z., Chen, T., Li, G., Xu, R., Lin, L.: Multi-label image recognition by recurrently discovering attentional regions. In: IEEE International Conference on Computer Vision (ICCV), pp. 464\u2013472 (2017)","DOI":"10.1109\/ICCV.2017.58"},{"key":"1615_CR30","first-page":"2017","volume":"2","author":"M Jaderberg","year":"2015","unstructured":"Jaderberg, M., Simonyan, K., Zisserman, A., Kavukcuoglu, K.: Spatial transformer networks. Neural Inf. Process. Syst. 2, 2017\u20132025 (2015)","journal-title":"Neural Inf. Process. Syst."},{"key":"1615_CR31","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1007\/s11263-014-0733-5","volume":"111","author":"M Everingham","year":"2015","unstructured":"Everingham, M., Eslami, S.A., Van Gool, L., Williams, C.K., Winn, J., Zisserman, A.: The pascal visual object classes challenge: a retrospective. Int. J. Comput. Vis. 111, 98\u2013136 (2015)","journal-title":"Int. J. Comput. Vis."},{"key":"1615_CR32","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Maire, M., Belongie, S.J., Hays, J., et al.: Microsoft COCO: common objects in context. In: European Conference on Computer Vision, pp. 740\u2013755 (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"1615_CR33","doi-asserted-by":"crossref","unstructured":"Zhu, F., Li, H., Ouyang, W., Yu, N., Wang, X.: Learning spatial regularization with image-level supervisions for multi-label image classification. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5513\u20135522 (2017)","DOI":"10.1109\/CVPR.2017.219"},{"key":"1615_CR34","unstructured":"Xu, K., Ba, J., Kiros, R., Cho, K., Courville, A., Salakhudinov, R., Bengio, Y.: Show, attend and tell: neural image caption generation with visual attention. In: International Conference on Machine Learning, pp. 2048\u20132057 (2015)"},{"key":"1615_CR35","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9, 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"1615_CR36","first-page":"91","volume":"1","author":"S Ren","year":"2015","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster r-cnn: towards real-time object detection with region proposal networks. Adv. Neural Inf. Process. Syst. 1, 91\u201399 (2015)","journal-title":"Adv. Neural Inf. Process. Syst."}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-018-01615-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00371-018-01615-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-018-01615-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,4]],"date-time":"2026-04-04T14:25:35Z","timestamp":1775312735000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00371-018-01615-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,12,15]]},"references-count":36,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2019,10]]}},"alternative-id":["1615"],"URL":"https:\/\/doi.org\/10.1007\/s00371-018-01615-0","relation":{},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"value":"0178-2789","type":"print"},{"value":"1432-2315","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,12,15]]},"assertion":[{"value":"15 December 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}