{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T16:28:47Z","timestamp":1774369727288,"version":"3.50.1"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2024,9,20]],"date-time":"2024-09-20T00:00:00Z","timestamp":1726790400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,9,20]],"date-time":"2024-09-20T00:00:00Z","timestamp":1726790400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2024,10]]},"DOI":"10.1007\/s00530-024-01493-8","type":"journal-article","created":{"date-parts":[[2024,9,20]],"date-time":"2024-09-20T14:03:02Z","timestamp":1726840982000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Dual-stream multi-label image classification model enhanced by feature reconstruction"],"prefix":"10.1007","volume":"30","author":[{"given":"Liming","family":"Hu","sequence":"first","affiliation":[]},{"given":"Mingxuan","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Anjie","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Zhijun","family":"Fang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,9,20]]},"reference":[{"key":"1493_CR1","doi-asserted-by":"publisher","first-page":"1494","DOI":"10.1016\/j.patcog.2013.09.029","volume":"47","author":"E Montanes","year":"2014","unstructured":"Montanes, E., Senge, R., Barranquero, J., Quevedo, J.R., Coz, J.J., H\u00fcllermeier, E.: Dependent binary relevance models for multi-label classification. Pattern Recognit. 47, 1494\u20131508 (2014)","journal-title":"Pattern Recognit."},{"key":"1493_CR2","doi-asserted-by":"crossref","unstructured":"Wang, J., Yang, Y., Mao, J., Huang, Z., Huang, C., Xu, W.: CNN-RNN: a unified framework for multi-label image classification. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2285\u20132294 (2016)","DOI":"10.1109\/CVPR.2016.251"},{"key":"1493_CR3","unstructured":"Yazici, V.O., Gonzalez-Garcia, A., Ramisa, A., Twardowski, B., Weijer, J.V.d.: Orderless recurrent models for multi-label classification. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13440\u201313449 (2020)"},{"issue":"8","key":"1493_CR4","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"1493_CR5","doi-asserted-by":"crossref","unstructured":"Chen, Z.-M., Wei, X.-S., Wang, P., Guo, Y.: Multi-label image recognition with graph convolutional networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5177\u20135186 (2019)","DOI":"10.1109\/CVPR.2019.00532"},{"key":"1493_CR6","doi-asserted-by":"crossref","unstructured":"Ye, J., He, J., Peng, X., Wu, W., Qiao, Y.: Attention-driven dynamic graph convolutional network for multi-label image recognition. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XXI 16, pp. 649\u2013665. Springer (2020)","DOI":"10.1007\/978-3-030-58589-1_39"},{"key":"1493_CR7","doi-asserted-by":"crossref","unstructured":"Wang, Y., He, D., Li, F., Long, X., Zhou, Z., Ma, J., Wen, S.: Multi-label classification with label graph superimposing. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34, pp. 12265\u201312272 (2020)","DOI":"10.1609\/aaai.v34i07.6909"},{"key":"1493_CR8","doi-asserted-by":"publisher","first-page":"1696","DOI":"10.1109\/TMM.2020.3002185","volume":"23","author":"J Xu","year":"2020","unstructured":"Xu, J., Tian, H., Wang, Z., Wang, Y., Kang, W., Chen, F.: Joint input and output space learning for multi-label image classification. IEEE Trans. Multimedia 23, 1696\u20131707 (2020)","journal-title":"IEEE Trans. Multimedia"},{"key":"1493_CR9","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. In: Advances in neural information processing systems, vol. 30 (2017)"},{"key":"1493_CR10","doi-asserted-by":"crossref","unstructured":"Lanchantin, J., Wang, T., Ordonez, V., Qi, Y.: General multi-label image classification with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16478\u201316488 (2021)","DOI":"10.1109\/CVPR46437.2021.01621"},{"key":"1493_CR11","doi-asserted-by":"crossref","unstructured":"Zhang, L., Liu, J., Bao, Y., Wang, J.: Region-awared transformer with asymmetric loss in multi-label classification. In: ICASSP 2023\u20142023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 1\u20135. IEEE (2023)","DOI":"10.1109\/ICASSP49357.2023.10095686"},{"key":"1493_CR12","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"1493_CR13","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., et al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"1493_CR14","doi-asserted-by":"crossref","unstructured":"Chen, T., Xu, M., Hui, X., Wu, H., Lin, L.: Learning semantic-specific graph representation for multi-label image recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 522\u2013531 (2019)","DOI":"10.1109\/ICCV.2019.00061"},{"key":"1493_CR15","doi-asserted-by":"publisher","first-page":"3000","DOI":"10.1109\/TIP.2023.3266161","volume":"32","author":"J Zhang","year":"2023","unstructured":"Zhang, J., Ren, J., Zhang, Q., Liu, J., Jiang, X.: Spatial context-aware object-attentional network for multi-label image classification. IEEE Trans. Image Process. 32, 3000\u20133012 (2023)","journal-title":"IEEE Trans. Image Process."},{"key":"1493_CR16","first-page":"924","volume":"32","author":"Y Wu","year":"2023","unstructured":"Wu, Y., Feng, S., Zhao, G., Jin, Y.: Transformer driven matching selection mechanism for multi-label image classification. IEEE Trans. Circuits Syst. Video Technol. 32, 924\u2013937 (2023)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"8","key":"1493_CR17","doi-asserted-by":"publisher","first-page":"1971","DOI":"10.1109\/TMM.2019.2894964","volume":"21","author":"F Lyu","year":"2019","unstructured":"Lyu, F., Wu, Q., Hu, F., Wu, Q., Tan, M.: Attend and imagine: multi-label image classification with visual attention and recurrent neural networks. IEEE Trans. Multimedia 21(8), 1971\u20131981 (2019)","journal-title":"IEEE Trans. Multimedia"},{"key":"1493_CR18","doi-asserted-by":"publisher","first-page":"1143","DOI":"10.1109\/TMM.2023.3277279","volume":"26","author":"W Zhou","year":"2024","unstructured":"Zhou, W., Jiang, W., Chen, D., Hu, H., Su, T.: Mining semantic information with dual relation graph network for multi-label image classification. IEEE Trans. Multimedia 26, 1143\u20131157 (2024). https:\/\/doi.org\/10.1109\/TMM.2023.3277279","journal-title":"IEEE Trans. Multimedia"},{"key":"1493_CR19","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.109203","volume":"136","author":"W Zhou","year":"2023","unstructured":"Zhou, W., Dou, P., Su, T., Hu, H., Zheng, Z.: Feature learning network with transformer for multi-label image classification. Pattern Recognit. 136, 109203 (2023)","journal-title":"Pattern Recognit."},{"issue":"11","key":"1493_CR20","doi-asserted-by":"publisher","first-page":"6788","DOI":"10.1109\/TCSVT.2023.3268997","volume":"33","author":"Y Wu","year":"2023","unstructured":"Wu, Y., Feng, S., Wang, Y.: Semantic-aware graph matching mechanism for multi-label image recognition. IEEE Trans. Circuits Syst. Video Technol. 33(11), 6788\u20136803 (2023). https:\/\/doi.org\/10.1109\/TCSVT.2023.3268997","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"2","key":"1493_CR21","doi-asserted-by":"publisher","first-page":"423","DOI":"10.1109\/TPAMI.2018.2798607","volume":"41","author":"T Baltru\u0161aitis","year":"2018","unstructured":"Baltru\u0161aitis, T., Ahuja, C., Morency, L.-P.: Multimodal machine learning: a survey and taxonomy. IEEE Trans. Pattern Anal. Mach. Intell. 41(2), 423\u2013443 (2018)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1493_CR22","doi-asserted-by":"crossref","unstructured":"Cornia, M., Stefanini, M., Baraldi, L., Cucchiara, R.: Meshed-memory transformer for image captioning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10578\u201310587 (2020)","DOI":"10.1109\/CVPR42600.2020.01059"},{"key":"1493_CR23","unstructured":"Mokady, R., Hertz, A., Bermano, A.H.: ClipCap: CLIP prefix for image captioning. arXiv preprint arXiv:2111.09734 (2021)"},{"key":"1493_CR24","doi-asserted-by":"crossref","unstructured":"Ding, Y., Yu, J., Liu, B., Hu, Y., Cui, M., Wu, Q.: MuKEA: multimodal knowledge extraction and accumulation for knowledge-based visual question answering. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5089\u20135098 (2022)","DOI":"10.1109\/CVPR52688.2022.00503"},{"key":"1493_CR25","unstructured":"Radford, A., Kim, J.W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., Clark, J., et al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"1493_CR26","doi-asserted-by":"crossref","unstructured":"Liu, Z., Hu, H., Lin, Y., Yao, Z., Xie, Z., Wei, Y., Ning, J., Cao, Y., Zhang, Z., Dong, L., Wei, F., Guo, B.: Swin transformer v2: Scaling up capacity and resolution (2022)","DOI":"10.1109\/CVPR52688.2022.01170"},{"key":"1493_CR27","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.L.: Microsoft COCO: common objects in context. In: Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6\u201312, 2014, Proceedings, Part V 13, pp. 740\u2013755. Springer (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"1493_CR28","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C.K., Winn, J., Zisserman, A.: The pascal visual object classes (VOC) challenge. Int. J. Comput. Vision 88, 303\u2013338 (2010)","journal-title":"Int. J. Comput. Vision"},{"key":"1493_CR29","doi-asserted-by":"crossref","unstructured":"Chua, T.-S., Tang, J., Hong, R., Li, H., Luo, Z., Zheng, Y.: NUS-WIDE: a real-world web image database from National University of Singapore. In: Proceedings of the ACM International Conference on Image and Video Retrieval, pp. 1\u20139 (2009)","DOI":"10.1145\/1646396.1646452"},{"key":"1493_CR30","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"1493_CR31","doi-asserted-by":"crossref","unstructured":"Zhu, F., Li, H., Ouyang, W., Yu, N., Wang, X.: Learning spatial regularization with image-level supervisions for multi-label image classification. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5513\u20135522 (2017)","DOI":"10.1109\/CVPR.2017.219"},{"key":"1493_CR32","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"issue":"3","key":"1493_CR33","doi-asserted-by":"publisher","first-page":"1371","DOI":"10.1109\/TPAMI.2020.3025814","volume":"44","author":"T Chen","year":"2020","unstructured":"Chen, T., Lin, L., Chen, R., Hui, X., Wu, H.: Knowledge-guided multi-label few-shot learning for general image recognition. IEEE Trans. Pattern Anal. Mach. Intell. 44(3), 1371\u20131384 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"9","key":"1493_CR34","doi-asserted-by":"publisher","first-page":"1901","DOI":"10.1109\/TPAMI.2015.2491929","volume":"38","author":"Y Wei","year":"2015","unstructured":"Wei, Y., Xia, W., Lin, M., Huang, J., Ni, B., Dong, J., Zhao, Y., Yan, S.: HCP: a flexible CNN framework for multi-label image classification. IEEE Trans. Pattern Anal. Mach. Intell. 38(9), 1901\u20131907 (2015)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1493_CR35","doi-asserted-by":"crossref","unstructured":"Wang, Z., Chen, T., Li, G., Xu, R., Lin, L.: Multi-label image recognition by recurrently discovering attentional regions. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 464\u2013472 (2017)","DOI":"10.1109\/ICCV.2017.58"},{"key":"1493_CR36","doi-asserted-by":"crossref","unstructured":"Zhu, K., Wu, J.: Residual attention: a simple but effective method for multi-label recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 184\u2013193 (2021)","DOI":"10.1109\/ICCV48922.2021.00025"},{"key":"1493_CR37","doi-asserted-by":"publisher","first-page":"4013","DOI":"10.1109\/TMM.2022.3171095","volume":"25","author":"X Deng","year":"2022","unstructured":"Deng, X., Feng, S., Lyu, G., Wang, T., Lang, C.: Beyond word embeddings: heterogeneous prior knowledge driven multi-label image classification. IEEE Trans. Multimedia 25, 4013\u20134025 (2022)","journal-title":"IEEE Trans. Multimedia"},{"key":"1493_CR38","doi-asserted-by":"crossref","unstructured":"Wu, Y., Liu, H., Feng, S., Jin, Y., Lyu, G., Wu, Z.: GM-MLIC: graph matching based multi-label image classification. arXiv preprint arXiv:2104.14762 (2021)","DOI":"10.24963\/ijcai.2021\/163"},{"key":"1493_CR39","doi-asserted-by":"publisher","first-page":"2570","DOI":"10.1109\/TIP.2022.3148867","volume":"31","author":"Z-M Chen","year":"2022","unstructured":"Chen, Z.-M., Cui, Q., Zhao, B., Song, R., Zhang, X., Yoshie, O.: SST: spatial and semantic transformers for multi-label image recognition. IEEE Trans. Image Process. 31, 2570\u20132583 (2022). https:\/\/doi.org\/10.1109\/TIP.2022.3148867","journal-title":"IEEE Trans. Image Process."}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01493-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-024-01493-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01493-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T18:15:10Z","timestamp":1730139310000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-024-01493-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,20]]},"references-count":39,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2024,10]]}},"alternative-id":["1493"],"URL":"https:\/\/doi.org\/10.1007\/s00530-024-01493-8","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,9,20]]},"assertion":[{"value":"2 April 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 September 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 September 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no conflict of interest to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"281"}}