{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T11:40:03Z","timestamp":1755862803279,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":25,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,2,2]],"date-time":"2024-02-02T00:00:00Z","timestamp":1706832000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,2,2]]},"DOI":"10.1145\/3651671.3651752","type":"proceedings-article","created":{"date-parts":[[2024,6,7]],"date-time":"2024-06-07T18:55:50Z","timestamp":1717786550000},"page":"447-452","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Convolutionally Enhanced Feature Fusion Visual Transformer for Fine-Grained Visual Classification"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2744-0455","authenticated-orcid":false,"given":"Min","family":"Huang","sequence":"first","affiliation":[{"name":"College of Software Engineering, Zhengzhou University of Light Industry, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0491-9893","authenticated-orcid":false,"given":"Saixing","family":"Zhu","sequence":"additional","affiliation":[{"name":"College of Software Engineering, Zhengzhou University of Light Industry, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5399-362X","authenticated-orcid":false,"given":"Zehua","family":"Wang","sequence":"additional","affiliation":[{"name":"College of Software Engineering, Zhengzhou University of Light Industry, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8270-9747","authenticated-orcid":false,"given":"Shuanghong","family":"Qu","sequence":"additional","affiliation":[{"name":"College of Software Engineering\/College of Mathematics and Information Science, Zhengzhou University of Light Industry, China and \rSchool of Science and Technology, University of Camerino, Italy"}]}],"member":"320","published-online":{"date-parts":[[2024,6,7]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"849","volume-title":"T. Part-based R-CNNs for Fine-grained Category Detection. in Proc. ECCV","author":"Zhang N.","year":"2014","unstructured":"Zhang, N., Donahue, J., Girshick, R., Darrell, T. Part-based R-CNNs for Fine-grained Category Detection. in Proc. ECCV, Zurich, Switzerland, 2014, pp. 834\u2013849."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2017.10.002"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2723400"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.476"},{"key":"e_1_3_2_1_5_1","volume-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"Dosovitskiy A.","year":"2020","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X. H., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., Uszkoreit, J., Houlsby, N. An image is worth 16x16 words: Transformers for image recognition at scale. 2020, arXiv:2010.11929."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20077-9_17"},{"key":"e_1_3_2_1_7_1","first-page":"7146","volume-title":"Proc. ICCV","author":"Guo R. H.","year":"2021","unstructured":"Guo, R. H., Niu, D. T., Qu, L., Li, Z. B. SOTR: Segmenting objects with transformers. in Proc. ICCV, Montreal, Canada, 2021, pp. 7137\u20137146."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i1.19967"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.5244\/C.35.192"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747591"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00436"},{"key":"e_1_3_2_1_12_1","first-page":"1545","article-title":"Learning semantically enhanced feature for fine-grained image classification","volume":"27","author":"Luo W.","year":"2022","unstructured":"Luo, W., Zhang, H. M., Li, J., Wei, X. S. Learning semantically enhanced feature for fine-grained image classification. IEEE Trans. SPL, vol. 27, pp. 1545\u20131549, 2022, 10.1109\/LSP.2020.3020227.","journal-title":"IEEE Trans. SPL"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"e_1_3_2_1_14_1","first-page":"5470","volume-title":"Proc. CVPR","author":"Wang H. Y.","year":"2021","unstructured":"Wang, H. Y., Zhu, Y. K., Adam, H., Yuille, A., Chen, L. C. Max-deeplab: End-to-end panoptic segmentation with mask transformers. in Proc. CVPR, 2021, pp. 5459\u20135470."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01186"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00062"},{"key":"e_1_3_2_1_18_1","volume-title":"The Caltech-UCSD Birds-200-2011 Dataset","author":"Wah C.","year":"2011","unstructured":"Wah, C., Branson, S., Welinder, P., Perona, P., Belongie, S. The Caltech-UCSD Birds-200-2011 Dataset. California Institute of Technology, CNS-TR-2011-001, 2011."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6968"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58565-5_10"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00106"},{"key":"e_1_3_2_1_22_1","volume-title":"SIM-trans: Structure information modeling transformer for fine-grained visual categorization","author":"Sun H. B.","year":"2022","unstructured":"Sun, H. B., He, X. T., Peng, Y. X. SIM-trans: Structure information modeling transformer for fine-grained visual categorization. 2022, arXiv:2208.14607."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.11223"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00105"}],"event":{"name":"ICMLC 2024: 2024 16th International Conference on Machine Learning and Computing","acronym":"ICMLC 2024","location":"Shenzhen China"},"container-title":["Proceedings of the 2024 16th International Conference on Machine Learning and Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3651671.3651752","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3651671.3651752","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T11:17:32Z","timestamp":1755861452000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3651671.3651752"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,2]]},"references-count":25,"alternative-id":["10.1145\/3651671.3651752","10.1145\/3651671"],"URL":"https:\/\/doi.org\/10.1145\/3651671.3651752","relation":{},"subject":[],"published":{"date-parts":[[2024,2,2]]},"assertion":[{"value":"2024-06-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}