{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:04:41Z","timestamp":1750309481933,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":28,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,12,6]],"date-time":"2024-12-06T00:00:00Z","timestamp":1733443200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,12,6]]},"DOI":"10.1145\/3709026.3709052","type":"proceedings-article","created":{"date-parts":[[2025,2,15]],"date-time":"2025-02-15T10:05:41Z","timestamp":1739613941000},"page":"83-90","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Depth-aware Image Captioning with Bus Dataset for Visually Impaired Individuals"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-1373-1849","authenticated-orcid":false,"given":"Wang","family":"Xiang","sequence":"first","affiliation":[{"name":"Department of Electronic and Information Engineering, Changchun University, Changchun, China, Changchun, Jilin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-9076-247X","authenticated-orcid":false,"given":"Li","family":"Sanqian","sequence":"additional","affiliation":[{"name":"Research lnstitute of Trustworthy Autonomous Systems and Department of Computer Science and Engineering, Southern University of Science and TeChinaology, Shenzhen, 518055, China, Shenzhen, Guangdong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8160-2841","authenticated-orcid":false,"given":"Higashita","family":"Risa","sequence":"additional","affiliation":[{"name":"Research lnstitute of Trustworthy Autonomous Systems and Department of Computer Science and Engineering, Southern University of Science and TeChinaology, Shenzhen, 518055, China, Shenzhen, Guangdong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-4431-0610","authenticated-orcid":false,"given":"Li","family":"Jie","sequence":"additional","affiliation":[{"name":"Department of Electronic and Information Engineering, Changchun University, Changchun, China, Changchun, Jilin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-8891-8395","authenticated-orcid":false,"given":"Qin","family":"Hongwu","sequence":"additional","affiliation":[{"name":"Department of Electronic and Information Engineering, Changchun University, Changchun, China, Changchun, Jilin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6281-6505","authenticated-orcid":false,"given":"Liu","family":"Jiang","sequence":"additional","affiliation":[{"name":"Research lnstitute of Trustworthy Autonomous Systems and Department of Computer Science and Engineering, Southern University of Science and TeChinaology, Shenzhen, 518055, China, Shenzhen, Guangdong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,2,15]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"crossref","unstructured":"Jon Almaz\u00e1n Albert Gordo Alicia Forn\u00e9s and Ernest Valveny. 2014. Word spotting and recognition with embedded attributes. IEEE transactions on pattern analysis and machine intelligence 36 12 (2014) 2552\u20132566.","DOI":"10.1109\/TPAMI.2014.2339814"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46454-1_24"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00636"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00192"},{"key":"e_1_3_3_1_6_2","unstructured":"Satanjeev Banerjee and Alon Lavie. 2004. Meteor: an automatic metric for MT evaluation with high levels of correlation with human judgments. Proceedings of ACL-WMT (2004) 65\u201372."},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"crossref","unstructured":"Piotr Bojanowski Edouard Grave Armand Joulin and Tomas Mikolov. 2017. Enriching word vectors with subword information. Transactions of the association for computational linguistics 5 (2017) 135\u2013146.","DOI":"10.1162\/tacl_a_00051"},{"key":"e_1_3_3_1_8_2","unstructured":"Jacob Devlin. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1810.04805 (2018)."},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1117\/12.2325523"},{"key":"e_1_3_3_1_10_2","first-page":"74","volume-title":"Text summarization branches out","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74\u201381."},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00983"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"crossref","unstructured":"Yuhang Liu Wei Wei Daowan Peng Xian-Ling Mao Zhiyong He and Pan Zhou. 2022. Depth-aware and semantic guided relational attention network for visual question answering. IEEE Transactions on Multimedia 25 (2022) 5344\u20135357.","DOI":"10.1109\/TMM.2022.3190686"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"crossref","unstructured":"Gang Lv Yining Sun Fudong Nian Maofei Zhu Wenliang Tang and Zhenzhen Hu. 2023. COME: Clip-OCR and master object for text image captioning. Image and Vision Computing 136 (2023) 104751.","DOI":"10.1016\/j.imavis.2023.104751"},{"key":"e_1_3_3_1_14_2","unstructured":"World\u00a0Health Organization. Blindness and vision impairment. [Online]. (????). www.who.int\/news-room\/fact-sheets\/detail\/blindness-and-visual-impairment\/."},{"key":"e_1_3_3_1_15_2","first-page":"311","volume-title":"Proceedings of the 40th annual meeting of the Association for Computational Linguistics","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. Bleu: a method for automatic evaluation of machine translation. In Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311\u2013318."},{"key":"e_1_3_3_1_16_2","unstructured":"Shaoqing Ren Kaiming He Ross Girshick and Jian Sun. 2015. Faster r-cnn: Towards real-time object detection with region proposal networks. Advances in neural information processing systems 28 (2015)."},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58536-5_44"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"crossref","unstructured":"Arisa Ueda Wei Yang and Komei Sugiura. 2023. Switching text-based image encoders for captioning images with text. IEEE Access 11 (2023) 55706\u201355715.","DOI":"10.1109\/ACCESS.2023.3282444"},{"key":"e_1_3_3_1_19_2","unstructured":"Ashish Vaswani. 2017. Attention is all you need. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1706.03762 (2017)."},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"e_1_3_3_1_21_2","unstructured":"Oriol Vinyals Meire Fortunato and Navdeep Jaitly. 2015. Pointer networks. Advances in neural information processing systems 28 (2015)."},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00136"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"crossref","unstructured":"Qi Wang Hongyu Deng Xue Wu Zhenguo Yang Yun Liu Yazhou Wang and Gefei Hao. 2023. LCM-Captioner: A lightweight text-based image captioning method with collaborative mechanism between vision and text. Neural Networks 162 (2023) 318\u2013329.","DOI":"10.1016\/j.neunet.2023.03.010"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i4.16389"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240583"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612571"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01245"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00987"}],"event":{"name":"CSAI 2024: 2024 8th International Conference on Computer Science and Artificial Intelligence (CSAI)","acronym":"CSAI 2024","location":"Beijing China"},"container-title":["Proceedings of the 2024 8th International Conference on Computer Science and Artificial Intelligence"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3709026.3709052","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3709026.3709052","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:31Z","timestamp":1750295851000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3709026.3709052"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,6]]},"references-count":28,"alternative-id":["10.1145\/3709026.3709052","10.1145\/3709026"],"URL":"https:\/\/doi.org\/10.1145\/3709026.3709052","relation":{},"subject":[],"published":{"date-parts":[[2024,12,6]]},"assertion":[{"value":"2025-02-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}