{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:55:03Z","timestamp":1781538903877,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T00:00:00Z","timestamp":1781481600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/100016698","name":"the National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62074055"],"award-info":[{"award-number":["62074055"]}],"id":[{"id":"10.13039\/100016698","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,16]]},"DOI":"10.1145\/3805622.3810697","type":"proceedings-article","created":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:42:57Z","timestamp":1781534577000},"page":"1346-1354","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["QMTD: Query Vector Guided Multi-Scale Text Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9924-0685","authenticated-orcid":false,"given":"Zhiqiang","family":"You","sequence":"first","affiliation":[{"name":"Hunan University, Changsha, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-4483-9498","authenticated-orcid":false,"given":"Yutong","family":"Jiang","sequence":"additional","affiliation":[{"name":"Hunan University, Changsha, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1387-2153","authenticated-orcid":false,"given":"Shenguang","family":"Huang","sequence":"additional","affiliation":[{"name":"Ningbo Port Information Communication Co., Ltd., Ningbo, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-7935-3912","authenticated-orcid":false,"given":"Zhangjie","family":"Liu","sequence":"additional","affiliation":[{"name":"Hunan University, Changsha, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5189-9534","authenticated-orcid":false,"given":"Gaode","family":"Wu","sequence":"additional","affiliation":[{"name":"Ningbo Port Information Communication Co., Ltd., Ningbo, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-9843-1619","authenticated-orcid":false,"given":"Haoyu","family":"Wang","sequence":"additional","affiliation":[{"name":"Meta, NewYork, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,15]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00959"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02659"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","unstructured":"Ruoxin Cheng Zhiqiang You Sovorn Chea Gaode Wu Kan Xia and Shenguang Huang. 2025. Fast and Lightweight Automatic Shipping Container Attributes Spotting. IEEE Transactions on Instrumentation and Measurement 74 (2025) art. no. 2531913. 10.1109\/TIM.2025.3573765","DOI":"10.1109\/TIM.2025.3573765"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2017.157"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2018.8546066"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"crossref","unstructured":"Bo Du Jian Ye Jing Zhang Juhua Liu and Dacheng Tao. 2022. I3cl: Intra-and inter-instance collaborative learning for arbitrary-shaped scene text detection. International Journal of Computer Vision 130 8 (2022) 1961\u20131977.","DOI":"10.1007\/s11263-022-01616-6"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.254"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2015.7333942"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3478328"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01325"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01483"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"crossref","unstructured":"Minghui Liao Baoguang Shi and Xiang Bai. 2018. Textboxes++: A single-shot oriented scene text detector. IEEE transactions on image processing 27 8 (2018) 3676\u20133690.","DOI":"10.1109\/TIP.2018.2825107"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6812"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"crossref","unstructured":"Minghui Liao Zhisheng Zou Zhaoyi Wan Cong Yao and Xiang Bai. 2022. Real-time scene text detection with differentiable binarization and adaptive scale fusion. IEEE transactions on pattern analysis and machine intelligence 45 1 (2022) 919\u2013931.","DOI":"10.1109\/TPAMI.2022.3155612"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00913"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00983"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01216-8_2"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","unstructured":"Zhiwen Shao Shengtian Jiang Hancheng Zhu Xuehuai Shi Canlin Li Lizhuang Ma and Dit-Yan Yeung. 2026. TextRSR: Enhanced Arbitrary-Shaped Scene Text Representation Via Robust Subspace Recovery. IEEE Transactions on Multimedia (2026) 1\u201314. 10.1109\/TMM.2026.3651034","DOI":"10.1109\/TMM.2026.3651034"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","unstructured":"Zhiwen Shao Yuchen Su Yong Zhou Fanrong Meng Hancheng Zhu Bing Liu and Rui Yao. 2024. CT-Net: Arbitrary-Shaped Text Detection via Contour Transformer. IEEE Transactions on Circuits and Systems for Video Technology 34 3 (2024) 1815\u20131826. 10.1109\/TCSVT.2023.3299087","DOI":"10.1109\/TCSVT.2023.3299087"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","unstructured":"Aishwarya Soni and Tanima Dutta. 2026. Towards Improving Arbitrary-Shaped Text Detection with Boundary Adaptation in Noisy Scene Images. IEEE MultiMedia (2026) 1\u201310. 10.1109\/MMUL.2026.3652839","DOI":"10.1109\/MMUL.2026.3652839"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00452"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00436"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3350988"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00956"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00853"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547882"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/139"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10096961"},{"key":"e_1_3_3_1_32_2","unstructured":"Liu Yuliang Jin Lianwen Zhang Shuaitao and Zhang Sheng. 2017. Detecting curve text in the wild: New dataset and new solution. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1712.02170 (2017)."},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01080"},{"key":"e_1_3_3_1_34_2","unstructured":"Hao Zhang Feng Li Shilong Liu Lei Zhang Hang Su Jun Zhu Lionel\u00a0M Ni and Heung-Yeung Shum. 2022. Dino: Detr with improved denoising anchor boxes for end-to-end object detection. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2203.03605 (2022)."},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01733"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.findings-emnlp.1395"},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"crossref","unstructured":"Shi-Xue Zhang Chun Yang Xiaobin Zhu and Xu-Cheng Yin. 2023. Arbitrary shape text detection via boundary transformer. IEEE Transactions on Multimedia 26 (2023) 1747\u20131760.","DOI":"10.1109\/TMM.2023.3286657"},{"key":"e_1_3_3_1_38_2","unstructured":"Shi-Xue Zhang Xiaobin Zhu Lei Chen Jie-Bo Hou and Xu-Cheng Yin. 2022. Arbitrary shape text detection via segmentation with probability maps. IEEE transactions on pattern analysis and machine intelligence 45 3 (2022) 2736\u20132750."},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00972"},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00134"},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00569"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10447371"},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00314"}],"event":{"name":"ICMR '26: International Conference on Multimedia Retrieval","location":"Amsterdam The Netherlands","acronym":"ICMR '26","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2026 International Conference on Multimedia Retrieval"],"original-title":[],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:11:53Z","timestamp":1781536313000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805622.3810697"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,15]]},"references-count":42,"alternative-id":["10.1145\/3805622.3810697","10.1145\/3805622"],"URL":"https:\/\/doi.org\/10.1145\/3805622.3810697","relation":{},"subject":[],"published":{"date-parts":[[2026,6,15]]},"assertion":[{"value":"2026-06-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}