{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T00:01:59Z","timestamp":1765497719623,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":17,"publisher":"ACM","funder":[{"name":"Guangdong Provincial Key Lab of Integrated Communication, Sensing and Computation for Ubiquitous Internet of Things","award":["2023B1212010007"],"award-info":[{"award-number":["2023B1212010007"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,10]]},"DOI":"10.1145\/3746252.3761468","type":"proceedings-article","created":{"date-parts":[[2025,11,7]],"date-time":"2025-11-07T23:55:33Z","timestamp":1762559733000},"page":"6747-6751","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["<scp>VoiceVisSystem<\/scp>\n                    : End-to-End Voice-driven Data Visualization Generation from Natural Language Questions"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8470-7246","authenticated-orcid":false,"given":"Haodi","family":"Zhang","sequence":"first","affiliation":[{"name":"Shenzhen University, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-2550-7574","authenticated-orcid":false,"given":"Xiaohui","family":"Tang","sequence":"additional","affiliation":[{"name":"Shenzhen University, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-9385-2218","authenticated-orcid":false,"given":"Xinhe","family":"Zhang","sequence":"additional","affiliation":[{"name":"Shenzhen University, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-1787-4795","authenticated-orcid":false,"given":"Jihua","family":"Zhou","sequence":"additional","affiliation":[{"name":"Shenzhen University, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2221-9807","authenticated-orcid":false,"given":"Yuanfeng","family":"Song","sequence":"additional","affiliation":[{"name":"WeBank Co., Ltd, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,11,10]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU51503.2021.9688253"},{"key":"e_1_3_2_1_2_1","volume-title":"Text-to-viz: Automatic generation of infographics from proportion-related natural language statements","author":"Cui Weiwei","year":"2019","unstructured":"Weiwei Cui, Xiaoyu Zhang, Yun Wang, He Huang, Bei Chen, Lei Fang, Haidong Zhang, Jian-Guan Lou, and Dongmei Zhang. 2019. Text-to-viz: Automatic generation of infographics from proportion-related natural language statements. IEEE transactions on visualization and computer graphics, Vol. 26, 1 (2019), 906-916."},{"key":"e_1_3_2_1_3_1","volume-title":"Data2vis: Automatic generation of data visualizations using sequence-to-sequence recurrent neural networks","author":"Dibia Victor","year":"2019","unstructured":"Victor Dibia and \u00c7a\u011fatay Demiralp. 2019. Data2vis: Automatic generation of data visualizations using sequence-to-sequence recurrent neural networks. IEEE computer graphics and applications, Vol. 39, 5 (2019), 33-46."},{"key":"e_1_3_2_1_4_1","volume-title":"SONAR: sentence-level multimodal and language-agnostic representations. arXiv preprint arXiv:2308.11466","author":"Duquenne Paul-Ambroise","year":"2023","unstructured":"Paul-Ambroise Duquenne, Holger Schwenk, and Beno^it Sagot. 2023. SONAR: sentence-level multimodal and language-agnostic representations. arXiv preprint arXiv:2308.11466 (2023)."},{"key":"e_1_3_2_1_5_1","volume-title":"Paraformer: Fast and Accurate Parallel Transformer for Non-autoregressive End-to-End Speech Recognition. In INTERSPEECH.","author":"Gao Zhifu","year":"2022","unstructured":"Zhifu Gao, Shiliang Zhang, Ian McLoughlin, and Zhijie Yan. 2022. Paraformer: Fast and Accurate Parallel Transformer for Non-autoregressive End-to-End Speech Recognition. In INTERSPEECH."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1444"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3448016.3457261"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/2882903.2899394"},{"key":"e_1_3_2_1_9_1","volume-title":"Formalizing visualization design knowledge as constraints: Actionable and extensible models in draco","author":"Moritz Dominik","year":"2018","unstructured":"Dominik Moritz, Chenglong Wang, Greg L Nelson, Halden Lin, Adam M Smith, Bill Howe, and Jeffrey Heer. 2018. Formalizing visualization design knowledge as constraints: Actionable and extensible models in draco. IEEE transactions on visualization and computer graphics, Vol. 25, 1 (2018), 438-448."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00778-019-00588-3"},{"key":"e_1_3_2_1_11_1","volume-title":"Raymond Chi-Wing Wong, and Xuefang Zhao","author":"Song Yuanfeng","year":"2024","unstructured":"Yuanfeng Song, Raymond Chi-Wing Wong, and Xuefang Zhao. 2024. Speech-to-SQL: toward speech-driven SQL query generation from natural language question. The VLDB Journal (2024), 1-23."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3514221.3520158"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539330"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3514221.3520150"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3092931.3092937"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/s41019-020-00151-z"},{"key":"e_1_3_2_1_17_1","volume-title":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases.","author":"Zhang Haodi","year":"2025","unstructured":"Haodi Zhang, Xinhe Zhang, Jihua Zhou, Kaishun Wu, Yuanfeng Song, and Raymond Chi-Wing Wong. 2025. Speech-to-Visualization: Toward End-to-End Speech-Driven Data Visualization Generation from Natural Language Questions. In Joint European Conference on Machine Learning and Knowledge Discovery in Databases."}],"event":{"name":"CIKM '25: The 34th ACM International Conference on Information and Knowledge Management","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Seoul Republic of Korea","acronym":"CIKM '25"},"container-title":["Proceedings of the 34th ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746252.3761468","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,11]],"date-time":"2025-12-11T23:59:17Z","timestamp":1765497557000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746252.3761468"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,10]]},"references-count":17,"alternative-id":["10.1145\/3746252.3761468","10.1145\/3746252"],"URL":"https:\/\/doi.org\/10.1145\/3746252.3761468","relation":{},"subject":[],"published":{"date-parts":[[2025,11,10]]},"assertion":[{"value":"2025-11-10","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}