{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T02:51:14Z","timestamp":1765507874625,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,10]]},"DOI":"10.1145\/3746252.3761432","type":"proceedings-article","created":{"date-parts":[[2025,11,8]],"date-time":"2025-11-08T01:03:27Z","timestamp":1762563807000},"page":"3942-3951","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Decoder-only Pre-training Enhancement for Spatio-temporal Traffic Forecasting"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8349-9131","authenticated-orcid":false,"given":"Tao","family":"Yu","sequence":"first","affiliation":[{"name":"Hikvision Research Institute, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3366-0016","authenticated-orcid":false,"given":"Junhong","family":"Wan","sequence":"additional","affiliation":[{"name":"Hikvision Research Institute, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8931-3665","authenticated-orcid":false,"given":"Yao","family":"Fu","sequence":"additional","affiliation":[{"name":"Hikvision Research Institute, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3482-8538","authenticated-orcid":false,"given":"Weihao","family":"Jiang","sequence":"additional","affiliation":[{"name":"Hikvision Research Institute, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-0877-0490","authenticated-orcid":false,"given":"Jiang","family":"Zhu","sequence":"additional","affiliation":[{"name":"Hikvision Research Institute, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,11,10]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al., 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_2_2_1","first-page":"17804","article-title":"Adaptive graph convolutional recurrent network for traffic forecasting","author":"Bai Lei","year":"2020","unstructured":"Lei Bai, Lina Yao, Can Li, Xianzhi Wang, and Can Wang. 2020. Adaptive graph convolutional recurrent network for traffic forecasting. In NeurIPS. 17804-17815.","journal-title":"NeurIPS."},{"key":"e_1_3_2_2_3_1","first-page":"5","article-title":"An intuitive proof of the data processing inequality","volume":"12","author":"Beaudry Normand J.","year":"2012","unstructured":"Normand J. Beaudry and Renato Renner. 2012. An intuitive proof of the data processing inequality. Quantum Inf. Comput., Vol. 12, 5-6 (2012), 432-441.","journal-title":"Quantum Inf. Comput."},{"key":"e_1_3_2_2_4_1","first-page":"1877","article-title":"Language models are few-shot learners","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, et al., 2020. Language models are few-shot learners. In NeurIPS. 1877-1901.","journal-title":"NeurIPS."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.3141\/1748-12"},{"key":"e_1_3_2_2_6_1","first-page":"2741","article-title":"Brainnet: Epileptic wave detection from seeg with hierarchical graph diffusion learning","author":"Chen Junru","year":"2022","unstructured":"Junru Chen, Yang Yang, Tao Yu, Yingying Fan, Xiaolong Mo, and Carl Yang. 2022. Brainnet: Epileptic wave detection from seeg with hierarchical graph diffusion learning. In SIGKDD. 2741-2751.","journal-title":"SIGKDD."},{"key":"e_1_3_2_2_7_1","first-page":"2900","article-title":"Towards spatio-temporal aware traffic time series forecasting","author":"Cirstea Razvan-Gabriel","year":"2022","unstructured":"Razvan-Gabriel Cirstea, Bin Yang, Chenjuan Guo, Tung Kieu, and Shirui Pan. 2022. Towards spatio-temporal aware traffic time series forecasting. In ICDE. 2900-2913.","journal-title":"ICDE."},{"key":"e_1_3_2_2_8_1","first-page":"2965","article-title":"Historical inertia: A neglected but powerful baseline for long sequence time-series forecasting","author":"Cui Yue","year":"2021","unstructured":"Yue Cui, Jiandong Xie, and Kai Zheng. 2021. Historical inertia: A neglected but powerful baseline for long sequence time-series forecasting. In CIKM. 2965-2969.","journal-title":"CIKM."},{"key":"e_1_3_2_2_9_1","first-page":"269","article-title":"St-norm: Spatial and temporal normalization for multi-variate time series forecasting","author":"Deng Jinliang","year":"2021","unstructured":"Jinliang Deng, Xiusi Chen, Renhe Jiang, Xuan Song, and Ivor W Tsang. 2021. St-norm: Spatial and temporal normalization for multi-variate time series forecasting. In SIGKDD. 269-278.","journal-title":"SIGKDD."},{"key":"e_1_3_2_2_10_1","first-page":"631","article-title":"Heterogeneity-informed meta-parameter learning for spatiotemporal time series forecasting","author":"Dong Zheng","year":"2024","unstructured":"Zheng Dong, Renhe Jiang, Haotian Gao, Hangchen Liu, Jinliang Deng, Qingsong Wen, and Xuan Song. 2024. Heterogeneity-informed meta-parameter learning for spatiotemporal time series forecasting. In SIGKDD. 631-641.","journal-title":"SIGKDD."},{"key":"e_1_3_2_2_11_1","first-page":"324","article-title":"Using LSTM and GRU neural network methods for traffic flow prediction","author":"Fu Rui","year":"2016","unstructured":"Rui Fu, Zuo Zhang, and Li Li. 2016. Using LSTM and GRU neural network methods for traffic flow prediction. In YAC. 324-328.","journal-title":"YAC."},{"key":"e_1_3_2_2_12_1","first-page":"3998","article-title":"Spatial-Temporal-Decoupled Masked Pre-training for Spatiotemporal Forecasting","author":"Gao Haotian","year":"2024","unstructured":"Haotian Gao, Renhe Jiang, Zheng Dong, Jinliang Deng, Yuxin Ma, and Xuan Song. 2024. Spatial-Temporal-Decoupled Masked Pre-training for Spatiotemporal Forecasting. In IJCAI. 3998-4006.","journal-title":"IJCAI."},{"key":"e_1_3_2_2_13_1","first-page":"981","article-title":"Forecasting urban traffic flow by SVR","author":"Gong Jun","year":"2013","unstructured":"Jun Gong, Lin Qi, Mingyue Liu, and Xiuyang Chen. 2013. Forecasting urban traffic flow by SVR. In CCDC. 981-984.","journal-title":"CCDC."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.3301922"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i4.25556"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i7.25976"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2022.117921"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2023.3333824"},{"key":"e_1_3_2_2_19_1","first-page":"4171","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"Ming-Wei Chang Jacob Devlin","year":"2019","unstructured":"Jacob Devlin Ming-Wei Chang Kenton and Lee Kristina Toutanova. 2019. Bert: Pre-training of deep bidirectional transformers for language understanding. In NAACL. 4171-4186.","journal-title":"NAACL."},{"key":"e_1_3_2_2_20_1","unstructured":"Yaguang Li Rose Yu Cyrus Shahabi and Yan Liu. 2018. Diffusion convolutional recurrent neural network: Data-driven traffic forecasting. In ICLR."},{"key":"e_1_3_2_2_21_1","first-page":"4125","article-title":"Spatio-temporal adaptive embedding makes vanilla transformer sota for traffic forecasting","author":"Liu Hangchen","year":"2023","unstructured":"Hangchen Liu, Zheng Dong, Renhe Jiang, Jiewen Deng, Jinliang Deng, Quanjun Chen, and Xuan Song. 2023. Spatio-temporal adaptive embedding makes vanilla transformer sota for traffic forecasting. In CIKM. 4125-4129.","journal-title":"CIKM."},{"key":"e_1_3_2_2_22_1","volume-title":"Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu. 2019. Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)."},{"key":"e_1_3_2_2_23_1","first-page":"136","article-title":"Integrating granger causality and vector auto-regression for traffic prediction of large-scale WLANs","volume":"10","author":"Lu Zheng","year":"2016","unstructured":"Zheng Lu, Chen Zhou, Jing Wu, Hao Jiang, and Songyue Cui. 2016. Integrating granger causality and vector auto-regression for traffic prediction of large-scale WLANs. KSII Transactions on Internet and Information Systems (TIIS), Vol. 10, 1 (2016), 136-151.","journal-title":"KSII Transactions on Internet and Information Systems (TIIS)"},{"key":"e_1_3_2_2_24_1","first-page":"1","article-title":"How powerful are decoder-only transformer neural models?","author":"Roberts Jesse","year":"2024","unstructured":"Jesse Roberts. 2024. How powerful are decoder-only transformer neural models?. In IJCNN. 1-8.","journal-title":"IJCNN."},{"key":"e_1_3_2_2_25_1","unstructured":"Chao Shang Jie Chen and Jinbo Bi. 2021. Discrete graph structure learning for forecasting multiple time series. In ICLR."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2024.3484454"},{"key":"e_1_3_2_2_27_1","first-page":"4454","article-title":"Spatial-temporal identity: A simple yet effective baseline for multivariate time series forecasting","author":"Shao Zezhi","year":"2022","unstructured":"Zezhi Shao, Zhao Zhang, Fei Wang, Wei Wei, and Yongjun Xu. 2022b. Spatial-temporal identity: A simple yet effective baseline for multivariate time series forecasting. In CIKM. 4454-4458.","journal-title":"CIKM."},{"key":"e_1_3_2_2_28_1","first-page":"1567","article-title":"Pre-training enhanced spatial-temporal graph neural network for multivariate time series forecasting","author":"Shao Zezhi","year":"2022","unstructured":"Zezhi Shao, Zhao Zhang, Fei Wang, and Yongjun Xu. 2022a. Pre-training enhanced spatial-temporal graph neural network for multivariate time series forecasting. In SIGKDD. 1567-1577.","journal-title":"SIGKDD."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i01.5438"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1002\/atr.1229"},{"key":"e_1_3_2_2_31_1","first-page":"3104","article-title":"Sequence to sequence learning with neural networks","author":"Sutskever Ilya","year":"2014","unstructured":"Ilya Sutskever, Oriol Vinyals, and Quoc V Le. 2014. Sequence to sequence learning with neural networks. In NeurIPS. 3104-3112.","journal-title":"NeurIPS."},{"key":"e_1_3_2_2_32_1","first-page":"153","article-title":"Predicting short-term traffic flow by long short-term memory recurrent neural network","author":"Tian Yongxue","year":"2015","unstructured":"Yongxue Tian and Li Pan. 2015. Predicting short-term traffic flow by long short-term memory recurrent neural network. In SmartCity. 153-158.","journal-title":"SmartCity."},{"key":"e_1_3_2_2_33_1","first-page":"5998","article-title":"Attention is all you need","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In NeurIPS. 5998-6008.","journal-title":"NeurIPS."},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eng.2022.04.024"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2021.3130762"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAU.1967.1161901"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.2978386"},{"key":"e_1_3_2_2_38_1","first-page":"753","article-title":"Connecting the dots: Multivariate time series forecasting with graph neural networks","author":"Wu Zonghan","year":"2020","unstructured":"Zonghan Wu, Shirui Pan, Guodong Long, Jing Jiang, Xiaojun Chang, and Chengqi Zhang. 2020b. Connecting the dots: Multivariate time series forecasting with graph neural networks. In SIGKDD. 753-763.","journal-title":"SIGKDD."},{"key":"e_1_3_2_2_39_1","first-page":"1907","article-title":"Graph wavenet for deep spatial-temporal graph modeling","author":"Wu Zonghan","year":"2019","unstructured":"Zonghan Wu, Shirui Pan, Guodong Long, Jing Jiang, and Chengqi Zhang. 2019. Graph wavenet for deep spatial-temporal graph modeling. In IJCAI. 1907-1913.","journal-title":"IJCAI."},{"key":"e_1_3_2_2_40_1","first-page":"3634","article-title":"Spatio-temporal graph convolutional networks: a deep learning framework for traffic forecasting","author":"Yu Bing","year":"2018","unstructured":"Bing Yu, Haoteng Yin, and Zhanxing Zhu. 2018. Spatio-temporal graph convolutional networks: a deep learning framework for traffic forecasting. In IJCAI. 3634-3640.","journal-title":"IJCAI."},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1007\/s41019-020-00151-z"},{"key":"e_1_3_2_2_42_1","first-page":"26304","article-title":"Brant: Foundation model for intracranial neural signal","author":"Zhang Daoze","year":"2023","unstructured":"Daoze Zhang, Zhizhang Yuan, Yang Yang, Junru Chen, Jingjing Wang, and Yafeng Li. 2023. Brant: Foundation model for intracranial neural signal. In NeurIPS. 26304-26321.","journal-title":"NeurIPS."}],"event":{"name":"CIKM '25: The 34th ACM International Conference on Information and Knowledge Management","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Seoul Republic of Korea","acronym":"CIKM '25"},"container-title":["Proceedings of the 34th ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746252.3761432","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T02:47:42Z","timestamp":1765507662000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746252.3761432"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,10]]},"references-count":42,"alternative-id":["10.1145\/3746252.3761432","10.1145\/3746252"],"URL":"https:\/\/doi.org\/10.1145\/3746252.3761432","relation":{},"subject":[],"published":{"date-parts":[[2025,11,10]]},"assertion":[{"value":"2025-11-10","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}