{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T10:18:27Z","timestamp":1780395507504,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":49,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,21]],"date-time":"2023-10-21T00:00:00Z","timestamp":1697846400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Hong Kong Research Grants Council,The Hong Kong Jockey Club Charities Trust"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,21]]},"DOI":"10.1145\/3583780.3615068","type":"proceedings-article","created":{"date-parts":[[2023,10,21]],"date-time":"2023-10-21T07:45:26Z","timestamp":1697874326000},"page":"1208-1217","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":16,"title":["ST-MoE: Spatio-Temporal Mixture-of-Experts for Debiasing in Traffic Prediction"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-5175-7667","authenticated-orcid":false,"given":"Shuhao","family":"Li","sequence":"first","affiliation":[{"name":"Fudan University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1656-5407","authenticated-orcid":false,"given":"Yue","family":"Cui","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology, Hong Kong SAR, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0242-3707","authenticated-orcid":false,"given":"Yan","family":"Zhao","sequence":"additional","affiliation":[{"name":"Aalborg University, Aalborg, Denmark"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6473-9272","authenticated-orcid":false,"given":"Weidong","family":"Yang","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2022-7387","authenticated-orcid":false,"given":"Ruiyuan","family":"Zhang","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology, Hong Kong SAR, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6343-1455","authenticated-orcid":false,"given":"Xiaofang","family":"Zhou","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology, Hong Kong SAR, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2023,10,21]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-80328-4"},{"key":"e_1_3_2_1_2_1","volume-title":"Spectral networks and locally connected networks on graphs. arXiv preprint arXiv:1312.6203","author":"Bruna Joan","year":"2013","unstructured":"Joan Bruna , Wojciech Zaremba , Arthur Szlam , and Yann LeCun . 2013. Spectral networks and locally connected networks on graphs. arXiv preprint arXiv:1312.6203 ( 2013 ). Joan Bruna, Wojciech Zaremba, Arthur Szlam, and Yann LeCun. 2013. Spectral networks and locally connected networks on graphs. arXiv preprint arXiv:1312.6203 (2013)."},{"key":"e_1_3_2_1_3_1","volume-title":"Bias and debias in recommender system: A survey and future directions. arXiv preprint arXiv:2010.03240","author":"Chen Jiawei","year":"2020","unstructured":"Jiawei Chen , Hande Dong , Xiang Wang , Fuli Feng , Meng Wang , and Xiangnan He. 2020. Bias and debias in recommender system: A survey and future directions. arXiv preprint arXiv:2010.03240 ( 2020 ). Jiawei Chen, Hande Dong, Xiang Wang, Fuli Feng, Meng Wang, and Xiangnan He. 2020. Bias and debias in recommender system: A survey and future directions. arXiv preprint arXiv:2010.03240 (2020)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2018.00018"},{"key":"e_1_3_2_1_5_1","volume-title":"Caglar Gulcehre, Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, and Yoshua Bengio.","author":"Cho Kyunghyun","year":"2014","unstructured":"Kyunghyun Cho , Bart Van Merri\u00ebnboer , Caglar Gulcehre, Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, and Yoshua Bengio. 2014 a. Learning phrase representations using RNN encoder-decoder for statistical machine translation. arXiv preprint arXiv:1406.1078 (2014). Kyunghyun Cho, Bart Van Merri\u00ebnboer, Caglar Gulcehre, Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, and Yoshua Bengio. 2014a. Learning phrase representations using RNN encoder-decoder for statistical machine translation. arXiv preprint arXiv:1406.1078 (2014)."},{"key":"e_1_3_2_1_6_1","volume-title":"EMNLP'14","author":"Cho Kyunghyun","unstructured":"Kyunghyun Cho , Bart van Merrienboer , c C aglar G\u00fc lcc ehre, Fethi Bougares, Holger Schwenk, and Yoshua Bengio. 2014b. Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation . In EMNLP'14 . Kyunghyun Cho, Bart van Merrienboer, cC aglar G\u00fc lcc ehre, Fethi Bougares, Holger Schwenk, and Yoshua Bengio. 2014b. Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation. In EMNLP'14."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.14778\/3489496.3489503"},{"key":"e_1_3_2_1_8_1","volume-title":"Convolutional neural networks on graphs with fast localized spectral filtering. Advances in neural information processing systems","author":"Defferrard Micha\u00ebl","year":"2016","unstructured":"Micha\u00ebl Defferrard , Xavier Bresson , and Pierre Vandergheynst . 2016. Convolutional neural networks on graphs with fast localized spectral filtering. Advances in neural information processing systems , Vol. 29 ( 2016 ). Micha\u00ebl Defferrard, Xavier Bresson, and Pierre Vandergheynst. 2016. Convolutional neural networks on graphs with fast localized spectral filtering. Advances in neural information processing systems, Vol. 29 (2016)."},{"key":"e_1_3_2_1_9_1","volume-title":"Distributed representations, simple recurrent networks, and grammatical structure. Machine learning","author":"Elman Jeffrey L","year":"1991","unstructured":"Jeffrey L Elman . 1991. Distributed representations, simple recurrent networks, and grammatical structure. Machine learning , Vol. 7 , 2 ( 1991 ), 195--225. Jeffrey L Elman. 1991. Distributed representations, simple recurrent networks, and grammatical structure. Machine learning, Vol. 7, 2 (1991), 195--225."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403320"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3038380"},{"key":"e_1_3_2_1_12_1","volume-title":"Deep learning","author":"Goodfellow Ian","unstructured":"Ian Goodfellow , Yoshua Bengio , and Aaron Courville . 2016. Deep learning . MIT press . Ian Goodfellow, Yoshua Bengio, and Aaron Courville. 2016. Deep learning. MIT press."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-24797-2"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.3301922"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2021.3056502"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467275"},{"key":"e_1_3_2_1_17_1","volume-title":"International Conference on Machine Learning. PMLR, 1512--1520","author":"Hern\u00e1ndez-Lobato Jos\u00e9 Miguel","year":"2014","unstructured":"Jos\u00e9 Miguel Hern\u00e1ndez-Lobato , Neil Houlsby , and Zoubin Ghahramani . 2014 . Probabilistic matrix factorization with non-random missing data . In International Conference on Machine Learning. PMLR, 1512--1520 . Jos\u00e9 Miguel Hern\u00e1ndez-Lobato, Neil Houlsby, and Zoubin Ghahramani. 2014. Probabilistic matrix factorization with non-random missing data. In International Conference on Machine Learning. PMLR, 1512--1520."},{"key":"e_1_3_2_1_18_1","volume-title":"Long short-term memory. Neural computation","author":"Hochreiter Sepp","year":"1997","unstructured":"Sepp Hochreiter and J\u00fcrgen Schmidhuber . 1997. Long short-term memory. Neural computation , Vol. 9 , 8 ( 1997 ), 1735--1780. Sepp Hochreiter and J\u00fcrgen Schmidhuber. 1997. Long short-term memory. Neural computation, Vol. 9, 8 (1997), 1735--1780."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403294"},{"key":"e_1_3_2_1_20_1","volume-title":"Hierarchical mixtures of experts and the EM algorithm. Neural computation","author":"Jordan Michael I","year":"1994","unstructured":"Michael I Jordan and Robert A Jacobs . 1994. Hierarchical mixtures of experts and the EM algorithm. Neural computation , Vol. 6 , 2 ( 1994 ), 181--214. Michael I Jordan and Robert A Jacobs. 1994. Hierarchical mixtures of experts and the EM algorithm. Neural computation, Vol. 6, 2 (1994), 181--214."},{"key":"e_1_3_2_1_21_1","volume-title":"Proceedings of the 2013 conference on empirical methods in natural language processing. 1700--1709","author":"Kalchbrenner Nal","year":"2013","unstructured":"Nal Kalchbrenner and Phil Blunsom . 2013 . Recurrent continuous translation models . In Proceedings of the 2013 conference on empirical methods in natural language processing. 1700--1709 . Nal Kalchbrenner and Phil Blunsom. 2013. Recurrent continuous translation models. In Proceedings of the 2013 conference on empirical methods in natural language processing. 1700--1709."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/2645710.2645754"},{"key":"e_1_3_2_1_23_1","volume-title":"Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907","author":"Kipf Thomas N","year":"2016","unstructured":"Thomas N Kipf and Max Welling . 2016. Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907 ( 2016 ). Thomas N Kipf and Max Welling. 2016. Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907 (2016)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3231541.3231544"},{"key":"e_1_3_2_1_25_1","volume-title":"Diffusion convolutional recurrent neural network: Data-driven traffic forecasting. arXiv preprint arXiv:1707.01926","author":"Li Yaguang","year":"2017","unstructured":"Yaguang Li , Rose Yu , Cyrus Shahabi , and Yan Liu . 2017. Diffusion convolutional recurrent neural network: Data-driven traffic forecasting. arXiv preprint arXiv:1707.01926 ( 2017 ). Yaguang Li, Rose Yu, Cyrus Shahabi, and Yan Liu. 2017. Diffusion convolutional recurrent neural network: Data-driven traffic forecasting. arXiv preprint arXiv:1707.01926 (2017)."},{"key":"e_1_3_2_1_26_1","first-page":"865","article-title":"Traffic flow prediction with big data: a deep learning approach","volume":"16","author":"Lv Yisheng","year":"2014","unstructured":"Yisheng Lv , Yanjie Duan , Wenwen Kang , Zhengxi Li , and Fei-Yue Wang . 2014 . Traffic flow prediction with big data: a deep learning approach . IEEE Transactions on Intelligent Transportation Systems , Vol. 16 , 2 (2014), 865 -- 873 . Yisheng Lv, Yanjie Duan, Wenwen Kang, Zhengxi Li, and Fei-Yue Wang. 2014. Traffic flow prediction with big data: a deep learning approach. IEEE Transactions on Intelligent Transportation Systems, Vol. 16, 2 (2014), 865--873.","journal-title":"IEEE Transactions on Intelligent Transportation Systems"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/482"},{"key":"e_1_3_2_1_28_1","volume-title":"Collaborative filtering and the missing at random assumption. arXiv preprint arXiv:1206.5267","author":"Marlin Benjamin","year":"2012","unstructured":"Benjamin Marlin , Richard S Zemel , Sam Roweis , and Malcolm Slaney . 2012. Collaborative filtering and the missing at random assumption. arXiv preprint arXiv:1206.5267 ( 2012 ). Benjamin Marlin, Richard S Zemel, Sam Roweis, and Malcolm Slaney. 2012. Collaborative filtering and the missing at random assumption. arXiv preprint arXiv:1206.5267 (2012)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/1639714.1639717"},{"key":"e_1_3_2_1_30_1","volume-title":"Wavenet: A generative model for raw audio. arXiv preprint arXiv:1609.03499","author":"van den Oord Aaron","year":"2016","unstructured":"Aaron van den Oord , Sander Dieleman , Heiga Zen , Karen Simonyan , Oriol Vinyals , Alex Graves , Nal Kalchbrenner , Andrew Senior , and Koray Kavukcuoglu . 2016 . Wavenet: A generative model for raw audio. arXiv preprint arXiv:1609.03499 (2016). Aaron van den Oord, Sander Dieleman, Heiga Zen, Karen Simonyan, Oriol Vinyals, Alex Graves, Nal Kalchbrenner, Andrew Senior, and Koray Kavukcuoglu. 2016. Wavenet: A generative model for raw audio. arXiv preprint arXiv:1609.03499 (2016)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330884"},{"key":"e_1_3_2_1_32_1","volume-title":"Learning representations by back-propagating errors. nature","author":"Rumelhart David E","year":"1986","unstructured":"David E Rumelhart , Geoffrey E Hinton , and Ronald J Williams . 1986. Learning representations by back-propagating errors. nature , Vol. 323 , 6088 ( 1986 ), 533--536. David E Rumelhart, Geoffrey E Hinton, and Ronald J Williams. 1986. Learning representations by back-propagating errors. nature, Vol. 323, 6088 (1986), 533--536."},{"key":"e_1_3_2_1_33_1","volume-title":"international conference on machine learning. PMLR, 1670--1679","author":"Schnabel Tobias","year":"2016","unstructured":"Tobias Schnabel , Adith Swaminathan , Ashudeep Singh , Navin Chandak , and Thorsten Joachims . 2016 . Recommendations as treatments: Debiasing learning and evaluation . In international conference on machine learning. PMLR, 1670--1679 . Tobias Schnabel, Adith Swaminathan, Ashudeep Singh, Navin Chandak, and Thorsten Joachims. 2016. Recommendations as treatments: Debiasing learning and evaluation. In international conference on machine learning. PMLR, 1670--1679."},{"key":"e_1_3_2_1_34_1","volume-title":"Outrageously large neural networks: The sparsely-gated mixture-of-experts layer. arXiv preprint arXiv:1701.06538","author":"Shazeer Noam","year":"2017","unstructured":"Noam Shazeer , Azalia Mirhoseini , Krzysztof Maziarz , Andy Davis , Quoc Le , Geoffrey Hinton , and Jeff Dean . 2017. Outrageously large neural networks: The sparsely-gated mixture-of-experts layer. arXiv preprint arXiv:1701.06538 ( 2017 ). Noam Shazeer, Azalia Mirhoseini, Krzysztof Maziarz, Andy Davis, Quoc Le, Geoffrey Hinton, and Jeff Dean. 2017. Outrageously large neural networks: The sparsely-gated mixture-of-experts layer. arXiv preprint arXiv:1701.06538 (2017)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i01.5438"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2016.0061"},{"key":"e_1_3_2_1_37_1","volume-title":"The deconfounded recommender: A causal inference approach to recommendation. arXiv preprint arXiv:1808.06581","author":"Wang Yixin","year":"2018","unstructured":"Yixin Wang , Dawen Liang , Laurent Charlin , and David M Blei . 2018. The deconfounded recommender: A causal inference approach to recommendation. arXiv preprint arXiv:1808.06581 ( 2018 ). Yixin Wang, Dawen Liang, Laurent Charlin, and David M Blei. 2018. The deconfounded recommender: A causal inference approach to recommendation. arXiv preprint arXiv:1808.06581 (2018)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.trc.2018.03.001"},{"key":"e_1_3_2_1_39_1","volume-title":"Graph wavenet for deep spatial-temporal graph modeling. arXiv preprint arXiv:1906.00121","author":"Wu Zonghan","year":"2019","unstructured":"Zonghan Wu , Shirui Pan , Guodong Long , Jing Jiang , and Chengqi Zhang . 2019. Graph wavenet for deep spatial-temporal graph modeling. arXiv preprint arXiv:1906.00121 ( 2019 ). Zonghan Wu, Shirui Pan, Guodong Long, Jing Jiang, and Chengqi Zhang. 2019. Graph wavenet for deep spatial-temporal graph modeling. arXiv preprint arXiv:1906.00121 (2019)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2915364"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11280-022-01045-y"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2015.2405556"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33015668"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11836"},{"key":"e_1_3_2_1_45_1","volume-title":"Deep learning on traffic prediction: Methods, analysis and future directions","author":"Yin Xueyan","year":"2021","unstructured":"Xueyan Yin , Genze Wu , Jinze Wei , Yanming Shen , Heng Qi , and Baocai Yin . 2021. Deep learning on traffic prediction: Methods, analysis and future directions . IEEE Transactions on Intelligent Transportation Systems ( 2021 ). Xueyan Yin, Genze Wu, Jinze Wei, Yanming Shen, Heng Qi, and Baocai Yin. 2021. Deep learning on traffic prediction: Methods, analysis and future directions. IEEE Transactions on Intelligent Transportation Systems (2021)."},{"key":"e_1_3_2_1_46_1","volume-title":"Spatio-temporal graph convolutional networks: A deep learning framework for traffic forecasting. arXiv preprint arXiv:1709.04875","author":"Yu Bing","year":"2017","unstructured":"Bing Yu , Haoteng Yin , and Zhanxing Zhu . 2017. Spatio-temporal graph convolutional networks: A deep learning framework for traffic forecasting. arXiv preprint arXiv:1709.04875 ( 2017 ). Bing Yu, Haoteng Yin, and Zhanxing Zhu. 2017. Spatio-temporal graph convolutional networks: A deep learning framework for traffic forecasting. arXiv preprint arXiv:1709.04875 (2017)."},{"key":"e_1_3_2_1_47_1","volume-title":"Multi-scale context aggregation by dilated convolutions. arXiv preprint arXiv:1511.07122","author":"Yu Fisher","year":"2015","unstructured":"Fisher Yu and Vladlen Koltun . 2015. Multi-scale context aggregation by dilated convolutions. arXiv preprint arXiv:1511.07122 ( 2015 ). Fisher Yu and Vladlen Koltun. 2015. Multi-scale context aggregation by dilated convolutions. arXiv preprint arXiv:1511.07122 (2015)."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.5555\/3298239.3298479"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i01.5477"}],"event":{"name":"CIKM '23: The 32nd ACM International Conference on Information and Knowledge Management","location":"Birmingham United Kingdom","acronym":"CIKM '23","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 32nd ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3583780.3615068","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3583780.3615068","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:36:56Z","timestamp":1750178216000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3583780.3615068"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,21]]},"references-count":49,"alternative-id":["10.1145\/3583780.3615068","10.1145\/3583780"],"URL":"https:\/\/doi.org\/10.1145\/3583780.3615068","relation":{},"subject":[],"published":{"date-parts":[[2023,10,21]]},"assertion":[{"value":"2023-10-21","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}