{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T20:14:38Z","timestamp":1773778478309,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":61,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,8,24]],"date-time":"2024-08-24T00:00:00Z","timestamp":1724457600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No.62102110, No.92370204"],"award-info":[{"award-number":["No.62102110, No.92370204"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Key R&D Program of China","award":["No.2023YFF0725004"],"award-info":[{"award-number":["No.2023YFF0725004"]}]},{"name":"Guangzhou Basic and Applied Basic Research Program","award":["No.2024A04J3279"],"award-info":[{"award-number":["No.2024A04J3279"]}]},{"name":"Education Bureau of Guangzhou Municipality"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,8,25]]},"DOI":"10.1145\/3637528.3671507","type":"proceedings-article","created":{"date-parts":[[2024,8,25]],"date-time":"2024-08-25T04:55:12Z","timestamp":1724561712000},"page":"5206-5217","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":17,"title":["Interpretable Cascading Mixture-of-Experts for Urban Traffic Congestion Prediction"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-1081-8684","authenticated-orcid":false,"given":"Wenzhao","family":"Jiang","sequence":"first","affiliation":[{"name":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, Guangdong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1542-6149","authenticated-orcid":false,"given":"Jindong","family":"Han","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4271-1567","authenticated-orcid":false,"given":"Hao","family":"Liu","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology (Guangzhou) &amp; The Hong Kong University of Science and Technology, Guangzhou, Guangdong, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-1378-3422","authenticated-orcid":false,"given":"Tao","family":"Tao","sequence":"additional","affiliation":[{"name":"Didichuxing Co. Ltd, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-4687-5212","authenticated-orcid":false,"given":"Naiqiang","family":"Tan","sequence":"additional","affiliation":[{"name":"Didichuxing Co. Ltd, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6016-6465","authenticated-orcid":false,"given":"Hui","family":"Xiong","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology (Guangzhou) &amp; The Hong Kong University of Science and Technology, Guangzhou, Guangdong, China"}]}],"member":"320","published-online":{"date-parts":[[2024,8,24]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1155\/2021\/8878011"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.804"},{"key":"e_1_3_2_2_3_1","unstructured":"Lei Bai Lina Yao Can Li Xianzhi Wang and Can Wang. 2020. Adaptive Graph Convolutional Recurrent Network for Traffic Forecasting. In Advances in Neural Information Processing Systems 33."},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2018.2835523"},{"key":"e_1_3_2_2_5_1","volume-title":"BA-GNN: On Learning Bias-Aware Graph Neural Network. In 2022 IEEE 38th International Conference on Data Engineering. 3012--3024","author":"Chen Zhengyu","year":"2022","unstructured":"Zhengyu Chen, Teng Xiao, and Kun Kuang. 2022. BA-GNN: On Learning Bias-Aware Graph Neural Network. In 2022 IEEE 38th International Conference on Data Engineering. 3012--3024."},{"key":"e_1_3_2_2_6_1","volume-title":"DeepTransport: Learning Spatial-Temporal Dependency for Traffic Condition Forecasting. In 2018 International Joint Conference on Neural Networks. 1--8.","author":"Cheng Xingyi","year":"2018","unstructured":"Xingyi Cheng, Ruiqing Zhang, Jie Zhou, and Wei Xu. 2018. DeepTransport: Learning Spatial-Temporal Dependency for Traffic Condition Forecasting. In 2018 International Joint Conference on Neural Networks. 1--8."},{"key":"e_1_3_2_2_7_1","volume-title":"KyungHyun Cho, and Yoshua Bengio.","author":"Chung Junyoung","year":"2014","unstructured":"Junyoung Chung, cCaglar G\u00fclccehre, KyungHyun Cho, and Yoshua Bengio. 2014. Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling. CoRR, Vol. abs\/1412.3555 (2014)."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-00296-0"},{"key":"e_1_3_2_2_9_1","volume-title":"Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery & Data Mining. 269--278","author":"Deng Jinliang","unstructured":"Jinliang Deng, Xiusi Chen, Renhe Jiang, Xuan Song, and Ivor W. Tsang. 2021. ST-Norm: Spatial and Temporal Normalization for Multi-variate Time Series Forecasting. In Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery & Data Mining. 269--278."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/MDM.2019.00-45"},{"key":"e_1_3_2_2_11_1","volume-title":"Soft Labels for Ordinal Regression. In 2019 IEEE Conference on Computer Vision and Pattern Recognition. 4738--4747","author":"Diaz Raul","year":"2019","unstructured":"Raul Diaz and Amit Marathe. 2019. Soft Labels for Ordinal Regression. In 2019 IEEE Conference on Computer Vision and Pattern Recognition. 4738--4747."},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISC2.2015.7366151"},{"key":"e_1_3_2_2_13_1","volume-title":"STWave: A Multi-Scale Efficient Spectral Graph Attention Network With Long-Term Trends for Disentangled Traffic Flow Forecasting","author":"Fang Yuchen","year":"2023","unstructured":"Yuchen Fang, Yanjun Qin, Haiyong Luo, Fang Zhao, and Kai Zheng. 2023. STWave: A Multi-Scale Efficient Spectral Graph Attention Network With Long-Term Trends for Disentangled Traffic Flow Forecasting. IEEE Transactions on Knowledge and Data Engineering (2023)."},{"key":"e_1_3_2_2_14_1","volume-title":"A Review of Sparse Expert Models in Deep Learning. CoRR","author":"Fedus William","year":"2022","unstructured":"William Fedus, Jeff Dean, and Barret Zoph. 2022. A Review of Sparse Expert Models in Deep Learning. CoRR, Vol. abs\/2209.01667 (2022)."},{"key":"e_1_3_2_2_15_1","article-title":"Switch Transformers: Scaling to Trillion Parameter Models with Simple and Efficient Sparsity","volume":"23","author":"Fedus William","year":"2022","unstructured":"William Fedus, Barret Zoph, and Noam Shazeer. 2022. Switch Transformers: Scaling to Trillion Parameter Models with Simple and Efficient Sparsity. Journal of Machine Learning Research, Vol. 23 (2022), 120:1--120:39.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_2_16_1","volume-title":"Proceedings of the 14th International Conference on Artificial Intelligence and Statistics","volume":"15","author":"Glorot Xavier","year":"2011","unstructured":"Xavier Glorot, Antoine Bordes, and Yoshua Bengio. 2011. Deep Sparse Rectifier Neural Networks. In Proceedings of the 14th International Conference on Artificial Intelligence and Statistics, Vol. 15. 315--323."},{"key":"e_1_3_2_2_17_1","volume-title":"Proceedings of the 39th International Conference on Machine Learning","volume":"162","author":"Gong Yu","year":"2022","unstructured":"Yu Gong, Greg Mori, and Frederick Tung. 2022. RankSim: Ranking Similarity Regularization for Deep Imbalanced Regression. In Proceedings of the 39th International Conference on Machine Learning, Vol. 162. 7634--7649."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2021.3056502"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599842"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.14778\/3641204.3641217"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1137\/1031129"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2016.7795635"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1991.3.1.79"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i12.26669"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.3141\/2595-12"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.trc.2021.103432"},{"key":"e_1_3_2_2_27_1","volume-title":"GShard: Scaling Giant Models with Conditional Computation and Automatic Sharding. In 9th International Conference on Learning Representations.","author":"Lepikhin Dmitry","year":"2021","unstructured":"Dmitry Lepikhin, HyoukJoong Lee, Yuanzhong Xu, Dehao Chen, Orhan Firat, Yanping Huang, Maxim Krikun, Noam Shazeer, and Zhifeng Chen. 2021. GShard: Scaling Giant Models with Conditional Computation and Automatic Sharding. In 9th International Conference on Learning Representations."},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-016-3474-3"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589132.3625612"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583780.3615068"},{"key":"e_1_3_2_2_31_1","volume-title":"Diffusion Convolutional Recurrent Neural Network: Data-Driven Traffic Forecasting. In 6th International Conference on Learning Representations.","author":"Li Yaguang","year":"2018","unstructured":"Yaguang Li, Rose Yu, Cyrus Shahabi, and Yan Liu. 2018. Diffusion Convolutional Recurrent Neural Network: Data-Driven Traffic Forecasting. In 6th International Conference on Learning Representations."},{"key":"e_1_3_2_2_32_1","unstructured":"Fan Liu Hao Liu and Wenzhao Jiang. 2022. Practical Adversarial Attacks on Spatiotemporal Traffic Forecasting Models. In Advances in Neural Information Processing Systems 35. 19035--19047."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583780.3615160"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599925"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403281"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467350"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0119044"},{"key":"e_1_3_2_2_38_1","unstructured":"Basil Mustafa Carlos Riquelme Joan Puigcerver Rodolphe Jenatton and Neil Houlsby. 2022. Multimodal Contrastive Learning with LIMoE: the Language-Image Mixture of Experts. In Advances in Neural Information Processing Systems 35."},{"key":"e_1_3_2_2_39_1","volume-title":"10th International Conference on Learning Representations.","author":"Park Namuk","year":"2022","unstructured":"Namuk Park and Songkuk Kim. 2022. How Do Vision Transformers Work?. In 10th International Conference on Learning Representations."},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.trc.2014.02.007"},{"key":"e_1_3_2_2_41_1","volume-title":"Proceedings of the 39th International Conference on Machine Learning","volume":"162","author":"Rajbhandari Samyam","year":"2022","unstructured":"Samyam Rajbhandari, Conglong Li, Zhewei Yao, Minjia Zhang, Reza Yazdani Aminabadi, Ammar Ahmad Awan, Jeff Rasley, and Yuxiong He. 2022. DeepSpeed-MoE: Advancing Mixture-of-Experts Inference and Training to Power Next-Generation AI Scale. In Proceedings of the 39th International Conference on Machine Learning, Vol. 162. 18332--18346."},{"key":"e_1_3_2_2_42_1","volume-title":"Daniel Keysers, and Neil Houlsby.","author":"Riquelme Carlos","year":"2021","unstructured":"Carlos Riquelme, Joan Puigcerver, Basil Mustafa, Maxim Neumann, Rodolphe Jenatton, Andr\u00e9 Susano Pinto, Daniel Keysers, and Neil Houlsby. 2021. Scaling Vision with Sparse Mixture of Experts. In Advances in Neural Information Processing Systems 34. 8583--8595."},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-019-0048-x"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557702"},{"key":"e_1_3_2_2_45_1","volume-title":"Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer. In 5th International Conference on Learning Representations.","author":"Shazeer Noam","year":"2017","unstructured":"Noam Shazeer, Azalia Mirhoseini, Krzysztof Maziarz, Andy Davis, Quoc V. Le, Geoffrey E. Hinton, and Jeff Dean. 2017. Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer. In 5th International Conference on Learning Representations."},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/417"},{"key":"e_1_3_2_2_47_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N. Gomez Lukasz Kaiser and Illia Polosukhin. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems 30. 5998--6008."},{"key":"e_1_3_2_2_48_1","volume-title":"Graph Attention Networks. In 6th International Conference on Learning Representations.","author":"Velickovic Petar","year":"2018","unstructured":"Petar Velickovic, Guillem Cucurull, Arantxa Casanova, Adriana Romero, Pietro Li\u00f2, and Yoshua Bengio. 2018. Graph Attention Networks. In 6th International Conference on Learning Representations."},{"key":"e_1_3_2_2_49_1","unstructured":"Haotao Wang Ziyu Jiang Yuning You Yan Han Gaowen Liu Jayanth Srinivasa Ramana Kompella and Zhangyang Wang. 2023. Graph Mixture of Experts: Learning on Large-Scale Graphs with Explicit Diversity Modeling. In Advances in Neural Information Processing Systems 36."},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1007\/s44196-022-00177-3"},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1287\/trsc.2021.1068"},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403118"},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/264"},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557151"},{"key":"e_1_3_2_2_55_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning","volume":"139","author":"Yang Yuzhe","year":"2021","unstructured":"Yuzhe Yang, Kaiwen Zha, Ying-Cong Chen, Hao Wang, and Dina Katabi. 2021. Delving into Deep Imbalanced Regression. In Proceedings of the 38th International Conference on Machine Learning, Vol. 139. 11842--11851."},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/2934664"},{"key":"e_1_3_2_2_57_1","volume-title":"Mixture of Weak & Strong Experts on Graphs. CoRR","author":"Zeng Hanqing","year":"2023","unstructured":"Hanqing Zeng, Hanjia Lyu, Diyi Hu, Yinglong Xia, and Jiebo Luo. 2023. Mixture of Weak & Strong Experts on Graphs. CoRR, Vol. abs\/2311.05185 (2023)."},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467187"},{"key":"e_1_3_2_2_59_1","volume-title":"Robust Mixture-of-Expert Training for Convolutional Neural Networks. In 2023 IEEE\/CVF International Conference on Computer Vision. 90--101","author":"Zhang Yihua","year":"2023","unstructured":"Yihua Zhang, Ruisi Cai, Tianlong Chen, Guanhua Zhang, Huan Zhang, Pin-Yu Chen, Shiyu Chang, Zhangyang Wang, and Sijia Liu. 2023. Robust Mixture-of-Expert Training for Convolutional Neural Networks. In 2023 IEEE\/CVF International Conference on Computer Vision. 90--101."},{"key":"e_1_3_2_2_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/2629592"},{"key":"e_1_3_2_2_61_1","volume-title":"Designing Effective Sparse Expert Models. CoRR","author":"Zoph Barret","year":"2022","unstructured":"Barret Zoph, Irwan Bello, Sameer Kumar, Nan Du, Yanping Huang, Jeff Dean, Noam Shazeer, and William Fedus. 2022. Designing Effective Sparse Expert Models. CoRR, Vol. abs\/2202.08906 (2022)."}],"event":{"name":"KDD '24: The 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Barcelona Spain","acronym":"KDD '24","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3637528.3671507","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3637528.3671507","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:04:18Z","timestamp":1750291458000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3637528.3671507"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,24]]},"references-count":61,"alternative-id":["10.1145\/3637528.3671507","10.1145\/3637528"],"URL":"https:\/\/doi.org\/10.1145\/3637528.3671507","relation":{},"subject":[],"published":{"date-parts":[[2024,8,24]]},"assertion":[{"value":"2024-08-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}