{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T19:45:45Z","timestamp":1781552745719,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":15,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,8,14]],"date-time":"2022-08-14T00:00:00Z","timestamp":1660435200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,8,14]]},"DOI":"10.1145\/3534678.3539095","type":"proceedings-article","created":{"date-parts":[[2022,8,12]],"date-time":"2022-08-12T19:06:41Z","timestamp":1660331201000},"page":"3838-3848","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":22,"title":["Reinforcement Learning in the Wild: Scalable RL Dispatching Algorithm Deployed in Ridehailing Marketplace"],"prefix":"10.1145","author":[{"given":"Soheil","family":"Sadeghi Eshkevari","sequence":"first","affiliation":[{"name":"DiDi Labs, Mountain View, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiaocheng","family":"Tang","sequence":"additional","affiliation":[{"name":"DiDi Labs, Mountain View, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhiwei","family":"Qin","sequence":"additional","affiliation":[{"name":"DiDi Labs, Mountain View, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jinhan","family":"Mei","sequence":"additional","affiliation":[{"name":"DiDi Chuxing, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Cheng","family":"Zhang","sequence":"additional","affiliation":[{"name":"DiDi Chuxing, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Qianying","family":"Meng","sequence":"additional","affiliation":[{"name":"DiDi Chuxing, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jia","family":"Xu","sequence":"additional","affiliation":[{"name":"DiDi Chuxing, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2022,8,14]]},"reference":[{"key":"e_1_3_2_1_2_1","volume-title":"Maximum weight online matching with deadlines. arXiv preprint arXiv:1808.03526","author":"Ashlagi Itai","year":"2018","unstructured":"Itai Ashlagi, Maximilien Burq, Chinmoy Dutta, Patrick Jaillet, Amin Saberi, and Chris Sholley. 2018. Maximum weight online matching with deadlines. arXiv preprint arXiv:1808.03526 (2018)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1287\/msom.2018.0707"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.06.014"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.trc.2021.103289"},{"key":"e_1_3_2_1_6_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_7_1","volume-title":"2nd PASCAL Challenges Workshop","volume":"2","author":"Kocsis Levente","year":"2006","unstructured":"Levente Kocsis and Csaba Szepesv\u00e1ri. 2006. Discounted ucb. In 2nd PASCAL Challenges Workshop, Vol. 2."},{"key":"e_1_3_2_1_8_1","volume-title":"The Hungarian method for the assignment problem. Naval research logistics quarterly 2, 1--2","author":"Kuhn Harold W","year":"1955","unstructured":"Harold W Kuhn. 1955. The Hungarian method for the assignment problem. Naval research logistics quarterly 2, 1--2 (1955), 83--97."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/1772690.1772758"},{"key":"e_1_3_2_1_10_1","unstructured":"Hamid Reza Maei. 2011. Gradient temporal-difference learning algorithms. (2011)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1287\/stsy.2019.0037"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.tre.2017.08.003"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330724"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467096"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-018-0095-1"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1002\/nav.21872"}],"event":{"name":"KDD '22: The 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Washington DC USA","acronym":"KDD '22","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3534678.3539095","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3534678.3539095","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T18:09:51Z","timestamp":1750183791000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3534678.3539095"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,14]]},"references-count":15,"alternative-id":["10.1145\/3534678.3539095","10.1145\/3534678"],"URL":"https:\/\/doi.org\/10.1145\/3534678.3539095","relation":{},"subject":[],"published":{"date-parts":[[2022,8,14]]},"assertion":[{"value":"2022-08-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}