{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,9]],"date-time":"2025-11-09T17:56:22Z","timestamp":1762710982611},"reference-count":28,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2020,4,20]],"date-time":"2020-04-20T00:00:00Z","timestamp":1587340800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,4,20]],"date-time":"2020-04-20T00:00:00Z","timestamp":1587340800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["World Wide Web"],"published-print":{"date-parts":[[2020,7]]},"DOI":"10.1007\/s11280-020-00804-z","type":"journal-article","created":{"date-parts":[[2020,4,20]],"date-time":"2020-04-20T02:02:25Z","timestamp":1587348145000},"page":"2491-2511","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":17,"title":["Rebalancing the car-sharing system with reinforcement learning"],"prefix":"10.1007","volume":"23","author":[{"given":"Changwei","family":"Ren","sequence":"first","affiliation":[]},{"given":"Lixingjian","family":"An","sequence":"additional","affiliation":[]},{"given":"Zhanquan","family":"Gu","sequence":"additional","affiliation":[]},{"given":"Yuexuan","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Yunjun","family":"Gao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,4,20]]},"reference":[{"key":"804_CR1","unstructured":"Andre, D., Russell, S.J.: State abstraction for programmable reinforcement learning agents[C]. AAAI\/IAAI, pp. 
119\u2013125 (2002)"},{"key":"804_CR2","doi-asserted-by":"crossref","unstructured":"Cai, Q., Filos-Ratsikas, A., Tang, P., et al.: Reinforcement Mechanism Design for e-commerce[C]. Proceedings of the 2018 World Wide Web Conference. International World Wide Web Conferences Steering Committee, pp. 1339\u20131348 (2018)","DOI":"10.1145\/3178876.3186039"},{"key":"804_CR3","unstructured":"Chemla, D., Meunier, F., Pradeau, T., Calvo, R.W., Yahiaoui, H.: Self-service bike sharing systems: simulation, repositioning pricing (2013)"},{"key":"804_CR4","unstructured":"Dayan, P., Hinton, G.E.: Feudal reinforcement learning[C]\/\/Advances in neural information processing systems, pp. 271\u2013278 (1993)"},{"key":"804_CR5","first-page":"3","volume":"2","author":"T Dean","year":"1995","unstructured":"Dean, T., Lin, S.H.: Decomposition techniques for planning in stochastic domains[C]. IJCAI 2, 3 (1995)","journal-title":"IJCAI"},{"key":"804_CR6","first-page":"118","volume":"98","author":"TG Dietterich","year":"1998","unstructured":"Dietterich, T.G.: The MAXQ Method for Hierarchical Reinforcement Learning[C]. ICML 98, 118\u2013126 (1998)","journal-title":"ICML"},{"key":"804_CR7","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1613\/jair.639","volume":"13","author":"TG Dietterich","year":"2000","unstructured":"Dietterich, T.G.: Hierarchical reinforcement learning with the MAXQ value function decomposition[J]. J. Artif. Intell. Res. 13, 227\u2013303 (2000)","journal-title":"J. Artif. Intell. Res."},{"issue":"3","key":"804_CR8","doi-asserted-by":"publisher","first-page":"261","DOI":"10.1007\/s13676-014-0053-5","volume":"5","author":"C Fricker","year":"2016","unstructured":"Fricker, C., Gast, N.: Incentives and redistribution in homogeneous bike-sharing systems with stations of finite capacity. Euro J. Transp. Logist. 5(3), 261\u2013291 (2016)","journal-title":"Euro J. Transp. 
Logist."},{"key":"804_CR9","unstructured":"Ghosh, S., Trick, M., Varakantham, P.: Robust Repositioning to Counter Unpredictable Demand in Bike Sharing Systems. In: Proceedings of the Twenty-Fifth International Joint Conference on Artificial Intelligence (IJCAI\u201916), pp. 3096\u20133102. AAAI Press. http:\/\/dl.acm.org\/citation.cfm?id=3061053.3061055 (2016)"},{"issue":"8","key":"804_CR10","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory[J]. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"804_CR11","first-page":"167","volume":"951","author":"LP Kaelbling","year":"1993","unstructured":"Kaelbling, L.P.: Hierarchical learning in stochastic domains: Preliminary results[C]. Proc. Tenth Int. Conf. Mach. Learn. 951, 167\u2013173 (1993)","journal-title":"Proc. Tenth Int. Conf. Mach. Learn."},{"key":"804_CR12","unstructured":"Kingma, D.P., Ba, J.: Adam: A method for stochastic optimization. arXiv:1412.6980 (2014)"},{"issue":"6","key":"804_CR13","first-page":"A187","volume":"8","author":"TP Lillicrap","year":"2016","unstructured":"Lillicrap, T.P., Hunt, J.J., Pritzel, A., et al.: Continuous control with deep reinforcement learning. Comput. Sci. 8(6), A187 (2016)","journal-title":"Comput. Sci."},{"key":"804_CR14","doi-asserted-by":"crossref","unstructured":"Li, Y., Yu, Z., Yang, Q.: Dynamic Bike Reposition: A SpatioTemporal Reinforcement Learning Approach. In: Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 1724\u20131733. ACM (2018)","DOI":"10.1145\/3219819.3220110"},{"key":"804_CR15","doi-asserted-by":"crossref","unstructured":"Liu, J., Sun, L., Chen, W., Xiong, H.: Rebalancing Bike Sharing Systems: A Multi-source Data Smart Optimization. 
In: Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 1005\u20131014. ACM (2016)","DOI":"10.1145\/2939672.2939776"},{"key":"804_CR16","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., et al.: Playing Atari with deep reinforcement learning. Proceedings of Workshops at the 26th Neural Information Processing Systems 2013. Lake Tahoe, pp. 201\u2013220 (2013)"},{"issue":"7540","key":"804_CR17","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"issue":"8","key":"804_CR18","first-page":"1064","volume":"46","author":"W Ning","year":"2018","unstructured":"Ning, W., Wenjian, Z., Xiang, L., Jing, Z.: Inter-Site-Vehicle Artificial scheduling strategy design for electric vehicle Sharing[J]. J. Tongji Univ. (Nat. Sci.) 46 (8), 1064\u20131071 (2018)","journal-title":"J. Tongji Univ. (Nat. Sci.)"},{"issue":"5","key":"804_CR19","first-page":"12","volume":"18","author":"W Ning","year":"2018","unstructured":"Ning, W., Yajing, S., Linhao, T., WenJian, Z.: Adaptive Scheduling Strategy in Car-sharing System Based on Feedback Dynamic Pricing. J. Transp. Syst. Eng. Inf. Technol. 18(5), 12\u201317 (2018)","journal-title":"J. Transp. Syst. Eng. Inf. Technol."},{"key":"804_CR20","doi-asserted-by":"crossref","unstructured":"O\u2019Mahony, E., Shmoys, D.B: Data analysis and optimization for (citi) bike sharing. In: AAAI, pp. 687\u2013694 (2015)","DOI":"10.1609\/aaai.v29i1.9245"},{"key":"804_CR21","doi-asserted-by":"crossref","unstructured":"Pan, L., Cai, Q., Fang, Z., et al.: A Deep Reinforcement Learning Framework for Rebalancing Dockless Bike Sharing Systems[J]. 
arXiv:1802.04592(2018)","DOI":"10.1609\/aaai.v33i01.33011393"},{"key":"804_CR22","unstructured":"Sergey, I., Szegedy, C.: Batch normalization: Accelerating deep network training by reducing internal covariate shift. arXiv:1502.03167 (2015)"},{"key":"804_CR23","unstructured":"Silver, D., Lever, G., Hess, N., et al.: Deterministic policy gradient algorithms. Proceedings of the International Conference on Machine Learning. Beijing, pp. 387\u2013395 (2014)"},{"key":"804_CR24","volume-title":"Reinforcement learning: an Introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, AG.: Reinforcement learning: an Introduction. MIT Press, Cambridge (1998)"},{"key":"804_CR25","unstructured":"Sutton, R.S., McAllister, D.A., Singh, S.P., et al.: Policy gradient methods for reinforcement learning with function approximation. Proceedings of the Advances in Neural Information Processing Systems, Denver, pp. 1057\u20131063 (1999)"},{"key":"804_CR26","doi-asserted-by":"crossref","unstructured":"Singla, A., Santoni, M., Bart\u00f3k, G., Mukerji, P., Meenen, M., Krause, A.: Incentivizing users for balancing bike sharing systems. In: AAAI, pp. 723\u2013729, Austin, Texas (2015)","DOI":"10.1609\/aaai.v29i1.9251"},{"key":"804_CR27","unstructured":"Van Seijen, H., et al.: Hybrid reward architecture for reinforcement learning. Advances in Neural Information Processing Systems (2017)"},{"issue":"4","key":"804_CR28","first-page":"233","volume":"15","author":"CJCH Watkins","year":"1989","unstructured":"Watkins, C.J.C.H.: Learning from delayed rewards. Robot. Auton. Syst. 15(4), 233\u2013235 (1989)","journal-title":"Robot. Auton. 
Syst."}],"container-title":["World Wide Web"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11280-020-00804-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11280-020-00804-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11280-020-00804-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,21]],"date-time":"2022-10-21T19:39:05Z","timestamp":1666381145000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11280-020-00804-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,4,20]]},"references-count":28,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2020,7]]}},"alternative-id":["804"],"URL":"https:\/\/doi.org\/10.1007\/s11280-020-00804-z","relation":{},"ISSN":["1386-145X","1573-1413"],"issn-type":[{"value":"1386-145X","type":"print"},{"value":"1573-1413","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,4,20]]},"assertion":[{"value":"30 August 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 January 2020","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 February 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 April 2020","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}