{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,16]],"date-time":"2026-05-16T02:11:04Z","timestamp":1778897464074,"version":"3.51.4"},"publisher-location":"Singapore","reference-count":30,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819784868","type":"print"},{"value":"9789819784875","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,4]],"date-time":"2024-11-04T00:00:00Z","timestamp":1730678400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,4]],"date-time":"2024-11-04T00:00:00Z","timestamp":1730678400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-8487-5_3","type":"book-chapter","created":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T07:02:19Z","timestamp":1730617339000},"page":"34-49","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Generalizing Soft Actor-Critic Algorithms to Discrete Action Spaces"],"prefix":"10.1007","author":[{"given":"Le","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yong","family":"Gu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xin","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanshuo","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shu","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yifei","family":"Jin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xinxin","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,11,4]]},"reference":[{"key":"3_CR1","first-page":"29304","volume":"34","author":"R Agarwal","year":"2021","unstructured":"Agarwal, R., Schwarzer, M., Castro, P.S., Courville, A.C., Bellemare, M.: Deep reinforcement learning at the edge of the statistical precipice. Adv. Neural. Inf. Process. Syst. 34, 29304\u201329320 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"3_CR2","doi-asserted-by":"crossref","unstructured":"Chen, X., He, K.: Exploring simple siamese representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15750\u201315758 (2021)","DOI":"10.1109\/CVPR46437.2021.01549"},{"key":"3_CR3","unstructured":"Christodoulou, P.: Soft actor-critic for discrete action settings. arXiv:1910.07207 (2019)"},{"key":"3_CR4","unstructured":"D\u2019Oro, P., Schwarzer, M., Nikishin, E., Bacon, P.L., Bellemare, M.G., Courville, A.: Sample-efficient reinforcement learning by breaking the replay ratio barrier. In: The Eleventh International Conference on Learning Representations (2022)"},{"key":"3_CR5","unstructured":"Espeholt, L., Soyer, H., Munos, R., Simonyan, K., Mnih, V., Ward, T., Doron, Y., Firoiu, V., Harley, T., Dunning, I., et\u00a0al.: Impala: Scalable distributed deep-rl with importance weighted actor-learner architectures. In: International Conference on Machine Learning, pp. 1407\u20131416. PMLR (2018)"},{"key":"3_CR6","unstructured":"Gruslys, A., Dabney, W., Azar, M.G., Piot, B., Bellemare, M., Munos, R.: The reactor: A fast and sample-efficient actor-critic agent for reinforcement learning. In: International Conference on Learning Representations (2018). https:\/\/openreview.net\/forum?id=rkHVZWZAZ"},{"key":"3_CR7","unstructured":"Haarnoja, T., Tang, H., Abbeel, P., Levine, S.: Reinforcement learning with deep energy-based policies. In: International Conference on Machine Learning, pp. 1352\u20131361. PMLR (2017)"},{"key":"3_CR8","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International Conference on Machine Learning, pp. 1861\u20131870. PMLR (2018)"},{"key":"3_CR9","doi-asserted-by":"crossref","unstructured":"Hessel, M., Modayil, J., Van\u00a0Hasselt, H., Schaul, T., Ostrovski, G., Dabney, W., Horgan, D., Piot, B., Azar, M., Silver, D.: Rainbow: combining improvements in deep reinforcement learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a032 (2018)","DOI":"10.1609\/aaai.v32i1.11796"},{"key":"3_CR10","unstructured":"\u0141ukasz Kaiser, Babaeizadeh, M., Mi\u0142os, P., Osi\u0144ski, B., Campbell, R.H., Czechowski, K., Erhan, D., Finn, C., Kozakowski, P., Levine, S., Mohiuddin, A., Sepassi, R., Tucker, G., Michalewski, H.: Model based reinforcement learning for atari. In: International Conference on Learning Representations (2020). https:\/\/openreview.net\/forum?id=S1xCPJHtDB"},{"key":"3_CR11","unstructured":"Kielak, K.P.: Do recent advancements in model-based deep reinforcement learning really improve data efficiency?, p. 9 (2020). https:\/\/openreview.net\/forum (2019)"},{"key":"3_CR12","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. arXiv:1312.6114 (2013)"},{"key":"3_CR13","unstructured":"Laskin, M., Srinivas, A., Abbeel, P.: Curl: Contrastive unsupervised representations for reinforcement learning. In: International Conference on Machine Learning, pp. 5639\u20135650. PMLR (2020)"},{"key":"3_CR14","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: International Conference on Learning Representations (2019). https:\/\/openreview.net\/forum?id=Bkg6RiCqY7"},{"key":"3_CR15","unstructured":"Micheli, V., Alonso, E., Fleuret, F.: Transformers are sample-efficient world models. In: The Eleventh International Conference on Learning Representations (2023). https:\/\/openreview.net\/forum?id=vhFu1Acb0xb"},{"key":"3_CR16","unstructured":"Mnih, V., Badia, A.P., Mirza, M., Graves, A., Lillicrap, T., Harley, T., Silver, D., Kavukcuoglu, K.: Asynchronous methods for deep reinforcement learning. In: International Conference on Machine Learning, pp. 1928\u20131937. PMLR (2016)"},{"issue":"7540","key":"3_CR17","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A.A., Veness, J., Bellemare, M.G., Graves, A., Riedmiller, M., Fidjeland, A.K., Ostrovski, G., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"key":"3_CR18","unstructured":"Nikishin, E., Schwarzer, M., D\u2019Oro, P., Bacon, P.L., Courville, A.: The primacy bias in deep reinforcement learning. In: International Conference on Machine Learning, pp. 16828\u201316847. PMLR (2022)"},{"issue":"7839","key":"3_CR19","doi-asserted-by":"publisher","first-page":"604","DOI":"10.1038\/s41586-020-03051-4","volume":"588","author":"J Schrittwieser","year":"2020","unstructured":"Schrittwieser, J., Antonoglou, I., Hubert, T., Simonyan, K., Sifre, L., Schmitt, S., Guez, A., Lockhart, E., Hassabis, D., Graepel, T., et al.: Mastering atari, go, chess and shogi by planning with a learned model. Nature 588(7839), 604\u2013609 (2020)","journal-title":"Nature"},{"key":"3_CR20","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv:1707.06347 (2017)"},{"key":"3_CR21","unstructured":"Schwarzer, M., Anand, A., Goel, R., Hjelm, R.D., Courville, A., Bachman, P.: Data-efficient reinforcement learning with self-predictive representations. In: International Conference on Learning Representations (2020)"},{"key":"3_CR22","unstructured":"Schwarzer, M., Ceron, J.S.O., Courville, A., Bellemare, M.G., Agarwal, R., Castro, P.S.: Bigger, better, faster: Human-level atari with human-level efficiency. In: International Conference on Machine Learning, pp. 30365\u201330380. PMLR (2023)"},{"key":"3_CR23","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT press (2018)"},{"key":"3_CR24","unstructured":"Van\u00a0Hasselt, H.P., Hessel, M., Aslanides, J.: When to use parametric models in reinforcement learning? Advances in Neural Information Processing Systems 32 (2019)"},{"key":"3_CR25","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. Advances in Neural Information Processing Systems 30 (2017)"},{"key":"3_CR26","unstructured":"Wang, Z., Bapst, V., Heess, N., Mnih, V., Munos, R., Kavukcuoglu, K., de\u00a0Freitas, N.: Sample efficient actor-critic with experience replay. In: International Conference on Learning Representations (2017). https:\/\/openreview.net\/forum?id=HyM25Mqel"},{"key":"3_CR27","unstructured":"Xu, M., Quiroz, M., Kohn, R., Sisson, S.A.: Variance reduction properties of the reparameterization trick. In: Chaudhuri, K., Sugiyama, M. (eds.) Proceedings of the Twenty-Second International Conference on Artificial Intelligence and Statistics. Proceedings of Machine Learning Research, vol.\u00a089, pp. 2711\u20132720. PMLR. Accessed 16\u201318 April 2019. https:\/\/proceedings.mlr.press\/v89\/xu19a.html"},{"key":"3_CR28","unstructured":"Xu, Y., Hu, D., Liang, L., McAleer, S.M., Abbeel, P., Fox, R.: Target entropy annealing for discrete soft actor-critic. In: Deep RL Workshop NeurIPS (2021)"},{"key":"3_CR29","unstructured":"Yarats, D., Kostrikov, I., Fergus, R.: Image augmentation is all you need: Regularizing deep reinforcement learning from pixels. In: International Conference on Learning Representations (2020)"},{"key":"3_CR30","first-page":"25476","volume":"34","author":"W Ye","year":"2021","unstructured":"Ye, W., Liu, S., Kurutach, T., Abbeel, P., Gao, Y.: Mastering atari games with limited data. Adv. Neural. Inf. Process. Syst. 34, 25476\u201325488 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-8487-5_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T07:05:43Z","timestamp":1730617543000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-8487-5_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,4]]},"ISBN":["9789819784868","9789819784875"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-8487-5_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,4]]},"assertion":[{"value":"4 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Urumqi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2024.prcv.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}