{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T16:47:02Z","timestamp":1758041222575,"version":"3.44.0"},"publisher-location":"Cham","reference-count":10,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783032045577"},{"type":"electronic","value":"9783032045584"}],"license":[{"start":{"date-parts":[[2025,9,12]],"date-time":"2025-09-12T00:00:00Z","timestamp":1757635200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,12]],"date-time":"2025-09-12T00:00:00Z","timestamp":1757635200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-04558-4_8","type":"book-chapter","created":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T11:16:53Z","timestamp":1757589413000},"page":"93-104","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Learning to\u00a0Optimize Entropy in\u00a0the\u00a0Soft Actor-Critic"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-2451-8804","authenticated-orcid":false,"given":"Zhilei","family":"Zhou","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1521-0671","authenticated-orcid":false,"given":"Malcolm I.","family":"Heywood","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,12]]},"reference":[{"issue":"5","key":"8_CR1","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TSMC.1983.6313077","volume":"13","author":"AG Barto","year":"1983","unstructured":"Barto, A.G., Sutton, R.S., Anderson, C.W.: Neuronlike adaptive elements that can solve difficult learning control problems. IEEE Trans. Syst. Man Cybern. 13(5), 834\u2013846 (1983)","journal-title":"IEEE Trans. Syst. Man Cybern."},{"issue":"1","key":"8_CR2","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1109\/TSMC.2020.3041775","volume":"51","author":"AG Barto","year":"2021","unstructured":"Barto, A.G., Sutton, R.S., Anderson, C.W.: Looking back on the actor-critic architecture. IEEE Trans. Syst. Man Cybern. Syst. 51(1), 40\u201350 (2021)","journal-title":"IEEE Trans. Syst. Man Cybern. Syst."},{"key":"8_CR3","unstructured":"Fujimoto, S., van Hoof, H., Meger, D.: Addressing function approximation error in actor-critic methods. In: Dy, J.G., Krause, A. (eds.) Proceedings of the 35th International Conference on Machine Learning, ICML 2018, Stockholmsm\u00e4ssan, Stockholm, Sweden, 10\u201315 July 2018. Proceedings of Machine Learning Research, vol.\u00a080, pp. 1582\u20131591. PMLR (2018)"},{"key":"8_CR4","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: Dy, J.G., Krause, A. (eds.) Proceedings of the International Conference on Machine Learning. Proceedings of Machine Learning Research, vol.\u00a080, pp. 1856\u20131865. PMLR (2018)"},{"key":"8_CR5","unstructured":"Haarnoja, T., et al.: Soft actor-critic algorithms and applications. CoRR arxiv:1812.05905 (2018)"},{"key":"8_CR6","unstructured":"Huang, S., et al.: Open RL benchmark: comprehensive tracked experiments for reinforcement learning. CoRR arxiv:2402.03046 (2024)"},{"key":"8_CR7","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. In: Bengio, Y., LeCun, Y. (eds.) International Conference on Learning Representations (2015)"},{"key":"8_CR8","unstructured":"Mania, H., Guy, A., Recht, B.: Simple random search of static linear policies is competitive for reinforcement learning. In: Bengio, S., Wallach, H.M., Larochelle, H., Grauman, K., Cesa-Bianchi, N., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol. 31. pp. 1805\u20131814 (2018)"},{"key":"8_CR9","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement learning - an introduction. In: Adaptive Computation and Machine Learning, 2nd edn. MIT Press (2018)"},{"key":"8_CR10","unstructured":"Wang, Y., Ni, T.: Meta-sac: auto-tune the entropy temperature of soft actor-critic via metagradient. CoRR arxiv:2007.01932 (2020)"}],"container-title":["Lecture Notes in Computer Science","Artificial Neural Networks and Machine Learning \u2013 ICANN 2025"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-04558-4_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T11:16:58Z","timestamp":1757589418000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-04558-4_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,12]]},"ISBN":["9783032045577","9783032045584"],"references-count":10,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-04558-4_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025,9,12]]},"assertion":[{"value":"12 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICANN","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Neural Networks","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kaunas","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lithuania","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"34","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icann2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/e-nns.org\/icann2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}