{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,13]],"date-time":"2026-05-13T18:08:05Z","timestamp":1778695685429,"version":"3.51.4"},"reference-count":21,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,5,25]]},"DOI":"10.23919\/acc50511.2021.9482807","type":"proceedings-article","created":{"date-parts":[[2021,7,28]],"date-time":"2021-07-28T20:29:16Z","timestamp":1627504156000},"page":"2581-2586","source":"Crossref","is-referenced-by-count":9,"title":["Reinforcement Learning-Based Fed-Batch Optimization with Reaction Surrogate Model"],"prefix":"10.23919","author":[{"given":"Yan","family":"Ma","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhenyu","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ivan","family":"Castillo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ricardo","family":"Rendall","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rahul","family":"Bindlish","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Brian","family":"Ashcraft","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"David","family":"Bentley","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Michael G.","family":"Benton","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jose A.","family":"Romagnoli","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Leo H.","family":"Chiang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-7012(00)00201-3"},{"key":"ref11","first-page":"953","article-title":"Artificial neural networks for small dataset analysis","volume":"7","author":"pasini","year":"2015","journal-title":"Thoracic Diseases"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref13","first-page":"265","article-title":"Tensorflow: A system for large-scale machine learning","author":"abadi","year":"0","journal-title":"12th USENIX Symposium on Operating Systems Design and Implementation ( OSDI 16)"},{"key":"ref14","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref15","author":"mnih","year":"2013","journal-title":"Playing atari with deep reinforcement learning"},{"key":"ref16","author":"schulman","year":"2017","journal-title":"Proximal policy optimization algorithms"},{"key":"ref17","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref18","author":"raffin","year":"2019","journal-title":"Stable baselines3"},{"key":"ref19","author":"paszke","year":"2017","journal-title":"Automatic differentiation in pytorch"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1039\/C7ME00131B"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.compchemeng.2019.106649"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1021\/acscentsci.7b00492"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/j.compchemeng.2020.107016"},{"key":"ref7","author":"langlois","year":"2019","journal-title":"Benchmarking model-based reinforcement learning"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.jprocont.2018.11.004"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/s12293-012-0075-1"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2020.3012947"},{"key":"ref20","author":"haarnoja","year":"2018","journal-title":"Soft actor-critic Off-policy maximum entropy deep reinforcement learning with a stochastic actor"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-5518"}],"event":{"name":"2021 American Control Conference (ACC)","location":"New Orleans, LA, USA","start":{"date-parts":[[2021,5,25]]},"end":{"date-parts":[[2021,5,28]]}},"container-title":["2021 American Control Conference (ACC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9482409\/9482614\/09482807.pdf?arnumber=9482807","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,10,6]],"date-time":"2021-10-06T10:50:13Z","timestamp":1633517413000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9482807\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,5,25]]},"references-count":21,"URL":"https:\/\/doi.org\/10.23919\/acc50511.2021.9482807","relation":{},"subject":[],"published":{"date-parts":[[2021,5,25]]}}}