{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T15:58:34Z","timestamp":1767974314864,"version":"3.49.0"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"13","license":[{"start":{"date-parts":[[2022,12,17]],"date-time":"2022-12-17T00:00:00Z","timestamp":1671235200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,12,17]],"date-time":"2022-12-17T00:00:00Z","timestamp":1671235200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100004733","name":"Universidade de Macau","doi-asserted-by":"publisher","award":["MYRG2019-00136-FST"],"award-info":[{"award-number":["MYRG2019-00136-FST"]}],"id":[{"id":"10.13039\/501100004733","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2023,7]]},"DOI":"10.1007\/s10489-022-04322-5","type":"journal-article","created":{"date-parts":[[2022,12,17]],"date-time":"2022-12-17T11:03:37Z","timestamp":1671275017000},"page":"16875-16892","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["Supervised actor-critic reinforcement learning with action feedback for algorithmic trading"],"prefix":"10.1007","volume":"53","author":[{"given":"Qizhou","family":"Sun","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8468-6182","authenticated-orcid":false,"given":"Yain-Whar","family":"Si","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,12,17]]},"reference":[{"key":"4322_CR1","doi-asserted-by":"crossref","unstructured":"Liu X-Y, Yang H, Chen Q, Zhang R, Yang L, Xiao B, Wang CD (2020) FinRL: a deep reinforcement learning library for automated stock trading in quantitative finance. In: Deep RL workshop, NeurIPS 2020","DOI":"10.2139\/ssrn.3737859"},{"key":"4322_CR2","doi-asserted-by":"crossref","unstructured":"Liu X-Y, Yang H, Gao J, Wang CD (2021) finRL: deep reinforcement learning framework to automate trading in quantitative finance. In: ACM international conference on AI in finance (ICAIf)","DOI":"10.1145\/3490354.3494366"},{"issue":"7540","key":"4322_CR3","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533","journal-title":"Nature"},{"issue":"7676","key":"4322_CR4","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver D, Schrittwieser J, Simonyan K, Antonoglou I, Huang A, Guez A, Hubert T, Baker L, Lai M, Bolton A et al (2017) Mastering the game of go without human knowledge. Nature 550 (7676):354\u2013359","journal-title":"Nature"},{"key":"4322_CR5","doi-asserted-by":"crossref","unstructured":"Yang H, Liu X-Y, Zhong S, Walid A (2020) Deep reinforcement learning for automated stock trading: an ensemble strategy. In: Proceedings of the first ACM international conference on AI in finance, pp 1\u20138","DOI":"10.1145\/3383455.3422540"},{"key":"4322_CR6","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1016\/j.future.2021.04.018","volume":"123","author":"Y Zhang","year":"2021","unstructured":"Zhang Y, Zhou Y, Lu H, Fujita H (2021) Cooperative multi-agent actor\u2013critic control of traffic network flow based on edge computing. Futur Gener Comput Syst 123:128\u2013141. https:\/\/doi.org\/10.1016\/j.future.2021.04.018","journal-title":"Futur Gener Comput Syst"},{"issue":"7","key":"4322_CR7","doi-asserted-by":"publisher","first-page":"7708","DOI":"10.1109\/TITS.2021.3071862","volume":"23","author":"Y Zhang","year":"2022","unstructured":"Zhang Y, Zhou Y, Lu H, Fujita H (2022) Spark cloud-based parallel computing for traffic network flow predictive control using non-analytical predictive model. IEEE Trans Intell Transp Syst 23(7):7708\u20137720. https:\/\/doi.org\/10.1109\/TITS.2021.3071862","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"4322_CR8","doi-asserted-by":"publisher","first-page":"708","DOI":"10.1016\/j.ins.2021.04.088","volume":"570","author":"M Shang","year":"2021","unstructured":"Shang M, Zhou Y, Fujita H (2021) Deep reinforcement learning with reference system to handle constraints for energy-efficient train control. Inf Sci 570:708\u2013721. https:\/\/doi.org\/10.1016\/j.ins.2021.04.088","journal-title":"Inf Sci"},{"key":"4322_CR9","doi-asserted-by":"crossref","unstructured":"Van Hasselt H, Guez A, Silver D (2016) Deep reinforcement learning with double q-learning. In: Proceedings of the AAAI conference on artificial intelligence, vol 30.","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"4322_CR10","doi-asserted-by":"crossref","unstructured":"Wang Y, He H, Tan X (2020) Truly proximal policy optimization. In: Uncertainty in artificial intelligence, PMLR, pp 113\u2013122","DOI":"10.32604\/jai.2020.010137"},{"key":"4322_CR11","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2016) Continuous control with deep reinforcement learning ICLR (Poster)2016"},{"key":"4322_CR12","unstructured":"Fujimoto S, Hoof H, Meger D (2018) Addressing function approximation error in actor-critic methods. In: International conference on machine learning, PMLR, pp 1587\u20131596"},{"key":"4322_CR13","unstructured":"Bellemare MG, Dabney W, R\u00e9mi M (2017) A distributional perspective on reinforcement learning. In: International conference on machine learning, PMLR, pp 449\u2013458"},{"key":"4322_CR14","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1613\/jair.3912","volume":"47","author":"MG Bellemare","year":"2013","unstructured":"Bellemare MG, Naddaf Y, Veness J, Bowling M (2013) The arcade learning environment: an evaluation platform for general agents. J Artif Intell Res 47:253\u2013279","journal-title":"J Artif Intell Res"},{"key":"4322_CR15","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1146\/annurev-control-042920-092451","volume":"5","author":"H Kurniawati","year":"2022","unstructured":"Kurniawati H (2022) Partially observable markov decision processes and robotics. AnnRev Control Robot Auton Syst 5:253\u2013277","journal-title":"AnnRev Control Robot Auton Syst"},{"key":"4322_CR16","doi-asserted-by":"publisher","first-page":"142","DOI":"10.1016\/j.ins.2020.05.066","volume":"538","author":"X Wu","year":"2020","unstructured":"Wu X, Chen H, Wang J, Troiano L, Loia V, Fujita H (2020) Adaptive stock trading strategies with deep reinforcement learning methods. Inf Sci 538:142\u2013158. https:\/\/doi.org\/10.1016\/j.ins.2020.05.066","journal-title":"Inf Sci"},{"key":"4322_CR17","doi-asserted-by":"crossref","unstructured":"Graves A (2012) Long short-term memory. In: Supervised sequence labelling with recurrent neural networks. Springer, Berlin, pp 37\u201345","DOI":"10.1007\/978-3-642-24797-2_4"},{"issue":"1","key":"4322_CR18","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1109\/TSTE.2016.2593713","volume":"8","author":"V Mohan","year":"2016","unstructured":"Mohan V, Singh JG, Ongsakul W (2016) Sortino ratio based portfolio optimization considering evs and renewable energy in microgrid power market. IEEE Trans Sustain Energ 8(1):219\u2013229","journal-title":"IEEE Trans Sustain Energ"},{"issue":"2","key":"4322_CR19","doi-asserted-by":"publisher","first-page":"96","DOI":"10.3390\/jrfm12020096","volume":"12","author":"D Vezeris","year":"2019","unstructured":"Vezeris D, Karkanis I, Kyrgos T (2019) Adturtle: an advanced turtle trading system. J Risk Financ Manag 12(2):96","journal-title":"J Risk Financ Manag"},{"key":"4322_CR20","unstructured":"Liang E, Liaw R, Nishihara R, Moritz P, Fox R, Goldberg K, Gonzalez J, Jordan M, Stoica I (2018) Rllib: abstractions for distributed reinforcement learning. In: International conference on machine learning, PMLR, pp 3053\u20133062"},{"key":"4322_CR21","unstructured":"Moritz P, Nishihara R, Wang S, Tumanov A, Liaw R, Liang E, Elibol M, Yang Z, Paul W, Jordan MI et al (2018) Ray: a distributed framework for emerging {AI} applications. In: 13Th {USENIX} symposium on operating systems design and implementation ({OSDI} 18), pp 561\u2013577"},{"issue":"6","key":"4322_CR22","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1109\/MSP.2017.2743240","volume":"34","author":"K Arulkumaran","year":"2017","unstructured":"Arulkumaran K, Deisenroth MP, Brundage M, Bharath AA (2017) Deep reinforcement learning: a brief survey. IEEE Signal Proc Mag 34(6):26\u201338","journal-title":"IEEE Signal Proc Mag"},{"key":"4322_CR23","unstructured":"Sutton RS, Barto A, et al. (2020) Introduction to reinforcement learning, vol 135. MIT press Cambridge"},{"key":"4322_CR24","doi-asserted-by":"publisher","unstructured":"Sewak M (2019) Deep Reinforcement Learning.,1st Edition, Springer Singapore. https:\/\/doi.org\/10.1007\/978-981-13-8285-7","DOI":"10.1007\/978-981-13-8285-7"},{"key":"4322_CR25","unstructured":"Mnih V, Badia AP, Mirza M, Graves A, Lillicrap T, Harley T, Silver D, Kavukcuoglu K (2016) Asynchronous methods for deep reinforcement learning. In: International conference on machine learning, PMLR, pp 1928\u20131937"},{"key":"4322_CR26","unstructured":"Duan Y, Chen X, Houthooft R, Schulman J, Abbeel P (2016) Benchmarking deep reinforcement learning for continuous control. In: International conference on machine learning, PMLR, pp 1329\u20131338"},{"key":"4322_CR27","doi-asserted-by":"crossref","unstructured":"Henderson P, Islam R, Bachman P, Pineau J, Precup D, Meger D (2018) Deep reinforcement learning that matters. In: Proceedings of the AAAI conference on artificial intelligence, vol 32.","DOI":"10.1609\/aaai.v32i1.11694"},{"key":"4322_CR28","doi-asserted-by":"crossref","unstructured":"Clouse JA, Utgoff PE (1992) A teaching method for reinforcement learning. In: Machine learning proceedings 1992, Elsevier, pp 92\u2013101","DOI":"10.1016\/B978-1-55860-247-2.50017-6"},{"issue":"3-4","key":"4322_CR29","doi-asserted-by":"publisher","first-page":"283","DOI":"10.1016\/S0921-8890(97)00043-2","volume":"22","author":"H Benbrahim","year":"1997","unstructured":"Benbrahim H, Franklin JA (1997) Biped dynamic walking using reinforcement learning. Robot Auton Syst 22(3-4):283\u2013302","journal-title":"Robot Auton Syst"},{"key":"4322_CR30","unstructured":"Rosenstein MT, Barto A, Si J, Barto A, Powell W, Wunsch D (2004) Supervised actor-critic reinforcement learning. In: Learning and approximate dynamic programming: scaling up to the real world, pp 359\u2013380"},{"key":"4322_CR31","doi-asserted-by":"crossref","unstructured":"Wang L, Zhang W, He X, Zha H (2018) Supervised reinforcement learning with recurrent neural network for dynamic treatment recommendation. In: Proceedings of the 24th ACM SIGKDD international conference on knowledge discovery & data mining, pp 2447\u20132456","DOI":"10.1145\/3219819.3219961"},{"key":"4322_CR32","unstructured":"Rockefeller B (2019) Technical analysis for dummies john wiley & sons"},{"key":"4322_CR33","doi-asserted-by":"publisher","unstructured":"Edwards RD, Magee J, Bassetti WHC (2018) Technical Analysis of Stock Trends (11th ed.). CRC Press. https:\/\/doi.org\/10.4324\/9781315115719","DOI":"10.4324\/9781315115719"},{"key":"4322_CR34","doi-asserted-by":"publisher","DOI":"10.1201\/9781315273600","volume-title":"Introduction to stochastic processes","author":"GF Lawler","year":"2018","unstructured":"Lawler GF (2018) Introduction to stochastic processes. Chapman and hall\/CRC, London"},{"key":"4322_CR35","unstructured":"Paszke A, Gross S, Massa F, Lerer A, Bradbury J, Chanan G, Killeen T, Lin Z, Gimelshein N, Antiga L, Desmaison A, Kopf A, Yang E, DeVito Z, Raison M, Tejani A, Chilamkurthy S, Steiner B, Fang L, Bai J, Chintala S (2019) Pytorch: an imperative style, high-performan ce deep learning library. In: Advances in neural information processing systems 32, Curran Associates, Inc., pp 8024\u20138035. http:\/\/papers.neurips.cc\/paper\/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdfhttp:\/\/papers.neurips.cc\/paper\/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf"},{"issue":"77","key":"4322_CR36","first-page":"1","volume":"22","author":"Y Fujita","year":"2021","unstructured":"Fujita Y, Nagarajan P, Kataoka T, Ishikawa T (2021) Chainerrl: a deep reinforcement learning library. J Mach Learn Res 22(77):1\u201314","journal-title":"J Mach Learn Res"},{"key":"4322_CR37","doi-asserted-by":"crossref","unstructured":"Tokui S, Okuta R, Akiba T, Niitani Y, Ogawa T, Saito S, Suzuki S, Uenishi K, Vogel B, Yamazaki Vincent H (2019) Chainer: a deep learning framework for accelerating the research cycle. In: Proceedings of the 25th ACM SIGKDD international conference on knowledge discovery & data mining, ACM, pp 2002\u2013 2011","DOI":"10.1145\/3292500.3330756"},{"key":"4322_CR38","volume-title":"Neural networks and deep learning, vol 25","author":"MA Nielsen","year":"2015","unstructured":"Nielsen MA (2015) Neural networks and deep learning, vol 25. Determination press, San Francisco"},{"key":"4322_CR39","unstructured":"Haarnoja T, Zhou A, Abbeel P, Levine S (2018) Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: PMLR, pp 1861\u20131870"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-04322-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-022-04322-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-04322-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,1]],"date-time":"2023-07-01T05:11:55Z","timestamp":1688188315000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-022-04322-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,17]]},"references-count":39,"journal-issue":{"issue":"13","published-print":{"date-parts":[[2023,7]]}},"alternative-id":["4322"],"URL":"https:\/\/doi.org\/10.1007\/s10489-022-04322-5","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,12,17]]},"assertion":[{"value":"3 November 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 December 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no competing interests as defined by Springer, or other interests that might be perceived to influence the results and\/or discussion reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"<!--Emphasis Type='Bold' removed-->Competing interests"}}]}}