{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,21]],"date-time":"2026-03-21T01:26:18Z","timestamp":1774056378064,"version":"3.50.1"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"23","license":[{"start":{"date-parts":[[2023,9,23]],"date-time":"2023-09-23T00:00:00Z","timestamp":1695427200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,9,23]],"date-time":"2023-09-23T00:00:00Z","timestamp":1695427200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100016252","name":"Research Services and Knowledge Transfer Office, University of Macau","doi-asserted-by":"publisher","award":["MYRG2022-00162-FST"],"award-info":[{"award-number":["MYRG2022-00162-FST"]}],"id":[{"id":"10.13039\/501100016252","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100016252","name":"Research Services and Knowledge Transfer Office, University of Macau","doi-asserted-by":"publisher","award":["MYRG2019-00136-FST"],"award-info":[{"award-number":["MYRG2019-00136-FST"]}],"id":[{"id":"10.13039\/501100016252","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1007\/s10489-023-04959-w","type":"journal-article","created":{"date-parts":[[2023,9,23]],"date-time":"2023-09-23T08:02:26Z","timestamp":1695456146000},"page":"28186-28206","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Transaction-aware inverse reinforcement learning for trading in stock markets"],"prefix":"10.1007","volume":"53","author":[{"given":"Qizhou","family":"Sun","sequence":"first","affiliation":[]},{"given":"Xueyuan","family":"Gong","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8468-6182","authenticated-orcid":false,"given":"Yain-Whar","family":"Si","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,9,23]]},"reference":[{"key":"4959_CR1","doi-asserted-by":"publisher","DOI":"10.1002\/9781118630006","volume-title":"Quantitative Finance","author":"MC Mariani","year":"2019","unstructured":"Mariani MC, Florescu I (2019) Quantitative Finance. John Wiley & Sons, London"},{"key":"4959_CR2","unstructured":"Sutton RS (2020) Sutton & barto book: reinforcement learning: an introduction. In: A Bradford Book. MIT Press Cambridge, MA, London"},{"key":"4959_CR3","doi-asserted-by":"crossref","unstructured":"Liu X-Y, Yang H, Gao J, Wang CD (2021) Finrl: deep reinforcement learning framework to automate trading in quantitative finance. In: Proceedings of the second ACM international conference on AI in finance, pp 1\u20139","DOI":"10.1145\/3490354.3494366"},{"key":"4959_CR4","doi-asserted-by":"crossref","unstructured":"Li Z, Liu X-Y, Zheng J, Wang Z, Walid A, Guo J (2021) Finrl-podracer: high performance and scalable deep reinforcement learning for quantitative finance. In: Proceedings of the second ACM international conference on AI in finance, pp 1\u20139","DOI":"10.1145\/3490354.3494413"},{"key":"4959_CR5","doi-asserted-by":"publisher","first-page":"142","DOI":"10.1016\/j.ins.2020.05.066","volume":"538","author":"X Wu","year":"2020","unstructured":"Wu X, Chen H, Wang J, Troiano L, Loia V, Fujita H (2020) Adaptive stock trading strategies with deep reinforcement learning methods. Information Sciences 538:142\u2013158. https:\/\/doi.org\/10.1016\/j.ins.2020.05.066","journal-title":"Information Sciences"},{"key":"4959_CR6","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1613\/jair.3912","volume":"47","author":"MG Bellemare","year":"2013","unstructured":"Bellemare MG, Naddaf Y, Veness J, Bowling M (2013) The arcade learning environment: An evaluation platform for general agents. Journal of Artificial Intelligence Research 47:253\u2013279","journal-title":"Journal of Artificial Intelligence Research"},{"issue":"7540","key":"4959_CR7","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533","journal-title":"Nature"},{"key":"4959_CR8","unstructured":"Chou P-W, Maturana D, Scherer S (2017) Improving stochastic policy gradients in continuous control with deep reinforcement learning using the beta distribution. In: International conference on machine learning, PMLR pp 834\u2013843"},{"key":"4959_CR9","doi-asserted-by":"crossref","unstructured":"Hessel M, Modayil J, Van Hasselt H, Schaul T, Ostrovski G, Dabney W, Horgan D, Piot B, Azar M, Silver D (2018) Rainbow: combining improvements in deep reinforcement learning. In: Thirty-second AAAI conference on artificial intelligence","DOI":"10.1609\/aaai.v32i1.11796"},{"issue":"2","key":"4959_CR10","doi-asserted-by":"publisher","first-page":"610","DOI":"10.1109\/LRA.2019.2891991","volume":"4","author":"F Niroui","year":"2019","unstructured":"Niroui F, Zhang K, Kashino Z, Nejat G (2019) Deep reinforcement learning robot for search and rescue applications: Exploration in unknown cluttered environments. IEEE Robotics and Automation Letters 4(2):610\u2013617","journal-title":"IEEE Robotics and Automation Letters"},{"key":"4959_CR11","doi-asserted-by":"crossref","unstructured":"Marchesini E, Farinelli A (2022) Enhancing deep reinforcement learning approaches for multi-robot navigation via single-robot evolutionary policy search. In: 2022 international conference on robotics and automation (ICRA), IEEE pp 5525\u20135531","DOI":"10.1109\/ICRA46639.2022.9812341"},{"key":"4959_CR12","doi-asserted-by":"publisher","unstructured":"Nguyen H, La H (2019) Review of deep reinforcement learning for robot manipulation. In: 2019 third IEEE international conference on robotic computing (IRC), pp 590\u2013595. https:\/\/doi.org\/10.1109\/IRC.2019.00120","DOI":"10.1109\/IRC.2019.00120"},{"key":"4959_CR13","unstructured":"Ng AY, Russell S et al (2000) Algorithms for inverse reinforcement learning. In: ICML, vol 1, p 2"},{"key":"4959_CR14","volume-title":"Introduction to Machine Learning","author":"E Alpaydin","year":"2020","unstructured":"Alpaydin E (2020) Introduction to Machine Learning. MIT press, US"},{"key":"4959_CR15","doi-asserted-by":"publisher","first-page":"388","DOI":"10.1016\/j.eswa.2018.07.056","volume":"114","author":"SY Yang","year":"2018","unstructured":"Yang SY, Yu Y, Almahdi S (2018) An investor sentiment reward-based trading system using gaussian inverse reinforcement learning algorithm. Expert Systems with Applications 114:388\u2013401","journal-title":"Expert Systems with Applications"},{"key":"4959_CR16","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.108543","volume":"125","author":"W Zhang","year":"2022","unstructured":"Zhang W, Zhang N, Yan J, Li G, Yang X (2022) Auto uning of price prediction models for high-frequency trading via reinforcement learning. Pattern Recogn 125:108543","journal-title":"Pattern Recogn"},{"key":"4959_CR17","unstructured":"Hausknecht M, Stone P (2015) Deep recurrent q-learning for partially observable mdps. In: 2015 Aaai fall symposium series"},{"key":"4959_CR18","doi-asserted-by":"crossref","unstructured":"Wang Y, He H, Tan X (2020) Truly proximal policy optimization. In: Uncertainty in artificial intelligence, PMLR pp 113\u2013 122","DOI":"10.32604\/jai.2020.010137"},{"key":"4959_CR19","unstructured":"Haarnoja T, Zhou A, Abbeel P, Levine S (2018) Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor, PMLR 1861\u20131870"},{"key":"4959_CR20","doi-asserted-by":"publisher","DOI":"10.4324\/9781315115719","volume-title":"Technical Analysis of Stock Trends","author":"RD Edwards","year":"2018","unstructured":"Edwards RD, Magee J, Bassetti WC (2018) Technical Analysis of Stock Trends. CRC Press, UK"},{"key":"4959_CR21","unstructured":"Wang Z, Schaul T, Hessel M, Hasselt H, Lanctot M, Freitas N (2016) Dueling network architectures for deep reinforcement learning. In: International conference on machine learning, PMLR pp 1995\u20132003"},{"key":"4959_CR22","doi-asserted-by":"crossref","unstructured":"Van Hasselt H, Guez A, Silver D (2016) Deep reinforcement learning with double q-learning. In: Proceedings of the AAAI conference on artificial intelligence, vol 30","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"4959_CR23","first-page":"351","volume":"21","author":"J Chung","year":"2013","unstructured":"Chung J (2013) Playing atari with deep reinforcement learning. Comput Ence 21:351\u2013362","journal-title":"Comput Ence"},{"key":"4959_CR24","first-page":"1433","volume-title":"Aaai","author":"BD Ziebart","year":"2008","unstructured":"Ziebart BD, Maas AL, Bagnell JA, Dey AK et al (2008) Maximum entropy inverse reinforcement learning. Aaai, vol 8. IL, USA, Chicago, pp 1433\u20131438"},{"key":"4959_CR25","unstructured":"Hadfield-Menell D, Russell SJ, Abbeel P, Dragan A (2016) Cooperative inverse reinforcement learning. Advances in Neural Information Processing Systems 29"},{"key":"4959_CR26","doi-asserted-by":"crossref","unstructured":"Herman M, Fischer V, Gindele T, Burgard W (2015) Inverse reinforcement learning of behavioral models for online-adapting navigation strategies. In: 2015 IEEE international conference on robotics and automation (ICRA), IEEE pp 3215\u20133222","DOI":"10.1109\/ICRA.2015.7139642"},{"key":"4959_CR27","doi-asserted-by":"crossref","unstructured":"Zhifei S, Joo EM (2012) A review of inverse reinforcement learning theory and recent advances. In: 2012 IEEE congress on evolutionary computation, IEEE pp 1\u20138","DOI":"10.1109\/CEC.2012.6256507"},{"key":"4959_CR28","unstructured":"Audiffren J, Valko M, Lazaric A, Ghavamzadeh M (2015) Maximum entropy semi-supervised inverse reinforcement learning. In: Twenty- fourth international joint conference on artificial intelligence"},{"key":"4959_CR29","doi-asserted-by":"crossref","unstructured":"Zhifei S, Joo EM (2012) A survey of inverse reinforcement learning techniques. International Journal of Intelligent Computing and Cybernetics","DOI":"10.1108\/17563781211255862"},{"key":"4959_CR30","unstructured":"Mnih V, Badia AP, Mirza M, Graves A, Lillicrap T, Harley T, Silver D, Kavukcuoglu K (2016) Asynchronous methods for deep reinforcement learning. In: International conference on machine learning, PMLR pp 1928\u20131937"},{"key":"4959_CR31","unstructured":"Wu Y, Mansimov E, Grosse RB, Liao S, Ba J (2017) Scalable trustregion method for deep reinforcement learning using kronecker-factored approximation. Advances in Neural Information Processing Systems 30"},{"key":"4959_CR32","unstructured":"Ho J, Ermon S (2016) Generative adversarial imitation learning. Advances in Neural Information Processing Systems 29"},{"issue":"10","key":"4959_CR33","doi-asserted-by":"publisher","first-page":"1683","DOI":"10.1080\/14697688.2015.1011684","volume":"15","author":"SY Yang","year":"2015","unstructured":"Yang SY, Qiao Q, Beling PA, Scherer WT, Kirilenko AA (2015) Gaussian process-based algorithmic trading strategy identification. Quantitative Finance 15(10):1683\u20131703","journal-title":"Quantitative Finance"},{"key":"4959_CR34","unstructured":"Baiynd A-M (2011) The trading book: a complete solution to mastering technical systems and trading psychology. McGraw Hill Professional, NewYork"},{"key":"4959_CR35","doi-asserted-by":"publisher","first-page":"2128","DOI":"10.1609\/aaai.v34i02.5587","volume":"34","author":"Y Liu","year":"2020","unstructured":"Liu Y, Liu Q, Zhao H, Pan Z, Liu C (2020) Adaptive quantitative trading: An imitative deep reinforcement learning approach. Proceedings of the AAAI Conference on Artificial Intelligence 34:2128\u20132135","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"4959_CR36","unstructured":"Finn C, Levine S, Abbeel P (2016) Guided cost learning: Deep inverse optimal control via policy optimization. In: International conference on machine learning, PMLR pp 49\u201358"},{"key":"4959_CR37","volume-title":"Practical Time Series Analysis: Prediction with Statistics and Machine Learning","author":"A Nielsen","year":"2019","unstructured":"Nielsen A (2019) Practical Time Series Analysis: Prediction with Statistics and Machine Learning. O\u2019 Reilly Media, London"},{"key":"4959_CR38","unstructured":"Pascanu R, \u00c7aglar G, Cho K, Bengio Y (2014) How to construct deep recurrent neural networks. CoRR arXiv:1312.6026"},{"key":"4959_CR39","doi-asserted-by":"publisher","first-page":"108014","DOI":"10.1109\/ACCESS.2019.2932789","volume":"7","author":"Y Li","year":"2019","unstructured":"Li Y, Zheng W, Zheng Z (2019) Deep robust reinforcement learning for practical algorithmic trading. IEEE Access 7:108014\u2013108022","journal-title":"IEEE Access"},{"key":"4959_CR40","doi-asserted-by":"crossref","unstructured":"Abbeel P, Ng AY (2004) Apprenticeship learning via inverse reinforcement learning. In: Proceedings of the twenty-first international conference on machine learning, p 1","DOI":"10.1145\/1015330.1015430"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-023-04959-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-023-04959-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-023-04959-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,29]],"date-time":"2023-11-29T14:12:02Z","timestamp":1701267122000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-023-04959-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9,23]]},"references-count":40,"journal-issue":{"issue":"23","published-print":{"date-parts":[[2023,12]]}},"alternative-id":["4959"],"URL":"https:\/\/doi.org\/10.1007\/s10489-023-04959-w","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,9,23]]},"assertion":[{"value":"8 August 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 September 2023","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}