{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T15:44:33Z","timestamp":1772120673146,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,2,11]],"date-time":"2022-02-11T00:00:00Z","timestamp":1644537600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Huawei Noah's Ark Lab intern program","award":["1"],"award-info":[{"award-number":["1"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,2,11]]},"DOI":"10.1145\/3488560.3498535","type":"proceedings-article","created":{"date-parts":[[2022,2,15]],"date-time":"2022-02-15T21:42:57Z","timestamp":1644961377000},"page":"1081-1089","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":15,"title":["Hierarchical Imitation Learning via Subgoal Representation Learning for Dynamic Treatment Recommendation"],"prefix":"10.1145","author":[{"given":"Lu","family":"Wang","sequence":"first","affiliation":[{"name":"East China Normal University, Shanghai, China"}]},{"given":"Ruiming","family":"Tang","sequence":"additional","affiliation":[{"name":"Huawei Noah's Ark Lab, Shenzhen, China"}]},{"given":"Xiaofeng","family":"He","sequence":"additional","affiliation":[{"name":"East China Normal University, Shanghai, China"}]},{"given":"Xiuqiang","family":"He","sequence":"additional","affiliation":[{"name":"Huawei Noah's Ark Lab, Shenzhen, Hong Kong"}]}],"member":"320","published-online":{"date-parts":[[2022,2,15]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"InProceedings of the twenty-first international conference on Machine learning. ACM, Sanjeevan Ahilan and Peter Dayan","author":"Apprenticeship","year":"2019","unstructured":"Apprenticeship learning via inversereinforcement learning. InProceedings of the twenty-first international conference on Machine learning. ACM, Sanjeevan Ahilan and Peter Dayan. 2019."},{"key":"e_1_3_2_2_2_1","unstructured":"Feudal Multi-Agent Hierarchies forCooperative Reinforcement Learning.arXiv preprint arXiv:1901.08492(2019)."},{"key":"e_1_3_2_2_3_1","unstructured":"Jacek M Bajor and Thomas A Lasko. 2016. Predicting medications from diagnosticcodes with recurrent neural networks. (2016)."},{"key":"e_1_3_2_2_4_1","unstructured":"Jacek M Bajor and Thomas A Lasko. 2017. Predicting Medications from DiagnosticCodes with Recurrent Neural Networks.ICLR(2017)."},{"key":"e_1_3_2_2_5_1","volume-title":"The use of reinforcement learningalgorithms to meet the challenges of an artificial pancreas.Expert review ofmedical devices10, 5","author":"Bothe Melanie K","year":"2013","unstructured":"Melanie K Bothe, Luke Dickens, Katrin Reichel, Arn Tellmann, Bjoern Ellger,Martin Westphal, and Ahmed A Faisal. 2013. The use of reinforcement learningalgorithms to meet the challenges of an artificial pancreas.Expert review ofmedical devices10, 5 (2013), 661--673."},{"key":"e_1_3_2_2_6_1","volume-title":"Dynamic treatment regimes.Annual review of statistics and its application1","author":"Chakraborty Bibhas","year":"2014","unstructured":"Bibhas Chakraborty and Susan A Murphy. 2014. Dynamic treatment regimes.Annual review of statistics and its application1 (2014), 447--464."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"crossref","unstructured":"Thomas G Dietterich. 2000. Hierarchical reinforcement learning with the MAXQvalue function decomposition.JAIR(2000) 227--303.","DOI":"10.1613\/jair.639"},{"key":"e_1_3_2_2_8_1","volume-title":"Feature control as intrinsic motivation for hierarchical reinforcementlearning","author":"Dilokthanakul Nat","year":"2019","unstructured":"Nat Dilokthanakul, Christos Kaplanis, Nick Pawlowski, and Murray Shanahan.2019. Feature control as intrinsic motivation for hierarchical reinforcementlearning.IEEE transactions on neural networks and learning systems30, 11 (2019),3409--3418."},{"key":"e_1_3_2_2_9_1","unstructured":"Miroslav Dud\u00edk John Langford and Lihong Li. 2011. Doubly Robust PolicyEvaluation and Learning. InICML. 1097--1104."},{"key":"e_1_3_2_2_10_1","unstructured":"Zach Dwiel Madhavun Candadai Mariano Phielipp and Arjun K Bansal. 2019.Hierarchical policy learning is sensitive to goal space design.arXiv preprintarXiv:1905.01537(2019)."},{"key":"e_1_3_2_2_11_1","unstructured":"Chelsea Finn Sergey Levine and Pieter Abbeel. 2016. Guided cost learning: Deepinverse optimal control via policy optimization. InICML. 49--58."},{"key":"e_1_3_2_2_12_1","unstructured":"Dibya Ghosh Abhishek Gupta and Sergey Levine. 2018. Learning actionablerepresentations with goal-conditioned policies.arXiv preprint arXiv:1811.07819(2018)."},{"key":"e_1_3_2_2_13_1","unstructured":"Ian Goodfellow Jean Pouget-Abadie Mehdi Mirza Bing Xu David Warde-Farley Sherjil Ozair Aaron Courville and Yoshua Bengio. 2014. Generative adversarialnets. InAdvances in neural information processing systems. 2672--2680."},{"key":"e_1_3_2_2_14_1","unstructured":"Karol Hausman Yevgen Chebotar Stefan Schaal Gaurav Sukhatme and Joseph JLim. 2017. Multi-modal imitation learning from unstructured demonstrationsusing generative adversarial nets. InNIPS. 1235--1245."},{"key":"e_1_3_2_2_15_1","unstructured":"Jonathan Ho and Stefano Ermon. 2016. Generative adversarial imitation learning.InAdvances in neural information processing systems. 4565--4573."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"crossref","unstructured":"Jianying Hu Adam Perer and Fei Wang. 2016. Data driven analytics for person-alized healthcare. InHealthcare Information Management Systems. 529--554.","DOI":"10.1007\/978-3-319-20765-0_31"},{"key":"e_1_3_2_2_17_1","unstructured":"Nan Jiang and Lihong Li. 2015. Doubly robust off-policy value evaluation forreinforcement learning.arXiv preprint arXiv:1511.03722(2015)."},{"key":"e_1_3_2_2_18_1","volume-title":"Leo Anthony Celi, andRoger G Mark","author":"Johnson Alistair EW","year":"2016","unstructured":"Alistair EW Johnson, Tom J Pollard, Lu Shen, H Lehman Li-wei, Mengling Feng,Mohammad Ghassemi, Benjamin Moody, Peter Szolovits, Leo Anthony Celi, andRoger G Mark. 2016. MIMIC-III, a freely accessible critical care database.Scientificdata3 (2016), 160035."},{"key":"e_1_3_2_2_19_1","volume-title":"CompILE:Compositional Imitation Learning and Execution. InInternational Conference onMachine Learning. 3418--3428","author":"Kipf Thomas","year":"2019","unstructured":"Thomas Kipf, Yujia Li, Hanjun Dai, Vinicius Zambaldi, Alvaro Sanchez-Gonzalez,Edward Grefenstette, Pushmeet Kohli, and Peter Battaglia. 2019. CompILE:Compositional Imitation Learning and Execution. InInternational Conference onMachine Learning. 3418--3428."},{"key":"e_1_3_2_2_20_1","volume-title":"The Artificial Intelligence Clinician learns optimal treatment strate-gies for sepsis in intensive care.Nature Medicine24, 11","author":"Komorowski Matthieu","year":"2018","unstructured":"Matthieu Komorowski, Leo A Celi, Omar Badawi, Anthony C Gordon, and A AldoFaisal. 2018. The Artificial Intelligence Clinician learns optimal treatment strate-gies for sepsis in intensive care.Nature Medicine24, 11 (2018), 1716."},{"key":"e_1_3_2_2_21_1","unstructured":"Hoang M Le Nan Jiang Alekh Agarwal Miroslav Dud\u00edk Yisong Yue and HalDaum\u00e9 III. 2018. Hierarchical imitation and reinforcement learning.arXivpreprint arXiv:1803.00590(2018)."},{"key":"e_1_3_2_2_22_1","unstructured":"Siyuan Li Rui Wang Minxue Tang and Chongjie Zhang. 2019. Hierarchicalreinforcement learning with advantage-based auxiliary rewards.arXiv preprintarXiv:1910.04450(2019)."},{"key":"e_1_3_2_2_23_1","unstructured":"Siyuan Li Lulu Zheng Jianhao Wang and Chongjie Zhang. [n.d.]. LEARNINGSUBGOAL REPRESENTATIONS WITH SLOW DYNAMICS. ([n. d.])."},{"key":"e_1_3_2_2_24_1","volume-title":"Optimal dynamic treatment regimes.Journal of the RoyalStatistical Society: Series B (Statistical Methodology)65, 2","author":"Murphy Susan A","year":"2003","unstructured":"Susan A Murphy. 2003. Optimal dynamic treatment regimes.Journal of the RoyalStatistical Society: Series B (Statistical Methodology)65, 2 (2003), 331--355."},{"key":"e_1_3_2_2_25_1","unstructured":"Ofir Nachum Shixiang Gu Honglak Lee and Sergey Levine. 2018. Data-efficienthierarchical reinforcement learning.arXiv preprint arXiv:1805.08296(2018)."},{"key":"e_1_3_2_2_26_1","volume":"199","author":"Parr Ronald","unstructured":"Ronald Parr and Stuart J Russell. 1998. Reinforcement learning with hierarchiesof machines. InNeurIPS. 1043--1049.","journal-title":"Stuart J Russell."},{"key":"e_1_3_2_2_27_1","volume-title":"Efficient training of artificial neural networks forautonomous navigation.Neural Computation3, 1","author":"Pomerleau Dean A","year":"1991","unstructured":"Dean A Pomerleau. 1991. Efficient training of artificial neural networks forautonomous navigation.Neural Computation3, 1 (1991), 88--97."},{"key":"e_1_3_2_2_28_1","unstructured":"Doina Precup. 2000.Temporal abstraction in reinforcement learning. Universityof Massachusetts Amherst."},{"key":"e_1_3_2_2_29_1","unstructured":"Doina Precup Richard S. Sutton and Sanjoy Dasgupta. 2001. Off-policy temporal-difference learning with function approximation. InICML. 417--424."},{"key":"e_1_3_2_2_30_1","volume-title":"Peter Szolovits,and Marzyeh Ghassemi.","author":"Raghu Aniruddh","year":"2017","unstructured":"Aniruddh Raghu, Matthieu Komorowski, Leo Anthony Celi, Peter Szolovits,and Marzyeh Ghassemi. 2017."}],"event":{"name":"WSDM '22: The Fifteenth ACM International Conference on Web Search and Data Mining","location":"Virtual Event AZ USA","acronym":"WSDM '22","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the Fifteenth ACM International Conference on Web Search and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3488560.3498535","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3488560.3498535","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:31:20Z","timestamp":1750188680000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3488560.3498535"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,2,11]]},"references-count":30,"alternative-id":["10.1145\/3488560.3498535","10.1145\/3488560"],"URL":"https:\/\/doi.org\/10.1145\/3488560.3498535","relation":{},"subject":[],"published":{"date-parts":[[2022,2,11]]},"assertion":[{"value":"2022-02-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}