{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T22:15:03Z","timestamp":1762035303149,"version":"build-2065373602"},"reference-count":22,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,1,5]]},"DOI":"10.1109\/comsnets51098.2021.9352741","type":"proceedings-article","created":{"date-parts":[[2021,2,19]],"date-time":"2021-02-19T10:11:22Z","timestamp":1613729482000},"page":"86-89","source":"Crossref","is-referenced-by-count":3,"title":["Monte Carlo Rollout Policy for Recommendation Systems with Dynamic User Behavior"],"prefix":"10.1109","author":[{"given":"Rahul","family":"Meshram","sequence":"first","affiliation":[]},{"given":"Kesav","family":"Kaza","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/1772690.1772758"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1561\/1500000067"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.2307\/3214163"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1002\/9780470980033"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1287\/moor.24.2.293"},{"key":"ref15","first-page":"1","article-title":"On-line policy improvement using monte carlo search","author":"tesauro","year":"1996","journal-title":"NIPS"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1023\/B:DISC.0000028199.78776.c4"},{"key":"ref17","article-title":"Distributed Reinforcement Learning, Rollout, and Approximate Policy Iteration","author":"bertsekas","year":"2020","journal-title":"Athena Scientific"},{"key":"ref18","article-title":"Simulation based algorithms for Markov decision processes and multi-action restless bandits","author":"meshram","year":"2020","journal-title":"ArXiv"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1287\/opre.21.5.1071"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/371920.372071"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/245108.245126"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-72079-9_9"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2009.263"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2010.2044061"},{"key":"ref7","article-title":"Neural collaborating filtering","author":"he","year":"2017","journal-title":"ArXiv"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-29659-3"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/COMSNETS.2017.7945378"},{"key":"ref9","article-title":"The epoch-greedy algorithm for contextual multi-armed bandits","author":"langford","year":"2007","journal-title":"Proc NIPS"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1287\/opre.26.2.282"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2018.2799521"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1287\/opre.35.5.736"}],"event":{"name":"2021 International Conference on COMmunication Systems & NETworkS (COMSNETS)","start":{"date-parts":[[2021,1,5]]},"location":"Bangalore, India","end":{"date-parts":[[2021,1,9]]}},"container-title":["2021 International Conference on COMmunication Systems &amp; NETworkS (COMSNETS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9352735\/9352808\/09352741.pdf?arnumber=9352741","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,28]],"date-time":"2022-01-28T22:09:21Z","timestamp":1643407761000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9352741\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,5]]},"references-count":22,"URL":"https:\/\/doi.org\/10.1109\/comsnets51098.2021.9352741","relation":{},"subject":[],"published":{"date-parts":[[2021,1,5]]}}}