{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T01:42:02Z","timestamp":1743126122572,"version":"3.40.3"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030946616"},{"type":"electronic","value":"9783030946623"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-030-94662-3_2","type":"book-chapter","created":{"date-parts":[[2022,1,11]],"date-time":"2022-01-11T12:03:08Z","timestamp":1641902588000},"page":"21-37","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Uncertainty-Aware Low-Rank Q-Matrix Estimation for\u00a0Deep Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Tong","family":"Sang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hongyao","family":"Tang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianye","family":"Hao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yan","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhaopeng","family":"Meng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,1,11]]},"reference":[{"key":"2_CR1","unstructured":"Bellemare, M., Srinivasan, S., Ostrovski, G., Schaul, T., Saxton, D., Munos, R.: Unifying count-based exploration and intrinsic motivation. In: NeurIPS, pp. 1471\u20131479 (2016)"},{"issue":"4","key":"2_CR2","doi-asserted-by":"publisher","first-page":"1956","DOI":"10.1137\/080738970","volume":"20","author":"J Cai","year":"2010","unstructured":"Cai, J., Cand\u00e8s, E.J., Shen, Z.: A singular value thresholding algorithm for matrix completion. SIAM J. Optim. 20(4), 1956\u20131982 (2010)","journal-title":"SIAM J. Optim."},{"key":"2_CR3","unstructured":"Ciosek, K., Vuong, Q., Loftin, R., Hofmann, K.: Better exploration with optimistic actor critic. In: NeurIPS, pp. 1785\u20131796 (2019)"},{"key":"2_CR4","unstructured":"Fujimoto, S., van Hoof, H., Meger, D.: Addressing function approximation error in actor-critic methods. In: ICML (2018)"},{"key":"2_CR5","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: ICML, pp. 1856\u20131865 (2018)"},{"key":"2_CR6","unstructured":"Hafner, D., Lillicrap, T.P., Ba, J., Norouzi, M.: Dream to control: learning behaviors by latent imagination. In: ICLR (2020)"},{"key":"2_CR7","unstructured":"Hasselt, H., Doron, Y., Strub, F., Hessel, M., Sonnerat, N., Modayil, J.: Deep reinforcement learning and the deadly triad. CoRR abs\/1812.02648 (2018)"},{"key":"2_CR8","unstructured":"He, J., Zhou, D., Gu, Q.: Uniform-pac bounds for reinforcement learning with linear function approximation. CoRR abs\/2106.11612 (2021)"},{"key":"2_CR9","doi-asserted-by":"crossref","unstructured":"Keshavan, R.H., Oh, S., Montanari, A.: Matrix completion from a few entries. In: Proceedings of the IEEE International Symposium on Information Theory, ISIT 2009, June 28\u2013July 3, 2009, Seoul, Korea, pp. 324\u2013328. IEEE (2009)","DOI":"10.1109\/ISIT.2009.5205567"},{"key":"2_CR10","unstructured":"Kumar, A., Agarwal, R., Ghosh, D., Levine, S.: Implicit under-parameterization inhibits data-efficient deep reinforcement learning. In: ICLR (2021)"},{"key":"2_CR11","unstructured":"Lillicrap, T.P., et al.: Continuous control with deep reinforcement learning. In: ICLR (2015)"},{"key":"2_CR12","doi-asserted-by":"crossref","unstructured":"Luo, X., Meng, Q., He, D., Chen, W., Wang, Y.: I4R: promoting deep reinforcement learning by the indicator for expressive representations. In: IJCAI, pp. 2669\u20132675. ijcai.org (2020)","DOI":"10.24963\/ijcai.2020\/370"},{"key":"2_CR13","unstructured":"Lyle, C., Rowland, M., Ostrovski, G., Dabney, W.: On the effect of auxiliary tasks on representation dynamics. In: AISTATS. vol. 130, pp. 1\u20139 (2021)"},{"issue":"1\u20132","key":"2_CR14","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1007\/s10107-009-0306-5","volume":"128","author":"S Ma","year":"2011","unstructured":"Ma, S., Goldfarb, D., Chen, L.: Fixed point and Bregman iterative methods for matrix rank minimization. Math. Program. 128(1\u20132), 321\u2013353 (2011)","journal-title":"Math. Program."},{"key":"2_CR15","first-page":"2287","volume":"11","author":"R Mazumder","year":"2010","unstructured":"Mazumder, R., Hastie, T., Tibshirani, R.: Spectral regularization algorithms for learning large incomplete matrices. J. Mach. Learn. Res. 11, 2287\u20132322 (2010)","journal-title":"J. Mach. Learn. Res."},{"issue":"7540","key":"2_CR16","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"key":"2_CR17","unstructured":"Ong, H.: Value function approximation via low-rank models. CoRR abs\/1509.00061 (2015)"},{"key":"2_CR18","unstructured":"Osband, I., Blundell, C., Pritzel, A., Roy, B.V.: Deep exploration via bootstrapped DQN. In: Lee, D.D., Sugiyama, M., von Luxburg, U., Guyon, I., Garnett, R. (eds.) Advances in Neural Information Processing Systems 29: Annual Conference on Neural Information Processing Systems 2016, December 5\u201310, 2016, Barcelona, Spain, pp. 4026\u20134034 (2016)"},{"key":"2_CR19","unstructured":"Pathak, D., Gandhi, D., Gupta, A.: Self-supervised exploration via disagreement. In: ICML, vol. 97, 5062\u20135071 (2019)"},{"key":"2_CR20","first-page":"1629","volume":"16","author":"B Scherrer","year":"2015","unstructured":"Scherrer, B., Ghavamzadeh, M., Gabillon, V., Lesner, B., Geist, M.: Approximate modified policy iteration and its application to the game of Tetris. J. Mach. Learn. Res. 16, 1629\u20131676 (2015)","journal-title":"J. Mach. Learn. Res."},{"issue":"6","key":"2_CR21","doi-asserted-by":"publisher","first-page":"970","DOI":"10.1021\/acscentsci.9b00055","volume":"5","author":"JS Schreck","year":"2019","unstructured":"Schreck, J.S., Coley, C.W., Bishop, K.J.: Learning retrosynthetic planning through simulated experience. ACS Central Sci. 5(6), 970\u2013981 (2019)","journal-title":"ACS Central Sci."},{"issue":"7587","key":"2_CR22","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"key":"2_CR23","unstructured":"Silver, D., Lever, G., Heess, N., Degris, T., Wierstra, D., Riedmiller, M.A.: Deterministic policy gradient algorithms. In: ICML, pp. 387\u2013395 (2014)"},{"key":"2_CR24","first-page":"285","volume":"16","author":"RS Sutton","year":"1988","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement learning: an introduction. IEEE Trans. Neural Netw. 16, 285\u2013286 (1988)","journal-title":"IEEE Trans. Neural Netw."},{"key":"2_CR25","unstructured":"Tang, H., et al.: #Exploration: a study of count-based exploration for deep reinforcement learning. In: Guyon, I., et al. (eds.) Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, 4\u20139 December 2017, pp. 2753\u20132762. Long Beach, CA, USA (2017)"},{"issue":"7782","key":"2_CR26","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","volume":"575","author":"O Vinyals","year":"2019","unstructured":"Vinyals, O., et al.: Grandmaster level in StarCraft ii using multi-agent reinforcement learning. Nature 575(7782), 350\u2013354 (2019)","journal-title":"Nature"},{"key":"2_CR27","unstructured":"Yang, T., et al.: Exploration in deep reinforcement learning: a comprehensive survey. CoRR abs\/2109.06668 (2021)"},{"key":"2_CR28","unstructured":"Yang, Y., Zhang, G., Xu, Z., Katabi, D.: Harnessing structures for value-based planning and reinforcement learning. In: ICLR (2020)"}],"container-title":["Lecture Notes in Computer Science","Distributed Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-94662-3_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,3,29]],"date-time":"2022-03-29T22:04:01Z","timestamp":1648591441000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-94662-3_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783030946616","9783030946623"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-94662-3_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"11 January 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Distributed Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shanghai","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 December 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 December 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dai22021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.adai.ai\/dai\/2021\/2021.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"31","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"15","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"48% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}