{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T22:08:11Z","timestamp":1740175691586,"version":"3.37.3"},"reference-count":31,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2019,11,5]],"date-time":"2019-11-05T00:00:00Z","timestamp":1572912000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2019,11,5]],"date-time":"2019-11-05T00:00:00Z","timestamp":1572912000000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100004663","name":"Ministry of Science and Technology, Taiwan","doi-asserted-by":"crossref","award":["105-2221-E-009 -103 -MY3"],"award-info":[{"award-number":["105-2221-E-009 -103 -MY3"]}],"id":[{"id":"10.13039\/501100004663","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100004725","name":"Ministry of Economic Affairs","doi-asserted-by":"publisher","award":["106-EC-17-A-24-0619"],"award-info":[{"award-number":["106-EC-17-A-24-0619"]}],"id":[{"id":"10.13039\/501100004725","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Comput Game J"],"published-print":{"date-parts":[[2020,9]]},"DOI":"10.1007\/s40869-019-00089-x","type":"journal-article","created":{"date-parts":[[2019,11,5]],"date-time":"2019-11-05T20:02:41Z","timestamp":1572984161000},"page":"263-280","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Efficient Exploration in Side-Scrolling Video Games with Trajectory Replay"],"prefix":"10.1007","volume":"9","author":[{"given":"I-Huan","family":"Chiang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chung-Ming","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nien-Hu","family":"Cheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hsin-Yu","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0085-0377","authenticated-orcid":false,"given":"Shi-Chun","family":"Tsai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,11,5]]},"reference":[{"key":"89_CR1","unstructured":"Abadi, M., Agarwal, A., Barham, P., Brevdo, E., Chen, Z., Citro, C., Corrado, G.S., Davis, A., Dean, J., Devin, M., Ghemawat, S., Goodfellow, I., Harp, A., Irving, G., Isard, M., Jia, Y., Jozefowicz, R., Kaiser, L., Kudlur, M., Levenberg, J., Man\u00e9, D., Monga, R., Moore, S., Murray, D., Olah, C., Schuster, M., Shlens, J., Steiner, B., Sutskever, I., Talwar, K., Tucker, P., Vanhoucke, V., Vasudevan, V., Vi\u00e9gas, F., Vinyals, O., Warden, P., Wattenberg, M., Wicke, M., Yu, Y., & Zheng, X. (2015). TensorFlow: Large-scale machine learning on heterogeneous systems. URL \nhttps:\/\/www.tensorflow.org\/\n\n. Software available from tensorflow.org."},{"key":"89_CR2","unstructured":"Arjona-Medina, J. A., Gillhofer, M., Widrich, M., Unterthiner, T., & Hochreiter, S. (2018). Rudder: Return decomposition for delayed rewards. arXiv preprint \narXiv:1806.07857\n\n."},{"key":"89_CR3","unstructured":"Bellemare, M., Srinivasan, S., Ostrovski, G., Schaul, T., Saxton, D., & Munos, R. (2016). Unifying count-based exploration and intrinsic motivation. In Advances in neural information processing systems (pp. 1471\u20131479)."},{"key":"89_CR4","unstructured":"de\u00a0Bruin, T., Kober, J., Tuyls, K., & Babu\u0161ka, R. (2015). The importance of experience replay database composition in deep reinforcement learning. In Deep reinforcement learning workshop, NIPS."},{"key":"89_CR5","unstructured":"Burda, Y., Edwards, H., Pathak, D., Storkey, A., Darrell, T., & Efros, A.A. (2018). Large-scale study of curiosity-driven learning. arXiv preprint \narXiv:1808.04355\n\n."},{"key":"89_CR6","unstructured":"Clark, J., & Amodei, D. (2016). Faulty reward functions in the wild. URL \nhttps:\/\/blog.openai.com\/faulty-reward-functions\/\n\n."},{"key":"89_CR7","unstructured":"Clevert, D. A., Unterthiner, T., & Hochreiter, S. (2015). Fast and accurate deep network learning by exponential linear units (elus). arXiv preprint \narXiv:1511.07289\n\n."},{"key":"89_CR8","unstructured":"Conti, E., Madhavan, V., Such, F. P., Lehman, J., Stanley, K., & Clune, J. (2018). Improving exploration in evolution strategies for deep reinforcement learning via a population of novelty-seeking agents. In Advances in neural information processing systems (pp. 5027\u20135038)."},{"key":"89_CR9","unstructured":"Ecoffet, A., Huizinga, J., Lehman, J., Stanley, K. O., & Clune, J. (2019). Go-explore: a new approach for hard-exploration problems. arXiv preprint \narXiv:1901.10995\n\n."},{"key":"89_CR10","unstructured":"Fu, J., Co-Reyes, J., & Levine, S. (2017). Ex2: Exploration with exemplar models for deep reinforcement learning. In Advances in neural information processing systems (pp. 2577\u20132587)."},{"issue":"8","key":"89_CR11","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., & Schmidhuber, J. (1997). Long short-term memory. Neural Computation, 9(8), 1735\u20131780. \nhttps:\/\/doi.org\/10.1162\/neco.1997.9.8.1735\n\n.","journal-title":"Neural Computation"},{"key":"89_CR12","unstructured":"Houthooft, R., Chen, X., Duan, Y., Schulman, J., De\u00a0Turck, F., & Abbeel, P. (2016). Curiosity-driven exploration in deep reinforcement learning via bayesian neural networks. arXiv preprint \narxiv.1605.09674\n\n."},{"key":"89_CR13","unstructured":"Jaderberg, M., Mnih, V., Czarnecki, W. M., Schaul, T., Leibo, J. Z., Silver, D., & Kavukcuoglu, K. (2016). Reinforcement learning with unsupervised auxiliary tasks. arXiv preprint \narXiv:1611.05397\n\n."},{"issue":"4","key":"89_CR14","doi-asserted-by":"publisher","first-page":"849","DOI":"10.1137\/16M1062569","volume":"59","author":"M Kelly","year":"2017","unstructured":"Kelly, M. (2017). An introduction to trajectory optimization: How to do your own direct collocation. SIAM Review, 59(4), 849\u2013904.","journal-title":"SIAM Review"},{"key":"89_CR15","unstructured":"Kimura, D., Chaudhury, S., Tachibana, R., & Dasgupta, S. (2018). Internal model from observations for reward shaping. arXiv preprint \narXiv:1806.01267\n\n."},{"key":"89_CR16","doi-asserted-by":"publisher","first-page":"313","DOI":"10.1016\/j.artint.2015.02.001","volume":"247","author":"VR Kompella","year":"2017","unstructured":"Kompella, V. R., Stollenga, M., Luciw, M., & Schmidhuber, J. (2017). Continual curiosity-driven skill acquisition from high-dimensional video inputs for humanoid robots. Artificial Intelligence, 247, 313\u2013335.","journal-title":"Artificial Intelligence"},{"key":"89_CR17","unstructured":"Liu, R., & Zou, J. (2017). The effects of memory replay in reinforcement learning. arXiv preprint \narXiv:1710.06574\n\n."},{"key":"89_CR18","unstructured":"Nair, V., & Hinton, G. E. (2010). Rectified linear units improve restricted boltzmann machines. In Proceedings of the 27th international conference on machine learning (ICML-10) (pp. 807\u2013814)."},{"key":"89_CR19","unstructured":"Nichol, A., Pfau, V., Hesse, C., Klimov, O., & Schulman, J. (2018). Gotta learn fast: A new benchmark for generalization in rl. arXiv preprint \narXiv:1804.03720\n\n."},{"key":"89_CR20","unstructured":"Oh, J., Guo, Y., Singh, S., & Lee, H. (2018). Self-imitation learning. arXiv preprint \narXiv:1806.05635\n\n."},{"key":"89_CR21","unstructured":"OpenAI: Openai five. \nhttps:\/\/blog.openai.com\/openai-five\/\n\n (2018)."},{"key":"89_CR22","unstructured":"Pardo, F., Levdik, V., & Kormushev, P. (2018). Goal-oriented trajectories for efficient exploration. arXiv preprint \narXiv:1807.02078\n\n."},{"key":"89_CR23","unstructured":"Pardo, F., Levdik, V., & Kormushev, P. (2018). Q-map: a convolutional approach for goal-oriented reinforcement learning. arXiv preprint \narXiv:1810.02927\n\n."},{"key":"89_CR24","doi-asserted-by":"crossref","unstructured":"Pathak, D., Agrawal, P., Efros, A. A., & Darrell, T. (2017). Curiosity-driven exploration by self-supervised prediction. In International conference on machine learning (ICML) (vol. 2017).","DOI":"10.1109\/CVPRW.2017.70"},{"key":"89_CR25","unstructured":"Schaul, T., Quan, J., Antonoglou, I., & Silver, D. (2015). Prioritized experience replay. arXiv preprint \narXiv:1511.05952\n\n."},{"key":"89_CR26","unstructured":"Schulman, J., Klimov, O., Wolski, F., Dhariwal, P., & Radford, A. (2017). Proximal policy optimization. URL \nhttps:\/\/openai.com\/blog\/openai-baselines-ppo\/"},{"key":"89_CR27","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., & Klimov, O. (2017). Proximal policy optimization algorithms. arXiv preprint \narXiv:1707.06347\n\n."},{"key":"89_CR28","unstructured":"Simonini, T. (2018). Sonic the hedgehog. in openai gym. github:simoninithomas\/Deep\\_reinforcement\\_learning\\_Course."},{"key":"89_CR29","unstructured":"Sukhbaatar, S., Lin, Z., Kostrikov, I., Synnaeve, G., Szlam, A., & Fergus, R. (2017). Intrinsic motivation and automatic curricula via asymmetric self-play. arXiv preprint \narXiv:1703.05407\n\n."},{"key":"89_CR30","unstructured":"Tang, H., Houthooft, R., Foote, D., Stooke, A., Chen, O.X., Duan, Y., Schulman, J., DeTurck, F., & Abbeel, P. (2017). # exploration: A study of count-based exploration for deep reinforcement learning. In Advances in neural information processing systems (pp. 2753\u20132762)."},{"key":"89_CR31","unstructured":"Vinyals, O., Babuschkin, I., Chung, J., Mathieu, M., Jaderberg, M., Czarnecki, W.M., Dudzik, A., Huang, A., Georgiev, P., Powell, R., Ewalds, T., Horgan, D., Kroiss, M., Danihelka, I., Agapiou, J., Oh, J., Dalibard, V., Choi, D., Sifre, L., Sulsky, Y., Vezhnevets, S., Molloy, J., Cai, T., Budden, D., Paine, T., Gulcehre, C., Wang, Z., Pfaff, T., Pohlen, T., Wu, Y., Yogatama, D., Cohen, J., McKinney, K., Smith, O., Schaul, T., Lillicrap, T., Apps, C., Kavukcuoglu, K., Hassabis, D., & Silver, D. (2019). Alphastar: Mastering the real-time strategy game starcraft ii. \nhttps:\/\/deepmind.com\/blog\/alphastar-mastering-real-time-strategy-game-starcraft-ii\/\n\n."}],"container-title":["The Computer Games Journal"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s40869-019-00089-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s40869-019-00089-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s40869-019-00089-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,4]],"date-time":"2020-11-04T00:48:01Z","timestamp":1604450881000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s40869-019-00089-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,11,5]]},"references-count":31,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2020,9]]}},"alternative-id":["89"],"URL":"https:\/\/doi.org\/10.1007\/s40869-019-00089-x","relation":{},"ISSN":["2052-773X"],"issn-type":[{"type":"electronic","value":"2052-773X"}],"subject":[],"published":{"date-parts":[[2019,11,5]]},"assertion":[{"value":"27 July 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 October 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 November 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}