{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T16:16:19Z","timestamp":1742919379244,"version":"3.40.3"},"publisher-location":"Cham","reference-count":50,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031708923"},{"type":"electronic","value":"9783031708930"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-70893-0_4","type":"book-chapter","created":{"date-parts":[[2024,8,29]],"date-time":"2024-08-29T11:02:54Z","timestamp":1724929374000},"page":"45-59","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Data Augmentation in\u00a0Latent Space with\u00a0Variational Autoencoder and\u00a0Pretrained Image Model for\u00a0Visual Reinforcement Learning"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-9262-5939","authenticated-orcid":false,"given":"Xuzhe","family":"Dang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8435-5025","authenticated-orcid":false,"given":"Stefan","family":"Edelkamp","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,8,30]]},"reference":[{"unstructured":"Bowles, C., et al.: Gan augmentation: augmenting training data using generative adversarial networks (2018)","key":"4_CR1"},{"unstructured":"Cetin, E., Ball, P.J., Roberts, S., Celiktutan, O.: Stabilizing off-policy deep reinforcement learning from pixels. arXiv preprint arXiv:2207.00986 (2022)","key":"4_CR2"},{"doi-asserted-by":"crossref","unstructured":"Chadebec, C., Allassonni\u00e8re, S.: Data augmentation with variational autoencoders and manifold sampling (2021)","key":"4_CR3","DOI":"10.1007\/978-3-030-88210-5_17"},{"doi-asserted-by":"crossref","unstructured":"Chen, C., Hammernik, K., Ouyang, C., Qin, C., Bai, W., Rueckert, D.: Cooperative training and latent space data augmentation for robust medical image segmentation (2021)","key":"4_CR4","DOI":"10.1007\/978-3-030-87199-4_14"},{"unstructured":"Chen, T., Kornblith, S., Norouzi, M., Hinton, G.: A simple framework for contrastive learning of visual representations. In: International Conference on Machine Learning, pp. 1597\u20131607. PMLR (2020)","key":"4_CR5"},{"unstructured":"Cheung, T.H., Yeung, D.Y.: MODALS: modality-agnostic automated data augmentation in the latent space. In: International Conference on Learning Representations (2021). https:\/\/openreview.net\/forum?id=XjYgR6gbCEc","key":"4_CR6"},{"doi-asserted-by":"crossref","unstructured":"Ciregan, D., Meier, U., Schmidhuber, J.: Multi-column deep neural networks for image classification. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition, pp. 3642\u20133649. IEEE (2012)","key":"4_CR7","DOI":"10.1109\/CVPR.2012.6248110"},{"unstructured":"Cire\u015fan, D.C., Meier, U., Masci, J., Gambardella, L.M., Schmidhuber, J.: High-performance neural networks for visual object classification. arXiv preprint arXiv:1102.0183 (2011)","key":"4_CR8"},{"unstructured":"Cui, Y., Niekum, S., Gupta, A., Kumar, V., Rajeswaran, A.: Can foundation models perform zero-shot task specification for robot manipulation? In: Learning for Dynamics and Control Conference, pp. 893\u2013905. PMLR (2022)","key":"4_CR9"},{"unstructured":"Dosovitskiy, A., Ros, G., Codevilla, F., Lopez, A., Koltun, V.: Carla: an open urban driving simulator. In: Conference on Robot Learning, pp. 1\u201316. PMLR (2017)","key":"4_CR10"},{"doi-asserted-by":"crossref","unstructured":"Ebert, F., et al.: Bridge data: boosting generalization of robotic skills with cross-domain datasets. arXiv preprint arXiv:2109.13396 (2021)","key":"4_CR11","DOI":"10.15607\/RSS.2022.XVIII.063"},{"unstructured":"Fujimoto, S., van Hoof, H., Meger, D.: Addressing function approximation error in actor-critic methods (2018)","key":"4_CR12"},{"unstructured":"Goodfellow, I.J., et al.: Generative adversarial networks (2014)","key":"4_CR13"},{"unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International Conference on Machine Learning, pp. 1861\u20131870. PMLR (2018)","key":"4_CR14"},{"unstructured":"Hansen, N., Su, H., Wang, X.: Stabilizing deep q-learning with convnets and vision transformers under data augmentation (2021)","key":"4_CR15"},{"doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","key":"4_CR16","DOI":"10.1109\/CVPR.2016.90"},{"unstructured":"Higgins, I., et al.: beta-VAE: learning basic visual concepts with a constrained variational framework. In: International Conference on Learning Representations (2016)","key":"4_CR17"},{"unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models (2020)","key":"4_CR18"},{"doi-asserted-by":"crossref","unstructured":"Khandelwal, A., Weihs, L., Mottaghi, R., Kembhavi, A.: Simple but effective: clip embeddings for embodied AI. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14829\u201314838 (2022)","key":"4_CR19","DOI":"10.1109\/CVPR52688.2022.01441"},{"unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)","key":"4_CR20"},{"unstructured":"Kostrikov, I., Yarats, D., Fergus, R.: Image augmentation is all you need: regularizing deep reinforcement learning from pixels. arXiv preprint arXiv:2004.13649 (2020)","key":"4_CR21"},{"unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. Adv. Neural Inf. Process. Syst. 25 (2012)","key":"4_CR22"},{"unstructured":"Laskin, M., Srinivas, A., Abbeel, P.: CURL: contrastive unsupervised representations for reinforcement learning. In: International Conference on Machine Learning, pp. 5639\u20135650. PMLR (2020)","key":"4_CR23"},{"key":"4_CR24","first-page":"19884","volume":"33","author":"M Laskin","year":"2020","unstructured":"Laskin, M., Lee, K., Stooke, A., Pinto, L., Abbeel, P., Srinivas, A.: Reinforcement learning with augmented data. Adv. Neural Inf. Process. Syst. 33, 19884\u201319895 (2020)","journal-title":"Adv. Neural Inf. Process. Syst."},{"unstructured":"Lillicrap, T.P., et al.: Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971 (2015)","key":"4_CR25"},{"doi-asserted-by":"publisher","unstructured":"Liu, X., et al.: Data augmentation via latent space interpolation for image classification. In: 2018 24th International Conference on Pattern Recognition (ICPR), pp. 728\u2013733 (2018). https:\/\/doi.org\/10.1109\/ICPR.2018.8545506","key":"4_CR26","DOI":"10.1109\/ICPR.2018.8545506"},{"doi-asserted-by":"publisher","unstructured":"Mansourifar, H., Chen, L., Shi, W.: Virtual big data for GAN based data augmentation. In: 2019 IEEE International Conference on Big Data (Big Data), pp. 1478\u20131487 (2019). https:\/\/doi.org\/10.1109\/BigData47090.2019.9006268","key":"4_CR27","DOI":"10.1109\/BigData47090.2019.9006268"},{"unstructured":"Mnih, V., et al.: Asynchronous methods for deep reinforcement learning. In: International Conference on Machine Learning, pp. 1928\u20131937. PMLR (2016)","key":"4_CR28"},{"unstructured":"Mnih, V., et al.: Playing Atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)","key":"4_CR29"},{"doi-asserted-by":"publisher","unstructured":"Moreno-Barea, F.J., Jerez, J.M., Franco, L.: Improving classification accuracy using data augmentation on small data sets. Expert Syst. Appl. 161, 113696 (2020). https:\/\/doi.org\/10.1016\/j.eswa.2020.113696, https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0957417420305200","key":"4_CR30","DOI":"10.1016\/j.eswa.2020.113696"},{"unstructured":"Nair, S., Rajeswaran, A., Kumar, V., Finn, C., Gupta, A.: R3M: a universal visual representation for robot manipulation. arXiv preprint arXiv:2203.12601 (2022)","key":"4_CR31"},{"doi-asserted-by":"crossref","unstructured":"Pari, J., Shafiullah, N.M., Arunachalam, S.P., Pinto, L.: The surprising effectiveness of representation learning for visual imitation. arXiv preprint arXiv:2112.01511 (2021)","key":"4_CR32","DOI":"10.15607\/RSS.2022.XVIII.010"},{"unstructured":"Parisi, S., Rajeswaran, A., Purushwalkam, S., Gupta, A.: The unsurprising effectiveness of pre-trained vision models for control. In: International Conference on Machine Learning, pp. 17359\u201317371. PMLR (2022)","key":"4_CR33"},{"doi-asserted-by":"crossref","unstructured":"Savva, M., et\u00a0al.: Habitat: a platform for embodied AI research. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9339\u20139347 (2019)","key":"4_CR34","DOI":"10.1109\/ICCV.2019.00943"},{"unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)","key":"4_CR35"},{"unstructured":"Schwarzer, M., Anand, A., Goel, R., Hjelm, R.D., Courville, A., Bachman, P.: Data-efficient reinforcement learning with momentum predictive representations. arXiv preprint arXiv:2007.059292(3) (2020)","key":"4_CR36"},{"unstructured":"Shah, R., Kumar, V.: RRL: resnet as representation for reinforcement learning. arXiv preprint arXiv:2107.03380 (2021)","key":"4_CR37"},{"issue":"1","key":"4_CR38","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s40537-019-0197-0","volume":"6","author":"C Shorten","year":"2019","unstructured":"Shorten, C., Khoshgoftaar, T.M.: A survey on image data augmentation for deep learning. J. Big Data 6(1), 1\u201348 (2019)","journal-title":"J. Big Data"},{"doi-asserted-by":"crossref","unstructured":"Silver, D., et\u00a0al.: Mastering the game of go without human knowledge. Nature 550(7676), 354\u2013359 (2017)","key":"4_CR39","DOI":"10.1038\/nature24270"},{"unstructured":"Tassa, Y., et al.: Deepmind control suite (2018)","key":"4_CR40"},{"issue":"7782","key":"4_CR41","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","volume":"575","author":"O Vinyals","year":"2019","unstructured":"Vinyals, O., et al.: Grandmaster level in StarCraft II using multi-agent reinforcement learning. Nature 575(7782), 350\u2013354 (2019)","journal-title":"Nature"},{"key":"4_CR42","first-page":"32974","volume":"35","author":"C Wang","year":"2022","unstructured":"Wang, C., Luo, X., Ross, K., Li, D.: VRL3: a data-driven framework for visual deep reinforcement learning. Adv. Neural Inf. Process. Syst. 35, 32974\u201332988 (2022)","journal-title":"Adv. Neural Inf. Process. Syst."},{"unstructured":"Xiao, T., Radosavovic, I., Darrell, T., Malik, J.: Masked visual pre-training for motor control. arXiv preprint arXiv:2203.06173 (2022)","key":"4_CR43"},{"doi-asserted-by":"crossref","unstructured":"Yarats, D., Fergus, R., Lazaric, A., Pinto, L.: Improving sample efficiency in model-free reinforcement learning from images. arXiv preprint arXiv:2102.062814(5), 1\u201312 (2021)","key":"4_CR44","DOI":"10.1609\/aaai.v35i12.17276"},{"unstructured":"Yarats, D., Fergus, R., Lazaric, A., Pinto, L.: Mastering visual continuous control: improved data-augmented reinforcement learning. arXiv preprint arXiv:2107.09645 (2021)","key":"4_CR45"},{"doi-asserted-by":"crossref","unstructured":"Yen-Chen, L., Zeng, A., Song, S., Isola, P., Lin, T.Y.: Learning to see before learning to act: visual pre-training for manipulation. In: 2020 IEEE International Conference on Robotics and Automation (ICRA), pp. 7286\u20137293. IEEE (2020)","key":"4_CR46","DOI":"10.1109\/ICRA40945.2020.9197331"},{"unstructured":"Yu, T., et al.: Meta-world: a benchmark and evaluation for multi-task and meta reinforcement learning. In: Conference on Robot Learning, pp. 1094\u20131100. PMLR (2020)","key":"4_CR47"},{"doi-asserted-by":"crossref","unstructured":"Yu, X., et al.: Diffusion-based data augmentation for nuclei image segmentation (2024)","key":"4_CR48","DOI":"10.1007\/978-3-031-43993-3_57"},{"key":"4_CR49","first-page":"13022","volume":"35","author":"Z Yuan","year":"2022","unstructured":"Yuan, Z., et al.: Pre-trained image encoder for generalizable visual reinforcement learning. Adv. Neural Inf. Process. Syst. 35, 13022\u201313037 (2022)","journal-title":"Adv. Neural Inf. Process. Syst."},{"unstructured":"Zheng, R., et al.: TACO: temporal latent action-driven contrastive loss for visual reinforcement learning. arXiv preprint arXiv:2306.13229 (2023)","key":"4_CR50"}],"container-title":["Lecture Notes in Computer Science","KI 2024: Advances in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-70893-0_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,29]],"date-time":"2024-08-29T11:03:45Z","timestamp":1724929425000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-70893-0_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031708923","9783031708930"],"references-count":50,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-70893-0_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"30 August 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"KI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"German Conference on Artificial Intelligence (K\u00fcnstliche Intelligenz)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"W\u00fcrzburg","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"47","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ki2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.informatik.uni-wuerzburg.de\/ki24\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}