{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T16:46:52Z","timestamp":1774630012123,"version":"3.50.1"},"reference-count":83,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,6,1]],"date-time":"2021-06-01T00:00:00Z","timestamp":1622505600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,6,1]],"date-time":"2021-06-01T00:00:00Z","timestamp":1622505600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,6,1]],"date-time":"2021-06-01T00:00:00Z","timestamp":1622505600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,6]]},"DOI":"10.1109\/cvpr46437.2021.00715","type":"proceedings-article","created":{"date-parts":[[2021,11,2]],"date-time":"2021-11-02T21:56:02Z","timestamp":1635890162000},"page":"7226-7236","source":"Crossref","is-referenced-by-count":84,"title":["GeoSim: Realistic Video Simulation via Geometry-Aware Composition for Self-Driving"],"prefix":"10.1109","author":[{"given":"Yun","family":"Chen","sequence":"first","affiliation":[]},{"given":"Frieda","family":"Rong","sequence":"additional","affiliation":[]},{"given":"Shivam","family":"Duggal","sequence":"additional","affiliation":[]},{"given":"Shenlong","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Xinchen","family":"Yan","sequence":"additional","affiliation":[]},{"given":"Sivabalan","family":"Manivasagam","sequence":"additional","affiliation":[]},{"given":"Shangjie","family":"Xue","sequence":"additional","affiliation":[]},{"given":"Ersin","family":"Yumer","sequence":"additional","affiliation":[]},{"given":"Raquel","family":"Urtasun","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58607-2_26"},{"key":"ref72","article-title":"Generative image modeling using style and structure adversarial networks","author":"wang","year":"2016","journal-title":"ECCV"},{"key":"ref71","article-title":"High-Resolution Image Synthesis and Semantic Manipulation with Conditional GANs","author":"wang","year":"2017"},{"key":"ref70","article-title":"Video-to-video synthesis","author":"wang","year":"2018","journal-title":"NeurIPS"},{"key":"ref76","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR42600.2020.01113","article-title":"SurfelGAN: Synthesizing Realistic Sensor Data for Autonomous Driving","author":"yang","year":"2020"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00457"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00945"},{"key":"ref39","author":"kato","year":"2017","journal-title":"Neural 3d mesh renderer"},{"key":"ref75","article-title":"High-Resolution Image Inpainting using Multi-Scale Neural Patch Synthesis","author":"yang","year":"2016"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/2070781.2024191"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1007\/s41095-019-0136-1"},{"key":"ref79","article-title":"Image gans meet differentiable rendering for inverse graphics and interpretable 3d neural rendering","author":"zhang","year":"2020"},{"key":"ref33","article-title":"Spatial Transformer Networks","author":"jaderberg","year":"2016"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.632"},{"key":"ref31","article-title":"Inserting virtual static object with geometry consistency into real video","author":"ibrahim","year":"2020","journal-title":"Journal of Physics Conference Series"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00833"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00453"},{"key":"ref36","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-030-01267-0_23","article-title":"Learning Category-Specific Mesh Reconstruction from Image Collections","author":"kanazawa","year":"2018"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00133"},{"key":"ref34","article-title":"Perceptual losses for real-time style transfer and super-resolution","author":"johnson","year":"2016","journal-title":"CoRR"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.352"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00943"},{"key":"ref61","article-title":"Minos: Multimodal indoor simulator for navigation in complex environments","author":"savva","year":"2017"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8594095"},{"key":"ref28","article-title":"Gans trained by a two time-scale update rule converge to a local nash equilibrium","author":"heusel","year":"2017"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00252"},{"key":"ref27","doi-asserted-by":"crossref","DOI":"10.1145\/3272127.3275084","article-title":"Deep blending for free-viewpoint image-based rendering","author":"hedman","year":"2018","journal-title":"TOG"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01219-9_10"},{"key":"ref66","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR46437.2021.01026","article-title":"Trafficsim: Learning to simulate realistic multi-agent behaviors","author":"suo","year":"2021"},{"key":"ref29","article-title":"Learning Hierarchical Semantic Image Manipulation through Structured Representations","author":"hong","year":"2018"},{"key":"ref67","article-title":"Off-road lidar simulation using data driven terrain primitives","author":"tallavajhula","year":"2018","journal-title":"ICRA"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1111\/cgf.14022"},{"key":"ref69","article-title":"Layer-structured 3d scene inference via view synthesis","author":"tulsiani","year":"2018","journal-title":"ECCV"},{"key":"ref2","article-title":"Augmented Reality Meets Computer Vision : Efficient Data Generation for Urban Driving Scenes","author":"alhaija","year":"2017"},{"key":"ref1","year":"2020","journal-title":"Corona renderer"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.146"},{"key":"ref22","year":"0","journal-title":"Unreal Engine"},{"key":"ref21","article-title":"Relate: Physically plausible multi-object scene synthesis using structured latent spaces","author":"ehrhardt","year":"2020","journal-title":"NeurIPS"},{"key":"ref24","article-title":"Vid2game: Controllable characters extracted from real-world videos","author":"gafni","year":"2019"},{"key":"ref23","article-title":"Augmented LiDAR Simulator for Autonomous Driving","author":"fang","year":"2019"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1201\/9781315265285-22"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913491297"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00780"},{"key":"ref51","article-title":"World-consistent video-to-video synthesis","author":"mallya","year":"2020","journal-title":"ECCV"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46475-6_7"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00918"},{"key":"ref57","article-title":"Pointnet: Deep learning on point sets for 3d classification and segmentation","author":"qi","year":"2016"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00244"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.82"},{"key":"ref54","article-title":"Instagan: Instance-aware image-to-image translation","author":"mo","year":"2018"},{"key":"ref53","article-title":"Beyond grand theft auto v for training, testing and enhancing deep learning in self driving cars","author":"martinez","year":"2017"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01118"},{"key":"ref10","article-title":"Rethinking atrous convolution for semantic image segmentation","author":"chen","year":"2017"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.168"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00982"},{"key":"ref12","article-title":"Learning to predict 3d objects with an interpolation-based differentiable renderer","author":"chen","year":"2019","journal-title":"NIPS"},{"key":"ref13","article-title":"Monocular Neural Image Based Rendering with Continuous View Control","author":"chen","year":"2019"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.01012"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/882262.882298"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.244"},{"key":"ref16","article-title":"Dovenet: Deep image harmonization via domain verification","author":"cong","year":"2020","journal-title":"CVPR"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_18"},{"key":"ref17","article-title":"Pybullet, a python module for physics simulation for games, robotics and machine learning","author":"coumans","year":"2016","journal-title":"Github Repository"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/280814.280864"},{"key":"ref83","article-title":"Visual Object Networks: Image Generation with Disen-tangled 3D Representation","author":"zhu","year":"2018"},{"key":"ref19","article-title":"Carla: An open urban driving simulator","author":"dosovitskiy","year":"2017","journal-title":"Conference on Robot Learning"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.660"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.2966414"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-018-1070-x"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460875"},{"key":"ref5","article-title":"Cut-and-paste neural rendering","author":"bhattad","year":"2020"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00603"},{"key":"ref7","article-title":"nuscenes: A multimodal dataset for autonomous driving","author":"caesar","year":"2019"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00985"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00895"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.aaw0863"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01030"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01157"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01265"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00375"},{"key":"ref41","article-title":"Ai2-thor: An interactive 3d environment for visual ai","author":"kolve","year":"2017"},{"key":"ref44","article-title":"Context-aware synthesis and placement of object instances","author":"lee","year":"2018","journal-title":"NIPS"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1145\/1276377.1276381"}],"event":{"name":"2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","location":"Nashville, TN, USA","start":{"date-parts":[[2021,6,20]]},"end":{"date-parts":[[2021,6,25]]}},"container-title":["2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9577055\/9577056\/09578059.pdf?arnumber=9578059","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,12]],"date-time":"2023-11-12T05:06:08Z","timestamp":1699765568000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9578059\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6]]},"references-count":83,"URL":"https:\/\/doi.org\/10.1109\/cvpr46437.2021.00715","relation":{},"subject":[],"published":{"date-parts":[[2021,6]]}}}