{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T09:25:27Z","timestamp":1780392327693,"version":"3.54.1"},"publisher-location":"Cham","reference-count":38,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031730290","type":"print"},{"value":"9783031730306","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,24]],"date-time":"2024-11-24T00:00:00Z","timestamp":1732406400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,24]],"date-time":"2024-11-24T00:00:00Z","timestamp":1732406400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73030-6_14","type":"book-chapter","created":{"date-parts":[[2024,11,25]],"date-time":"2024-11-25T16:57:46Z","timestamp":1732553866000},"page":"247-263","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":27,"title":["SceneScript: Reconstructing Scenes with an Autoregressive Structured Language Model"],"prefix":"10.1007","author":[{"given":"Armen","family":"Avetisyan","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Christopher","family":"Xie","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Henry","family":"Howard-Jenkins","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Tsun-Yi","family":"Yang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Samir","family":"Aroudj","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Suvam","family":"Patra","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Fuyang","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Duncan","family":"Frost","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Luke","family":"Holland","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Campbell","family":"Orme","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jakob","family":"Engel","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Edward","family":"Miller","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Richard","family":"Newcombe","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Vasileios","family":"Balntas","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,11,24]]},"reference":[{"key":"14_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"596","DOI":"10.1007\/978-3-030-58542-6_36","volume-title":"Computer Vision \u2013 ECCV 2020","author":"A Avetisyan","year":"2020","unstructured":"Avetisyan, A., Khanova, T., Choy, C., Dash, D., Dai, A., Nie\u00dfner, M.: SceneCAD: predicting object alignments and layouts in RGB-D scans. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12367, pp. 596\u2013612. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58542-6_36"},{"key":"14_CR2","doi-asserted-by":"crossref","unstructured":"Brazil, G., Kumar, A., Straub, J., Ravi, N., Johnson, J., Gkioxari, G.: Omni3D: a large benchmark and model for 3D object detection in the wild. In: CVPR. IEEE, Vancouver (2023)","DOI":"10.1109\/CVPR52729.2023.01264"},{"key":"14_CR3","doi-asserted-by":"crossref","unstructured":"Cabral, R., Furukawa, Y.: Piecewise planar and compact floorplan reconstruction from images. In: 2014 IEEE Conference on Computer Vision and Pattern Recognition (2014)","DOI":"10.1109\/CVPR.2014.546"},{"key":"14_CR4","unstructured":"Carlier, A., Danelljan, M., Alahi, A., Timofte, R.: Deepsvg: a hierarchical generative network for vector graphics animation. Adv. Neural Inf. Process. Syst. (2020)"},{"key":"14_CR5","doi-asserted-by":"crossref","unstructured":"Chen, J., Liu, C., Wu, J., Furukawa, Y.: Floor-sp: inverse cad for floorplans by sequential room-wise shortest path. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (2019)","DOI":"10.1109\/ICCV.2019.00275"},{"key":"14_CR6","unstructured":"Chen, T., Saxena, S., Li, L., Fleet, D.J., Hinton, G.: Pix2seq: a language modeling framework for object detection. In: International Conference on Learning Representations (ICLR) (2022)"},{"key":"14_CR7","doi-asserted-by":"crossref","unstructured":"Collins, J., et al.: Abo: Dataset and benchmarks for real-world 3d object understanding. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.02045"},{"key":"14_CR8","doi-asserted-by":"crossref","unstructured":"Dai, A., Chang, A.X., Savva, M., Halber, M., Funkhouser, T., Nie\u00dfner, M.: Scannet: richly-annotated 3d reconstructions of indoor scenes. In: Proceedings of Computer Vision and Pattern Recognition (CVPR). IEEE (2017)","DOI":"10.1109\/CVPR.2017.261"},{"key":"14_CR9","doi-asserted-by":"crossref","unstructured":"Furukawa, Y., Curless, B., Seitz, S.M., Szeliski, R.: Manhattan-world stereo. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition (2009)","DOI":"10.1109\/CVPRW.2009.5206867"},{"key":"14_CR10","doi-asserted-by":"crossref","unstructured":"Furukawa, Y., Curless, B., Seitz, S.M., Szeliski, R.: Reconstructing building interiors from images. In: 2009 IEEE 12th International Conference on Computer Vision (2009)","DOI":"10.1109\/ICCV.2009.5459145"},{"key":"14_CR11","first-page":"5885","volume":"34","author":"Y Ganin","year":"2021","unstructured":"Ganin, Y., Bartunov, S., Li, Y., Keller, E., Saliceti, S.: Computer-aided design as language. Adv. Neural. Inf. Process. Syst. 34, 5885\u20135897 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"14_CR12","doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast r-cnn. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1440\u20131448 (2015)","DOI":"10.1109\/ICCV.2015.169"},{"key":"14_CR13","doi-asserted-by":"crossref","unstructured":"Lee, C.Y., Badrinarayanan, V., Malisiewicz, T., Rabinovich, A.: Roomnet: end-to-end room layout estimation. In: Proceedings of the IEEE International Conference on Computer Vision (2017)","DOI":"10.1109\/ICCV.2017.521"},{"key":"14_CR14","unstructured":"Meta: Project aria (2022). https:\/\/projectaria.com\/. Accessed 30 Aug 2023"},{"issue":"1","key":"14_CR15","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1145\/3503250","volume":"65","author":"B Mildenhall","year":"2021","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: Nerf: representing scenes as neural radiance fields for view synthesis. Commun. ACM 65(1), 99\u2013106 (2021)","journal-title":"Commun. ACM"},{"key":"14_CR16","doi-asserted-by":"crossref","unstructured":"Misra, I., Girdhar, R., Joulin, A.: An end-to-end transformer model for 3d object detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2906\u20132917 (2021)","DOI":"10.1109\/ICCV48922.2021.00290"},{"key":"14_CR17","doi-asserted-by":"crossref","unstructured":"Murali, S., Speciale, P., Oswald, M.R., Pollefeys, M.: Indoor scan2bim: building information models of house interiors. In: 2017 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS) (2017)","DOI":"10.1109\/IROS.2017.8206513"},{"key":"14_CR18","unstructured":"Nash, C., Ganin, Y., Eslami, S.A., Battaglia, P.: Polygen: an autoregressive generative model of 3d meshes. In: International Conference on Machine Learning, pp. 7220\u20137229. PMLR (2020)"},{"key":"14_CR19","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1016\/j.isprsjprs.2019.03.017","volume":"151","author":"S Ochmann","year":"2019","unstructured":"Ochmann, S., Vock, R., Klein, R.: Automatic reconstruction of fully volumetric 3d building models from oriented point clouds. ISPRS J. Photogramm. Remote. Sens. 151, 251\u2013262 (2019)","journal-title":"ISPRS J. Photogramm. Remote. Sens."},{"key":"14_CR20","unstructured":"OpenAI: Gpt-4 technical report (2023)"},{"key":"14_CR21","unstructured":"Para, W., et al.: Sketchgen: generating constrained cad sketches. Adv. Neural Inf. Process. Syst. (2021)"},{"key":"14_CR22","doi-asserted-by":"crossref","unstructured":"Park, J.J., Florence, P., Straub, J., Newcombe, R., Lovegrove, S.: Deepsdf: learning continuous signed distance functions for shape representation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2019)","DOI":"10.1109\/CVPR.2019.00025"},{"key":"14_CR23","doi-asserted-by":"crossref","unstructured":"Pintore, G., Agus, M., Gobbetti, E.: Atlantanet: inferring the 3d indoor layout from a single 360 image beyond the manhattan world assumption. In: Proceedings of the European Conference on Computer Vision (ECCV) (2020)","DOI":"10.1007\/978-3-030-58598-3_26"},{"key":"14_CR24","doi-asserted-by":"crossref","unstructured":"Rukhovich, D., Vorontsova, A., Konushin, A.: Imvoxelnet: image to voxels projection for monocular and multi-view general-purpose 3d object detection. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 2397\u20132406 (2022)","DOI":"10.1109\/WACV51458.2022.00133"},{"key":"14_CR25","unstructured":"Somasundaram, K., et al.: Project aria: a new tool for egocentric multi-modal ai research. arXiv preprint arXiv:2308.13561 (2023)"},{"key":"14_CR26","doi-asserted-by":"crossref","unstructured":"Tang, H., Liu, Z., Li, X., Lin, Y., Han, S.: TorchSparse: efficient point cloud inference engine. In: Conference on Machine Learning and Systems (MLSys) (2022)","DOI":"10.1109\/CVPRW59228.2023.00025"},{"key":"14_CR27","doi-asserted-by":"crossref","unstructured":"Tang, H., Liu, Z., Zhao, S., Lin, Y., Lin, J., Wang, H., Han, S.: Searching efficient 3D architectures with sparse point-voxel convolution. In: European Conference on Computer Vision (ECCV) (2020)","DOI":"10.1007\/978-3-030-58604-1_41"},{"key":"14_CR28","doi-asserted-by":"crossref","unstructured":"Tulsiani, S., Su, H., Guibas, L.J., Efros, A.A., Malik, J.: Learning shape abstractions by assembling volumetric primitives. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2017)","DOI":"10.1109\/CVPR.2017.160"},{"key":"14_CR29","doi-asserted-by":"publisher","unstructured":"Tyszkiewicz, M.J., Maninis, K.K., Popov, S., Ferrari, V.: Raytran: 3d pose estimation and shape reconstruction of multiple objects from videos with ray-traced transformers. In: Computer Vision\u2013ECCV 2022: 17th European Conference, Tel Aviv, Israel, 23\u201327 October 2022, Proceedings, Part X, pp. 211\u2013228. Springer, Heidelberg (2022). https:\/\/doi.org\/10.1007\/978-3-031-20080-9_13","DOI":"10.1007\/978-3-031-20080-9_13"},{"key":"14_CR30","unstructured":"Vaswani, A., et al.: Attention is all you need. In: NeurIPS, vol. 30 (2017)"},{"key":"14_CR31","doi-asserted-by":"crossref","unstructured":"Vu, T., Kim, K., Luu, T.M., Nguyen, X.T., Yoo, C.D.: Softgroup for 3d instance segmentation on 3d point clouds. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00273"},{"key":"14_CR32","doi-asserted-by":"crossref","unstructured":"Wu, R., Xiao, C., Zheng, C.: Deepcad: a deep generative network for computer-aided design models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (2021)","DOI":"10.1109\/ICCV48922.2021.00670"},{"key":"14_CR33","unstructured":"Xu, X., Willis, K.D., Lambourne, J.G., Cheng, C.Y., Jayaraman, P.K., Furukawa, Y.: Skexgen: autoregressive generation of cad construction sequences with disentangled codebooks. In: International Conference on Machine Learning (ICML) (2022)"},{"key":"14_CR34","doi-asserted-by":"crossref","unstructured":"Xue, Y., et al.: Point2seq: detecting 3d objects as sequences. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8521\u20138530 (2022)","DOI":"10.1109\/CVPR52688.2022.00833"},{"issue":"4","key":"14_CR35","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3450626.3459873","volume":"40","author":"K Yang","year":"2021","unstructured":"Yang, K., Chen, X.: Unsupervised learning for cuboid shape abstraction via joint segmentation from point clouds. ACM Trans. Graph. (TOG) 40(4), 1\u201311 (2021)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"14_CR36","doi-asserted-by":"crossref","unstructured":"Yin, T., Zhou, X., Krahenbuhl, P.: Center-based 3d object detection and tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2021)","DOI":"10.1109\/CVPR46437.2021.01161"},{"key":"14_CR37","doi-asserted-by":"crossref","unstructured":"Yue, Y., Kontogianni, T., Schindler, K., Engelmann, F.: Connecting the dots: floorplan reconstruction using two-level queries. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2023)","DOI":"10.1109\/CVPR52729.2023.00088"},{"key":"14_CR38","doi-asserted-by":"crossref","unstructured":"Zou, C., Colburn, A., Shan, Q., Hoiem, D.: Layoutnet: reconstructing the 3d room layout from a single rgb image. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2018)","DOI":"10.1109\/CVPR.2018.00219"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73030-6_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,25]],"date-time":"2024-11-25T17:13:59Z","timestamp":1732554839000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73030-6_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,24]]},"ISBN":["9783031730290","9783031730306"],"references-count":38,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73030-6_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,24]]},"assertion":[{"value":"24 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}