{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,19]],"date-time":"2026-06-19T11:54:25Z","timestamp":1781870065882,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":21,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,22]],"date-time":"2026-06-22T00:00:00Z","timestamp":1782086400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,22]]},"DOI":"10.1145\/3744256.3812555","type":"proceedings-article","created":{"date-parts":[[2026,6,19]],"date-time":"2026-06-19T11:01:41Z","timestamp":1781866901000},"page":"126-130","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Do Large Multimodal Models Understand Construction Drawings? An Evaluation of Visually Grounded Workflows"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-7352-4653","authenticated-orcid":false,"given":"Ryan","family":"Dubois","sequence":"first","affiliation":[{"name":"Mechanical Engineering \/ Building Energy Research Laboratory, Columbia University, New York, NY, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7610-8841","authenticated-orcid":false,"given":"Bianca","family":"Howard","sequence":"additional","affiliation":[{"name":"Mechanical Engineering \/ Building Energy Research Laboratory, Columbia University, New York, NY, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,22]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"2024. Hello GPT-4o. https:\/\/openai.com\/index\/hello-gpt-4o\/"},{"key":"e_1_3_3_1_3_2","unstructured":"2025. Gemini 3 Pro. https:\/\/deepmind.google\/models\/gemini\/pro\/"},{"key":"e_1_3_3_1_4_2","unstructured":"2025. Introducing the File Search Tool in Gemini API. https:\/\/blog.google\/innovation-and-ai\/technology\/developers-tools\/file-search-gemini-api\/"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","unstructured":"Adamu Ali-Gombe and Eyad Elyan. 2019. MFC-GAN: Class-imbalanced dataset classification using Multiple Fake Class Generative Adversarial Network. Neurocomputing 361 (Oct. 2019) 212\u2013221. 10.1016\/j.neucom.2019.06.043","DOI":"10.1016\/j.neucom.2019.06.043"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","unstructured":"Oswaldo Domingo\u00a0Hernandez Bueno Kevin Smith Lasse Hamborg and Christian\u00a0Anker Hviid. 2025. Using Large Multimodal Models (LMM) to digitalize scanned HVAC Schematics into Metadata Schemas for Buildings. 10.26868\/25222708.2025.1939","DOI":"10.26868\/25222708.2025.1939"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","unstructured":"Eyad Elyan Laura Jamieson and Adamu Ali-Gombe. 2020. Deep learning for symbols detection and classification in engineering drawings. Neural Networks 129 (Sept. 2020) 91\u2013102. 10.1016\/j.neunet.2020.05.025","DOI":"10.1016\/j.neunet.2020.05.025"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","unstructured":"Chin-Shyurng Fahn Jhing-Fa Wang and Jau-Yien Lee. 1988. A topology-based component extractor for understanding electronic circuit diagrams. Computer Vision Graphics and Image Processing 44 2 (Nov. 1988) 119\u2013138. 10.1016\/S0734-189X(88)80001-X","DOI":"10.1016\/S0734-189X(88)80001-X"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1201\/9781003354222-44"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","unstructured":"Frans C.\u00a0A Groen Arthur\u00a0C Sanderson and John\u00a0F Schlag. 1985. Symbol recognition in electrical diagrams using probabilistic graph matching. Pattern Recognition Letters 3 5 (Sept. 1985) 343\u2013350. 10.1016\/0167-8655(85)90066-2","DOI":"10.1016\/0167-8655(85)90066-2"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","unstructured":"Laura Jamieson Carlos Francisco Moreno-Garc\u00eda and Eyad Elyan. 2024. A review of deep learning methods for digitisation of complex documents and engineering diagrams. Artif Intell Rev 57 6 (May 2024) 136. 10.1007\/s10462-024-10779-2","DOI":"10.1007\/s10462-024-10779-2"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/DSAA65442.2025.11248012"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","unstructured":"Feng Li Renrui Zhang Hao Zhang Yuanhan Zhang Bo Li Wei Li Zejun Ma and Chunyuan Li. 2024. LLaVA-NeXT-Interleave: Tackling Multi-image Video and 3D in Large Multimodal Models. 10.48550\/arXiv.2407.07895arXiv:https:\/\/arXiv.org\/abs\/2407.07895 [cs] Status: Accepted to ICLR 2025.","DOI":"10.48550\/arXiv.2407.07895"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","unstructured":"Zhang Li Biao Yang Qiang Liu Zhiyin Ma Shuo Zhang Jingxu Yang Yabo Sun Yuliang Liu and Xiang Bai. 2024. Monkey: Image Resolution and Text Label Are Important Things for Large Multi-modal Models. 10.48550\/arXiv.2311.06607arXiv:https:\/\/arXiv.org\/abs\/2311.06607 [cs] Status: Accepted to CVPR 2024.","DOI":"10.48550\/arXiv.2311.06607"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","unstructured":"Nelson\u00a0F. Liu Kevin Lin John Hewitt Ashwin Paranjape Michele Bevilacqua Fabio Petroni and Percy Liang. 2024. Lost in the Middle: How Language Models Use Long Contexts. Transactions of the Association for Computational Linguistics 12 (Feb. 2024) 157\u2013173. 10.1162\/tacl_a_00638","DOI":"10.1162\/tacl_a_00638"},{"key":"e_1_3_3_1_16_2","first-page":"46534","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Madaan Aman","year":"2023","unstructured":"Aman Madaan, Niket Tandon, Prakhar Gupta, Skyler Hallinan, Luyu Gao, Sarah Wiegreffe, Uri Alon, Nouha Dziri, Shrimai Prabhumoye, Yiming Yang, Shashank Gupta, Bodhisattwa\u00a0Prasad Majumder, Katherine Hermann, Sean Welleck, Amir Yazdanbakhsh, and Peter Clark. 2023. Self-Refine: Iterative Refinement with Self-Feedback. In Advances in Neural Information Processing Systems , A.\u00a0Oh, T.\u00a0Naumann, A.\u00a0Globerson, K.\u00a0Saenko, M.\u00a0Hardt, and S.\u00a0Levine (Eds.), Vol.\u00a036. Curran Associates, Inc., 46534\u201346594. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2023\/file\/91edff07232fb1b55a505a9e9f6c0ff3-Paper-Conference.pdf"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"crossref","unstructured":"Trang\u00a0M. Nguyen Long\u00a0Van Pham Chien\u00a0Chu Nguyen and Vinh\u00a0Van Nguyen. 2026. Object Detection and Text Recognition in Large-scale Technical Drawings. 612\u2013619. https:\/\/www.scitepress.org\/Link.aspx?doi=10.5220\/0010314406120619","DOI":"10.5220\/0010314406120619"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN52387.2021.9534122"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","unstructured":"Beate Scheibel Juergen Mangler and Stefanie Rinderle-Ma. 2021. Extraction of dimension requirements from engineering drawings for supporting quality control in production processes. Computers in Industry 129 (Aug. 2021) 103442. 10.1016\/j.compind.2021.103442","DOI":"10.1016\/j.compind.2021.103442"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","unstructured":"Hanlong Wan Jian Zhang Yan Chen Weili Xu and Fan Feng. 2025. Exploring Gen-AI applications in building research and industry: A review. Build. Simul. 18 6 (June 2025) 1251\u20131273. 10.1007\/s12273-025-1279-x","DOI":"10.1007\/s12273-025-1279-x"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.52202\/079017-2400"},{"key":"e_1_3_3_1_22_2","volume-title":"Advances in Neural Information Processing Systems (NeurIPS) workshop","author":"Zhang Yuyou","year":"2025","unstructured":"Yuyou Zhang, Radu Corcodel, Chori Hori, Anoop Cherian, and Ding Zhao. 2025. AxisBench: What Can Go Wrong in VLMs\u2019 Spatial Reasoning?. In Advances in Neural Information Processing Systems (NeurIPS) workshop. https:\/\/www.merl.com\/publications\/TR2025-168"}],"event":{"name":"BuildSys '26: The 13th ACM International Conference on Systems for Energy-Efficient Buildings, Cities, and Transportation","location":"Banff Canada","acronym":"BuildSys '26","sponsor":["SIGEnergy ACM Special Interest Group on Energy Systems and Informatics"]},"container-title":["Proceedings of the 13th ACM International Conference on Systems for Energy-Efficient Buildings, Cities, and Transportation"],"original-title":[],"deposited":{"date-parts":[[2026,6,19]],"date-time":"2026-06-19T11:31:06Z","timestamp":1781868666000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3744256.3812555"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,22]]},"references-count":21,"alternative-id":["10.1145\/3744256.3812555","10.1145\/3744256"],"URL":"https:\/\/doi.org\/10.1145\/3744256.3812555","relation":{},"subject":[],"published":{"date-parts":[[2026,6,22]]},"assertion":[{"value":"2026-06-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}