{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:04:45Z","timestamp":1750309485392,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":26,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,9,14]],"date-time":"2024-09-14T00:00:00Z","timestamp":1726272000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,9,14]]},"DOI":"10.1145\/3697355.3697385","type":"proceedings-article","created":{"date-parts":[[2024,12,13]],"date-time":"2024-12-13T04:52:23Z","timestamp":1734065543000},"page":"183-191","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["A Deep Learning-based Auditing System for BIM Components Library"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-7477-6575","authenticated-orcid":false,"given":"Li","family":"Yang","sequence":"first","affiliation":[{"name":"China Communications Construction Company Limited, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-8453-2525","authenticated-orcid":false,"given":"Jing","family":"Wang","sequence":"additional","affiliation":[{"name":"China Communications Information&amp;Technology Group, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-6997-0231","authenticated-orcid":false,"given":"Haifeng","family":"Jiang","sequence":"additional","affiliation":[{"name":"China Communications Information&amp;Technology Group, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-5044-7470","authenticated-orcid":false,"given":"Yang","family":"Mo","sequence":"additional","affiliation":[{"name":"China Communications Information&amp;Technology Group, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-8257-3938","authenticated-orcid":false,"given":"Hangming","family":"Fu","sequence":"additional","affiliation":[{"name":"China Communications Information&amp;Technology Group, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-3604-4912","authenticated-orcid":false,"given":"Yexing","family":"Zhang","sequence":"additional","affiliation":[{"name":"China Communications Information&amp;Technology Group, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2024,12,12]]},"reference":[{"key":"e_1_3_3_1_1_2","volume-title":"Adv. in Neural Inf. Process. Syst.","volume":"25","author":"Krizhevsky A.","year":"2012","unstructured":"A. Krizhevsky, I. Sutskever, E. Hinton, \u201cImagenet classification with deep convolutional neural networks,\u201d Adv. in Neural Inf. Process. Syst., 2012, vol. 25."},{"key":"e_1_3_3_1_2_2","unstructured":"K. Simonyan A. Zisserman \u201dVery deep convolutional networks for large-scale image recognition \u201d unpublished."},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_3_1_5_2","first-page":"4700","volume-title":"Proc. of the IEEE Conf. on Comput. Vision and Pattern Recognit.","author":"Huang G.","year":"2017","unstructured":"G. Huang, Z. Liu, L. Maaten, K. Q. Weinberger, \u201cDensely connected convolutional network,\u201d Proc. of the IEEE Conf. on Comput. Vision and Pattern Recognit., 2017, pp. 4700-4708."},{"key":"e_1_3_3_1_6_2","first-page":"1492","volume-title":"Proc. of the IEEE Conf. on Comput. Vision and Pattern Recognit.","author":"Xie S.","year":"2017","unstructured":"S. Xie, R. Girshick, P. Doll\u00e1r, Z Tu, K. He, \u201cAggregated residual transformations for deep neural networks,\u201d Proc. of the IEEE Conf. on Comput. Vision and Pattern Recognit., 2017, pp. 1492-1500."},{"key":"e_1_3_3_1_7_2","unstructured":"A. G. Howard M. Zhu B. Chen D. Kalenichenko W. Wang et al \u201cMobilenets: Efficient convolutional neural networks for mobile vision applications \u201d unpublished."},{"key":"e_1_3_3_1_8_2","first-page":"6105","volume-title":"Int. Conf. on Mach. Learn., PMLR","author":"Tan M.","year":"2019","unstructured":"M. Tan, W. Le, \u201cEfficientnet: Rethinking model scaling for convolutional neural networks,\u201d Int. Conf. on Mach. Learn., PMLR, 2019, pp. 6105-6114."},{"key":"e_1_3_3_1_9_2","volume-title":"Adv. in Neural Inf. Process. Syst.","volume":"30","author":"Vaswani A.","year":"2017","unstructured":"A. Vaswani, N. Shazeer, N. Parmar, J. Uszkoreit, L. Jones, et al, \u201cAttention is all you need,\u201d Adv. in Neural Inf. Process. Syst., 2017, vol. 30."},{"key":"e_1_3_3_1_10_2","unstructured":"A. Dosovitskiy L. Beyer A. Kolesnikov D. Weissenborn X. Zhai et al. \u201cAn image is worth 16x16 words: Transformers for image recognition at scale \u201d unpublished."},{"key":"e_1_3_3_1_11_2","first-page":"213","volume-title":"Conf. on Comput. Vision","author":"Carion N.","year":"2020","unstructured":"N. Carion, F. Massa, G. Synnaeve, N. Usunier, A. Kirillov, et al, \u201cEnd-to-end object detection with transformers,\u201c Eur. Conf. on Comput. Vision, Cham: Springer International Publishing, 2020, pp. 213-229."},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1989.1.4.541"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1207\/s15516709cog1402_1"},{"key":"e_1_3_3_1_15_2","unstructured":"A. Zadeh M. Chen S. Poria E. Cambria L. Morency \u201cTensor fusion network for multimodal sentiment analysis \u201d unpublished."},{"key":"e_1_3_3_1_16_2","unstructured":"J. Devlin M. W. Chang K. Lee K. Toutanova \u201cBert: Pre-training of deep bidirectional transformers for language understanding \u201d unpublished."},{"key":"e_1_3_3_1_17_2","volume-title":"Improving language understanding by generative pre-training","author":"A. Radford A, K.","year":"2018","unstructured":"A. Radford A, K. Narasimhan K, T. Salimans T, I. Sutskever, \u201cImproving language understanding by generative pre-training,\u201d 2018."},{"key":"e_1_3_3_1_18_2","volume-title":"Adv. in Neural Inf. Process. Syst.","volume":"32","author":"Lu J.","year":"2019","unstructured":"J. Lu, D. Batra, D. Parikh, S. Lee, \u201cVilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks,\u201d Adv. in Neural Inf. Process. Syst., 2019, vol. 32."},{"key":"e_1_3_3_1_19_2","unstructured":"H. Tan M. Bansal \u201cLxmert: Learning cross-modality encoder representations from transformers \u201d unpublished."},{"key":"e_1_3_3_1_20_2","first-page":"8748","volume-title":"Int. Conf. on Mach. Learn., PMLR","author":"Radford A.","year":"2021","unstructured":"A. Radford, J. W. Kim, C. Hallacy, A. Ramesh, G. Goh, et al, \u201cLearning transferable visual models from natural language supervision,\u201d Int. Conf. on Mach. Learn., PMLR, 2021, pp. 8748-8763."},{"key":"e_1_3_3_1_21_2","first-page":"12888","volume-title":"Int. Conf. on Mach. Learn., PMLR","author":"Li J.","year":"2022","unstructured":"J. Li, D. Li, C. Xiong, S, Hoi, \u201dBlip: Bootstrapping language-image pre-training for unified vision-language understanding and generation,\u201d Int. Conf. on Mach. Learn., PMLR, 2022, pp. 12888-12900."},{"key":"e_1_3_3_1_22_2","unstructured":"J. Li D. Li S. Savarese S. Hoi \u201cBlip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models \u201d unpublished."},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/480"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.7005"},{"key":"e_1_3_3_1_25_2","unstructured":"Y. Fang Q. Sun X. Wang T. Huang X. Wang et al \u201cEva-02: A visual representation for neon genesis \u201d unpublished."},{"key":"e_1_3_3_1_26_2","first-page":"9694","article-title":"Align before fuse: Vision and language representation learning with momentum distillation","volume":"34","author":"Li J.","year":"2021","unstructured":"J. Li, R. Selvaraju, A. Gotmare, S. Joty, C. Xiong, et al, \u201cAlign before fuse: Vision and language representation learning with momentum distillation,\u201d Adv. in Neural Inf. Process. Syst., vol. 34, pp. 9694-9705, 2021.","journal-title":"Adv. in Neural Inf. Process. Syst."}],"event":{"name":"BDIOT 2024: 2024 8th International Conference on Big Data and Internet of Things","acronym":"BDIOT 2024","location":"Macau China"},"container-title":["Proceedings of the 2024 8th International Conference on Big Data and Internet of Things"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3697355.3697385","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3697355.3697385","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:34Z","timestamp":1750295854000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3697355.3697385"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,14]]},"references-count":26,"alternative-id":["10.1145\/3697355.3697385","10.1145\/3697355"],"URL":"https:\/\/doi.org\/10.1145\/3697355.3697385","relation":{},"subject":[],"published":{"date-parts":[[2024,9,14]]},"assertion":[{"value":"2024-12-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}