{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:41:45Z","timestamp":1742913705149,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":40,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819720941"},{"type":"electronic","value":"9789819720958"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-97-2095-8_3","type":"book-chapter","created":{"date-parts":[[2024,3,29]],"date-time":"2024-03-29T13:01:41Z","timestamp":1711717301000},"page":"42-58","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Leveraging Panoptic Prior for\u00a03D Zero-Shot Semantic Understanding Within Language Embedded Radiance Fields"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-3572-060X","authenticated-orcid":false,"given":"Yuzhou","family":"Ji","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9346-1196","authenticated-orcid":false,"given":"Xin","family":"Tan","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0006-0835-7153","authenticated-orcid":false,"given":"He","family":"Zhu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0008-3209-1281","authenticated-orcid":false,"given":"Wuyi","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0004-6511-3599","authenticated-orcid":false,"given":"Jiachen","family":"Xu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6945-7437","authenticated-orcid":false,"given":"Yuan","family":"Xie","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1653-4341","authenticated-orcid":false,"given":"Lizhuang","family":"Ma","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,3,30]]},"reference":[{"key":"3_CR1","doi-asserted-by":"crossref","unstructured":"Afham, M., Dissanayake, I., Dissanayake, D., Dharmasiri, A., Thilakarathna, K., Rodrigo, R.: Crosspoint: self-supervised cross-modal contrastive learning for 3d point cloud understanding. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 9892\u20139902 (2022). https:\/\/api.semanticscholar.org\/CorpusID:247187696","DOI":"10.1109\/CVPR52688.2022.00967"},{"key":"3_CR2","doi-asserted-by":"crossref","unstructured":"Barron, J.T., Mildenhall, B., Tancik, M., Hedman, P., Martin-Brualla, R., Srinivasan, P.P.: Mip-NeRF: a multiscale representation for anti-aliasing neural radiance fields. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 5835\u20135844 (2021). https:\/\/api.semanticscholar.org\/CorpusID:232352655","DOI":"10.1109\/ICCV48922.2021.00580"},{"key":"3_CR3","unstructured":"Brock, A., Donahue, J., Simonyan, K.: Large scale GAN training for high fidelity natural image synthesis. ArXiv abs\/1809.11096 (2018). https:\/\/api.semanticscholar.org\/CorpusID:52889459"},{"key":"3_CR4","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. ArXiv abs\/2005.12872 (2020). https:\/\/api.semanticscholar.org\/CorpusID:218889832","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"3_CR5","doi-asserted-by":"crossref","unstructured":"Caron, M., Touvron, H., Misra, I., J\u00e9gou, H., Mairal, J., Bojanowski, P., Joulin, A.: Emerging properties in self-supervised vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 9650\u20139660, October 2021","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"3_CR6","doi-asserted-by":"crossref","unstructured":"Chen, R., et al.: Bridging language and geometric primitives for zero-shot point cloud segmentation. In: Proceedings of the 31st ACM International Conference on Multimedia, pp. 5380\u20135388 (2023)","DOI":"10.1145\/3581783.3612409"},{"key":"3_CR7","doi-asserted-by":"crossref","unstructured":"Cheng, B., et al.: Panoptic-DeepLab: a simple, strong, and fast baseline for bottom-up panoptic segmentation. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 12472\u201312482 (2019). https:\/\/api.semanticscholar.org\/CorpusID:208248153","DOI":"10.1109\/CVPR42600.2020.01249"},{"key":"3_CR8","doi-asserted-by":"crossref","unstructured":"Cheng, B., Misra, I., Schwing, A.G., Kirillov, A., Girdhar, R.: Masked-attention mask transformer for universal image segmentation. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1280\u20131289 (2021). https:\/\/api.semanticscholar.org\/CorpusID:244799297","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"3_CR9","unstructured":"Cheng, B., Schwing, A.G., Kirillov, A.: Per-pixel classification is not all you need for semantic segmentation. In: Neural Information Processing Systems (2021). https:\/\/api.semanticscholar.org\/CorpusID:235829267"},{"key":"3_CR10","doi-asserted-by":"crossref","unstructured":"Cheraghian, A., Rahman, S., Campbell, D., Petersson, L.: Mitigating the hubness problem for zero-shot learning of 3d objects. In: British Machine Vision Conference (2019). https:\/\/api.semanticscholar.org\/CorpusID:196622565","DOI":"10.23919\/MVA.2019.8758063"},{"key":"3_CR11","doi-asserted-by":"crossref","unstructured":"Cheraghian, A., Rahman, S., Chowdhury, T.F., Campbell, D., Petersson, L.: Zero-shot learning on 3d point cloud objects and beyond. Int. J. Comput. Vis. 130, 2364\u20132384 (2021). https:\/\/api.semanticscholar.org\/CorpusID:233210533","DOI":"10.1007\/s11263-022-01650-4"},{"key":"3_CR12","doi-asserted-by":"crossref","unstructured":"Crowson, K., et al.: VQGAN-CLIP: open domain image generation and editing with natural language guidance. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13697, pp. 88\u2013105. Springer, Cham (2022). https:\/\/api.semanticscholar.org\/CorpusID:248239727","DOI":"10.1007\/978-3-031-19836-6_6"},{"key":"3_CR13","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat GANs on image synthesis. ArXiv abs\/2105.05233 (2021). https:\/\/api.semanticscholar.org\/CorpusID:234357997"},{"key":"3_CR14","doi-asserted-by":"crossref","unstructured":"Du, Y., Wei, F., Zhang, Z., Shi, M., Gao, Y., Li, G.C.: Learning to prompt for open-vocabulary object detection with vision-language model. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 14064\u201314073 (2022). https:\/\/api.semanticscholar.org\/CorpusID:247778949","DOI":"10.1109\/CVPR52688.2022.01369"},{"key":"3_CR15","doi-asserted-by":"crossref","unstructured":"Garbin, S.J., Kowalski, M., Johnson, M., Shotton, J., Valentin, J.P.C.: FastNeRF: high-fidelity neural rendering at 200FPS. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 14326\u201314335 (2021). https:\/\/api.semanticscholar.org\/CorpusID:232270138","DOI":"10.1109\/ICCV48922.2021.01408"},{"key":"3_CR16","unstructured":"Ge, Y., Xu, J., Zhao, B.N., Joshi, N., Itti, L., Vineet, V.: Beyond generation: harnessing text to image models for object detection and segmentation (2023)"},{"key":"3_CR17","unstructured":"Gu, X., Lin, T.Y., Kuo, W., Cui, Y.: Open-vocabulary object detection via vision and language knowledge distillation. In: International Conference on Learning Representations (2021). https:\/\/api.semanticscholar.org\/CorpusID:238744187"},{"key":"3_CR18","doi-asserted-by":"crossref","unstructured":"Kerr, J., Kim, C.M., Goldberg, K., Kanazawa, A., Tancik, M.: LERF: language embedded radiance fields (2023)","DOI":"10.1109\/ICCV51070.2023.01807"},{"key":"3_CR19","unstructured":"Kobayashi, S., Matsumoto, E., Sitzmann, V.: Decomposing nerf for editing via feature field distillation (2022)"},{"key":"3_CR20","doi-asserted-by":"crossref","unstructured":"Kundu, A., et al.: Panoptic neural fields: a semantic object-aware neural scene representation. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 12861\u201312871 (2022). https:\/\/api.semanticscholar.org\/CorpusID:248572506","DOI":"10.1109\/CVPR52688.2022.01253"},{"key":"3_CR21","doi-asserted-by":"crossref","unstructured":"Li, D., et al.: BigDatasetGAN: synthesizing imageNet with pixel-wise annotations. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 21298\u201321308 (2022). https:\/\/api.semanticscholar.org\/CorpusID:245906460","DOI":"10.1109\/CVPR52688.2022.02064"},{"key":"3_CR22","unstructured":"Liu, K., et al.: 3D open-vocabulary segmentation with foundation models (2023)"},{"key":"3_CR23","unstructured":"Liu, Y.C., et al.: Learning from 2d: contrastive pixel-to-point knowledge transfer for 3d pretraining. arXiv preprint arXiv:2104.04687 (2021)"},{"key":"3_CR24","unstructured":"Lu, Y.T., Liu, S., Thiagarajan, J.J., Sakla, W.A., Anirudh, R.: On-the-fly object detection using styleGAN with clip guidance. ArXiv abs\/2210.16742 (2022). https:\/\/api.semanticscholar.org\/CorpusID:253237985"},{"key":"3_CR25","doi-asserted-by":"crossref","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: NERF: Representing scenes as neural radiance fields for view synthesis (2020)","DOI":"10.1007\/978-3-030-58452-8_24"},{"key":"3_CR26","unstructured":"Minderer, M., et al.: Simple open-vocabulary object detection with vision transformers. ArXiv abs\/2205.06230 (2022). https:\/\/api.semanticscholar.org\/CorpusID:248721818"},{"key":"3_CR27","unstructured":"Mokady, R.: ClipCap: CLIP prefix for image captioning. ArXiv abs\/2111.09734 (2021). https:\/\/api.semanticscholar.org\/CorpusID:244346239"},{"key":"3_CR28","doi-asserted-by":"publisher","unstructured":"M\u00fcller, T., Evans, A., Schied, C., Keller, A.: Instant neural graphics primitives with a multiresolution hash encoding. ACM Trans. Graph. 41(4), 1\u201315 (2022). https:\/\/doi.org\/10.1145\/3528223.3530127","DOI":"10.1145\/3528223.3530127"},{"key":"3_CR29","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: Meila, M., Zhang, T. (eds.) Proceedings of the 38th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol. 139, pp. 8748\u20138763. PMLR, 18\u201324 July 2021. https:\/\/proceedings.mlr.press\/v139\/radford21a.html"},{"key":"3_CR30","doi-asserted-by":"crossref","unstructured":"Sautier, C., Puy, G., Gidaris, S., Boulch, A., Bursuc, A., Marlet, R.: Image-to-lidar self-supervised distillation for autonomous driving data. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 9881\u20139891 (2022). https:\/\/api.semanticscholar.org\/CorpusID:247793124","DOI":"10.1109\/CVPR52688.2022.00966"},{"key":"3_CR31","doi-asserted-by":"crossref","unstructured":"Siddiqui, Y., Porzi, L., Bul\u00f3, S.R., M\u00fcller, N., Nie\u00dfner, M., Dai, A., Kontschieder, P.: Panoptic lifting for 3d scene understanding with neural fields (2022)","DOI":"10.1109\/CVPR52729.2023.00873"},{"key":"3_CR32","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. ArXiv abs\/2010.02502 (2020). https:\/\/api.semanticscholar.org\/CorpusID:222140788"},{"key":"3_CR33","doi-asserted-by":"crossref","unstructured":"Tancik, M., et al.: Nerfstudio: a modular framework for neural radiance field development. In: ACM SIGGRAPH 2023 Conference Proceedings (2023). https:\/\/api.semanticscholar.org\/CorpusID:256662551","DOI":"10.1145\/3588432.3591516"},{"key":"3_CR34","doi-asserted-by":"crossref","unstructured":"Teng, Z., Duan, Y., Liu, Y., Zhang, B., Fan, J.: Global to local: Clip-LSTM-based object detection from remote sensing images. IEEE Trans. Geosci. Remote Sens. 60, 1\u201313 (2022). https:\/\/api.semanticscholar.org\/CorpusID:234104424","DOI":"10.1109\/TGRS.2021.3064840"},{"key":"3_CR35","doi-asserted-by":"crossref","unstructured":"Wang, C., Chai, M., He, M., Chen, D., Liao, J.: CLIP-NERF: text-and-image driven manipulation of neural radiance fields (2022)","DOI":"10.1109\/CVPR52688.2022.00381"},{"key":"3_CR36","doi-asserted-by":"crossref","unstructured":"Xu, J., Liu, S., Vahdat, A., Byeon, W., Wang, X., Mello, S.D.: Open-vocabulary panoptic segmentation with text-to-image diffusion models (2023)","DOI":"10.1109\/CVPR52729.2023.00289"},{"key":"3_CR37","doi-asserted-by":"crossref","unstructured":"Yu, Q., et al.: CMT-DeepLab: clustering mask transformers for panoptic segmentation. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2550\u20132560 (2022). https:\/\/api.semanticscholar.org\/CorpusID:249890221","DOI":"10.1109\/CVPR52688.2022.00259"},{"key":"3_CR38","unstructured":"Zhang, K., Riegler, G., Snavely, N., Koltun, V.: NeRF++: analyzing and improving neural radiance fields (2020)"},{"key":"3_CR39","doi-asserted-by":"crossref","unstructured":"Zhang, Y., et al.: DatasetGAN: efficient labeled data factory with minimal human effort. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10140\u201310150 (2021). https:\/\/api.semanticscholar.org\/CorpusID:233231510","DOI":"10.1109\/CVPR46437.2021.01001"},{"key":"3_CR40","doi-asserted-by":"crossref","unstructured":"Zhi, S., Laidlow, T., Leutenegger, S., Davison, A.J.: In-place scene labelling and understanding with implicit scene representation (2021)","DOI":"10.1109\/ICCV48922.2021.01554"}],"container-title":["Lecture Notes in Computer Science","Computational Visual Media"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-2095-8_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,29]],"date-time":"2024-03-29T13:03:27Z","timestamp":1711717407000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-2095-8_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9789819720941","9789819720958"],"references-count":40,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-2095-8_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"30 March 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"CVM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Computational Visual Media","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Wellington","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"New Zealand","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 April 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 April 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"cvm2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CVM submission system","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"212","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"34","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"16% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}