{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T08:17:00Z","timestamp":1774685820266,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":66,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"United States - Israel Binational Science Foundation (BSF)","award":["2022363"],"award-info":[{"award-number":["2022363"]}]},{"name":"National Science Foundation (NSF)","award":["2304481"],"award-info":[{"award-number":["2304481"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,12,3]]},"DOI":"10.1145\/3680528.3687605","type":"proceedings-article","created":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T03:14:37Z","timestamp":1733195677000},"page":"1-11","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["iSeg: Interactive 3D Segmentation via Interactive Attention"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4066-4293","authenticated-orcid":false,"given":"Itai","family":"Lang","sequence":"first","affiliation":[{"name":"University of Chicago, Chicago, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8850-7580","authenticated-orcid":false,"given":"Fei","family":"Xu","sequence":"additional","affiliation":[{"name":"University of Chicago, Chicago, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8771-1814","authenticated-orcid":false,"given":"Dale","family":"Decatur","sequence":"additional","affiliation":[{"name":"University of Chicago, Chicago, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-0785-020X","authenticated-orcid":false,"given":"Sudarshan","family":"Babu","sequence":"additional","affiliation":[{"name":"Toyota Technological Institute at Chicago, Chicago, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3214-3703","authenticated-orcid":false,"given":"Rana","family":"Hanocka","sequence":"additional","affiliation":[{"name":"University of Chicago, Chicago, United States of America"}]}],"member":"320","published-online":{"date-parts":[[2024,12,3]]},"reference":[{"key":"e_1_3_3_2_2_1","doi-asserted-by":"crossref","unstructured":"Ahmed Abdelreheem Abdelrahman Eldesokey Maks Ovsjanikov and Peter Wonka. 2023a. Zero-Shot 3D Shape Correspondence. SIGGRAPH Asia 2023 Conference Papers (2023).","DOI":"10.1145\/3610548.3618228"},{"key":"e_1_3_3_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01392"},{"key":"e_1_3_3_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1186822.1073207"},{"key":"e_1_3_3_2_5_1","unstructured":"I. Armeni A. Sax A.\u00a0R. Zamir and S. Savarese. 2017. Joint 2D-3D-Semantic Data for Indoor Scene Understanding. ArXiv e-prints (Feb. 2017). arxiv:https:\/\/arXiv.org\/abs\/1702.01105\u00a0[cs.CV]"},{"key":"e_1_3_3_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2001.937505"},{"key":"e_1_3_3_2_7_1","unstructured":"Jiazhong Cen Zanwei Zhou Jiemin Fang Chen Yang Wei Shen Lingxi Xie Dongsheng Jiang Xiaopeng Zhang and Qi Tian. 2023. Segment Anything in 3D with NeRFs. Advances in Neural Information Processing Systems 36 (2023) 25971\u201325990."},{"key":"e_1_3_3_2_8_1","unstructured":"Angel\u00a0X Chang Thomas Funkhouser Leonidas Guibas Pat Hanrahan Qixing Huang Zimo Li Silvio Savarese Manolis Savva Shuran Song Hao Su et\u00a0al. 2015. Shapenet: An information-rich 3d model repository. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1512.03012 (2015)."},{"key":"e_1_3_3_2_9_1","unstructured":"Xiaokang Chen Jiaxiang Tang Diwen Wan Jingbo Wang and Gang Zeng. 2023b. Interactive Segment Anything NeRF with Feature Imitation. arxiv:https:\/\/arXiv.org\/abs\/2305.16233\u00a0[cs.CV]"},{"key":"e_1_3_3_2_10_1","unstructured":"Zhimin Chen Longlong Jing Yingwei Li and Bing Li. 2023a. Bridging the Domain Gap: Self-Supervised 3D Scene Understanding with Foundation Models. arxiv:https:\/\/arXiv.org\/abs\/2305.08776\u00a0[cs.CV]"},{"key":"e_1_3_3_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00858"},{"key":"e_1_3_3_2_12_1","doi-asserted-by":"crossref","unstructured":"Nicu\u00a0D Cornea Deborah Silver and Patrick Min. 2007. Curve-skeleton properties applications and algorithms. IEEE Transactions on visualization and computer graphics 13 3 (2007) 530.","DOI":"10.1109\/TVCG.2007.1002"},{"key":"e_1_3_3_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00428"},{"key":"e_1_3_3_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02005"},{"key":"e_1_3_3_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00011"},{"key":"e_1_3_3_2_16_1","doi-asserted-by":"crossref","unstructured":"Tamal\u00a0K Dey and Wulue Zhao. 2004. Approximating the medial axis from the Voronoi diagram with a convergence guarantee. Algorithmica 38 1 (2004) 179\u2013200.","DOI":"10.1007\/s00453-003-1049-y"},{"key":"e_1_3_3_2_17_1","doi-asserted-by":"crossref","unstructured":"Yuval Eldar Michael Lindenbaum Moshe Porat and Y.\u00a0Yehoshua Zeevi. 1997. The Farthest Point Strategy for Progressive Image Sampling. IEEE Transactions on Image Processing 6 (1997) 1305\u20131315.","DOI":"10.1109\/83.623193"},{"key":"e_1_3_3_2_18_1","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR)","author":"Fan Zhiwen","year":"2023","unstructured":"Zhiwen Fan, Peihao Wang, Yifan Jiang, Xinyu Gong, Dejia Xu, and Zhangyang Wang. 2023. NeRF-SOS: Any-View Self-supervised Object Segmentation on Complex Scenes. In Proceedings of the International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_3_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00409"},{"key":"e_1_3_3_2_20_1","volume-title":"Proceedings of the 2022 Conference on Robot Learning","author":"Ha Huy","year":"2022","unstructured":"Huy Ha and Shuran Song. 2022. Semantic Abstraction: Open-World 3D Scene Understanding from 2D Vision-Language Models. In Proceedings of the 2022 Conference on Robot Learning."},{"key":"e_1_3_3_2_21_1","doi-asserted-by":"crossref","unstructured":"Rana Hanocka Amir Hertz Noa Fish Raja Giryes Shachar Fleishman and Daniel Cohen-Or. 2019. MeshCNN: A Network with an Edge. ACM Transactions on Graphics (TOG) 38 4 (2019) 90:1\u201390:12.","DOI":"10.1145\/3306346.3322959"},{"key":"e_1_3_3_2_22_1","doi-asserted-by":"crossref","unstructured":"Donald\u00a0D Hoffman and Whitman\u00a0A Richards. 1984. Parts of recognition. Cognition 18 1-3 (1984) 65\u201396.","DOI":"10.1016\/0010-0277(84)90022-2"},{"key":"e_1_3_3_2_23_1","volume-title":"Annual Conference on Neural Information Processing Systems","author":"Hong Yining","year":"2022","unstructured":"Yining Hong, Yilun Du, Chunru Lin, Josh Tenenbaum, and Chuang Gan. 2022. 3D Concept Grounding on Neural Fields. In Annual Conference on Neural Information Processing Systems."},{"key":"e_1_3_3_2_24_1","doi-asserted-by":"publisher","unstructured":"Shi-Min Hu Zheng-Ning Liu Meng-Hao Guo Junxiong Cai Jiahui Huang Tai-Jiang Mu and Ralph\u00a0R. Martin. 2022. Subdivision-based Mesh Convolution Networks. ACM Trans. Graph. 41 3 (2022) 25:1\u201325:16. 10.1145\/3506694https:\/\/dl.acm.org\/doi\/10.1145\/3506694","DOI":"10.1145\/3506694"},{"key":"e_1_3_3_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.702"},{"key":"e_1_3_3_2_26_1","doi-asserted-by":"crossref","unstructured":"Justin Kerr Chung\u00a0Min Kim Ken Goldberg Angjoo Kanazawa and Matthew Tancik. 2023. LERF: Language Embedded Radiance Fields. arxiv:https:\/\/arXiv.org\/abs\/2303.09553\u00a0[cs.CV]","DOI":"10.1109\/ICCV51070.2023.01807"},{"key":"e_1_3_3_2_27_1","unstructured":"Hyunjin Kim and Minhyuk Sung. 2024. PartSTAD: 2D-to-3D Part Segmentation Task Adaptation."},{"key":"e_1_3_3_2_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"e_1_3_3_2_29_1","unstructured":"Sosuke Kobayashi Eiichi Matsumoto and Vincent Sitzmann. 2022. Decomposing NeRF for Editing via Feature Field Distillation. Advances in Neural Information Processing Systems 35 (2022) 23311\u201323330."},{"key":"e_1_3_3_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160904"},{"key":"e_1_3_3_2_31_1","doi-asserted-by":"publisher","unstructured":"Abhijit Kundu Xiaoqi Yin Alireza Fathi David Ross Brian Brewington Thomas Funkhouser and Caroline Pantofaru. 2020. Virtual Multi-view Fusion for 3D Semantic Segmentation. arXiv e-prints Article arXiv:2007.13138 (July 2020) arXiv:2007.13138\u00a0pages. 10.48550\/arXiv.2007.13138 arxiv:https:\/\/arXiv.org\/abs\/2007.13138\u00a0[cs.CV]","DOI":"10.48550\/arXiv.2007.13138"},{"key":"e_1_3_3_2_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/1236246.1236265"},{"key":"e_1_3_3_2_33_1","doi-asserted-by":"crossref","unstructured":"Nobuyuki Ostu. 1979. A threshold selection method from gray-level histograms. IEEE Trans SMC 9 (1979) 62.","DOI":"10.1109\/TSMC.1979.4310076"},{"key":"e_1_3_3_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02082"},{"key":"e_1_3_3_2_35_1","first-page":"3","volume-title":"Eurographics Workshop on 3D Object Retrieval","author":"Melzi Simone","year":"2019","unstructured":"Simone Melzi, Riccardo Marin, Emanuele Rodol\u00e0, Umberto Castellani, Jing Ren, Adrien Poulenard, Peter Wonka, and Maks Ovsjanikov. 2019. SHREC 2019: Matching Humans with Different Connectivity. In Eurographics Workshop on 3D Object Retrieval , Vol.\u00a07. The Eurographics Association, 3."},{"key":"e_1_3_3_2_36_1","unstructured":"Francesco Milano Antonio Loquercio Antoni Rosinol Davide Scaramuzza and Luca Carlone. 2020. Primal-dual mesh convolutional neural networks. Advances in Neural Information Processing Systems 33 (2020) 952\u2013963."},{"key":"e_1_3_3_2_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_24"},{"key":"e_1_3_3_2_38_1","unstructured":"Fausto Milletari Nassir Navab and Seyed-Ahmad Ahmadi. 2016. V-Net: Fully Convolutional Neural Networks for Volumetric Medical Image Segmentation. arxiv:https:\/\/arXiv.org\/abs\/1606.04797\u00a0[cs.CV]"},{"key":"e_1_3_3_2_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01980"},{"key":"e_1_3_3_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00100"},{"key":"e_1_3_3_2_41_1","volume-title":"NIPS-W","author":"Paszke Adam","year":"2017","unstructured":"Adam Paszke, Sam Gross, Soumith Chintala, Gregory Chanan, Edward Yang, Zachary DeVito, Zeming Lin, Alban Desmaison, Luca Antiga, and Adam Lerer. 2017. Automatic differentiation in PyTorch. In NIPS-W."},{"key":"e_1_3_3_2_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00085"},{"key":"e_1_3_3_2_43_1","doi-asserted-by":"crossref","unstructured":"Charles\u00a0R. Qi Hao Su Kaichun Mo and Leonidas\u00a0J. Guibas. 2017. PointNet: Deep Learning on Point Sets for 3D Classification and Segmentation. arxiv:https:\/\/arXiv.org\/abs\/1612.00593\u00a0[cs.CV]","DOI":"10.1109\/CVPR.2017.16"},{"key":"e_1_3_3_2_44_1","unstructured":"James\u00a0Matthew Rehg. 2022. Toys4K 3D Object Dataset. https:\/\/github.com\/rehg-lab\/lowshot-shapebias\/tree\/main\/toys4k."},{"key":"e_1_3_3_2_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00604"},{"key":"e_1_3_3_2_46_1","doi-asserted-by":"crossref","unstructured":"Carsten Rother Vladimir Kolmogorov and Andrew Blake. 2004. GrabCut -Interactive Foreground Extraction using Iterated Graph Cuts. ACM Transactions on Graphics (SIGGRAPH) (August 2004). https:\/\/www.microsoft.com\/en-us\/research\/publication\/grabcut-interactive-foreground-extraction-using-iterated-graph-cuts\/","DOI":"10.1145\/1186562.1015720"},{"key":"e_1_3_3_2_47_1","doi-asserted-by":"crossref","unstructured":"Ariel Shamir. 2008. A survey on mesh segmentation techniques. Computer graphics forum 27 6 (2008) 1539\u20131556.","DOI":"10.1111\/j.1467-8659.2007.01103.x"},{"key":"e_1_3_3_2_48_1","doi-asserted-by":"crossref","unstructured":"Nicholas Sharp Souhaib Attaiki Keenan Crane and Maks Ovsjanikov. 2022. Diffusionnet: Discretization agnostic learning on surfaces. ACM Transactions on Graphics (TOG) 41 3 (2022) 1\u201316.","DOI":"10.1145\/3507905"},{"key":"e_1_3_3_2_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DPVT.2006.70"},{"key":"e_1_3_3_2_50_1","first-page":"24993","volume-title":"Advances in Neural Information Processing Systems","author":"Sun Weiwei","year":"2021","unstructured":"Weiwei Sun, Andrea Tagliasacchi, Boyang Deng, Sara Sabour, Soroosh Yazdani, Geoffrey\u00a0E Hinton, and Kwang\u00a0Moo Yi. 2021. Canonical Capsules: Self-Supervised Capsules in Canonical Pose. In Advances in Neural Information Processing Systems , M.\u00a0Ranzato, A.\u00a0Beygelzimer, Y.\u00a0Dauphin, P.S. Liang, and J.\u00a0Wortman Vaughan (Eds.), Vol.\u00a034. Curran Associates, Inc., 24993\u201325005. https:\/\/proceedings.neurips.cc\/paper\/2021\/file\/d1ee59e20ad01cedc15f5118a7626099-Paper.pdf"},{"key":"e_1_3_3_2_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV57658.2022.00056"},{"key":"e_1_3_3_2_52_1","unstructured":"TurboSquid. 2021. TurboSquid 3D Model Repository. https:\/\/www.turbosquid.com\/."},{"key":"e_1_3_3_2_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00333"},{"key":"e_1_3_3_2_54_1","doi-asserted-by":"publisher","unstructured":"Oliver van Kaick Andrea Tagliasacchi Oana Sidi Hao Zhang Daniel Cohen-Or Lior Wolf and Ghassan Hamarneh. 2011. Prior Knowledge for Part Correspondence. Computer Graphics Forum 30 2 (2011) 553\u2013562. 10.1111\/j.1467-8659.2011.01893.x","DOI":"10.1111\/j.1467-8659.2011.01893.x"},{"key":"e_1_3_3_2_55_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan\u00a0N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention Is All You Need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_3_2_56_1","first-page":"1912","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","author":"Wu Zhirong","year":"2015","unstructured":"Zhirong Wu, Shuran Song, Aditya Khosla, Fisher Yu, Linguang Zhang, Xiaoou Tang, and Jianxiong Xiao. 2015. 3d shapenets: A deep representation for volumetric shapes. In Proceedings of the IEEE conference on computer vision and pattern recognition. 1912\u20131920."},{"key":"e_1_3_3_2_57_1","doi-asserted-by":"publisher","unstructured":"Yunhan Yang Xiaoyang Wu Tong He Hengshuang Zhao and Xihui Liu. 2023. SAM3D: Segment Anything in 3D Scenes. arXiv e-prints Article arXiv:2306.03908 (June 2023) arXiv:2306.03908\u00a0pages. 10.48550\/arXiv.2306.03908 arxiv:https:\/\/arXiv.org\/abs\/2306.03908\u00a0[cs.CV]","DOI":"10.48550\/arXiv.2306.03908"},{"key":"e_1_3_3_2_58_1","unstructured":"Jianglong Ye Naiyan Wang and Xiaolong Wang. 2023. FeatureNeRF: Learning Generalizable NeRFs by Distilling Foundation Models. arxiv:https:\/\/arXiv.org\/abs\/2303.12786\u00a0[cs.CV]"},{"key":"e_1_3_3_2_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.697"},{"key":"e_1_3_3_2_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00317"},{"key":"e_1_3_3_2_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01948"},{"key":"e_1_3_3_2_62_1","volume-title":"International Conference on Learning Representations (ICLR)","author":"Yue Yuanwen","year":"2024","unstructured":"Yuanwen Yue, Sabarinath Mahadevan, Jonas Schult, Francis Engelmann, Bastian Leibe, Konrad Schindler, and Theodora Kontogianni. 2024. AGILE3D: Attention Guided Interactive Multi-object 3D Segmentation. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_3_2_63_1","doi-asserted-by":"publisher","unstructured":"Dingyuan Zhang Dingkang Liang Hongcheng Yang Zhikang Zou Xiaoqing Ye Zhe Liu and Xiang Bai. 2023. SAM3D: Zero-Shot 3D Object Detection via Segment Anything Model. arXiv e-prints Article arXiv:2306.02245 (June 2023) arXiv:2306.02245\u00a0pages. 10.48550\/arXiv.2306.02245 arxiv:https:\/\/arXiv.org\/abs\/2306.02245\u00a0[cs.CV]","DOI":"10.48550\/arXiv.2306.02245"},{"key":"e_1_3_3_2_64_1","doi-asserted-by":"crossref","unstructured":"Renrui Zhang Liuhui Wang Yu Qiao Peng Gao and Hongsheng Li. 2022. Learning 3D Representations from 2D Pre-trained Models via Image-to-Point Masked Autoencoders. arxiv:https:\/\/arXiv.org\/abs\/2212.06785\u00a0[cs.CV]","DOI":"10.1109\/CVPR52729.2023.02085"},{"key":"e_1_3_3_2_65_1","doi-asserted-by":"crossref","unstructured":"Qian Zheng Zhuming Hao Hui Huang Kai Xu Hao Zhang Daniel Cohen-Or and Baoquan Chen. 2015. Skeleton-Intrinsic Symmetrization of Shapes. Computer Graphics Forum 34 2 (2015) 275\u2013286.","DOI":"10.1111\/cgf.12559"},{"key":"e_1_3_3_2_66_1","unstructured":"Qingnan Zhou and Alec Jacobson. 2016. Thingi10K: A Dataset of 10 000 3D-Printing Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1605.04797 (2016)."},{"key":"e_1_3_3_2_67_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00857"}],"event":{"name":"SA '24: SIGGRAPH Asia 2024 Conference Papers","location":"Tokyo Japan","acronym":"SA '24","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["SIGGRAPH Asia 2024 Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3680528.3687605","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3680528.3687605","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T18:26:35Z","timestamp":1759343195000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3680528.3687605"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,3]]},"references-count":66,"alternative-id":["10.1145\/3680528.3687605","10.1145\/3680528"],"URL":"https:\/\/doi.org\/10.1145\/3680528.3687605","relation":{},"subject":[],"published":{"date-parts":[[2024,12,3]]},"assertion":[{"value":"2024-12-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}