{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T13:31:46Z","timestamp":1742995906195,"version":"3.40.3"},"publisher-location":"Cham","reference-count":43,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031727535"},{"type":"electronic","value":"9783031727542"}],"license":[{"start":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:00:00Z","timestamp":1730332800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:00:00Z","timestamp":1730332800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72754-2_7","type":"book-chapter","created":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T14:57:07Z","timestamp":1730300227000},"page":"111-127","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Active Coarse-to-Fine Segmentation of\u00a0Moveable Parts from\u00a0Real Images"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-3379-6103","authenticated-orcid":false,"given":"Ruiqi","family":"Wang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1429-3804","authenticated-orcid":false,"given":"Akshay","family":"Gadi Patil","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1591-4668","authenticated-orcid":false,"given":"Fenggen","family":"Yu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1991-119X","authenticated-orcid":false,"given":"Hao","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,31]]},"reference":[{"key":"7_CR1","unstructured":"Trimble Inc. 3D Warehouse. (2023). https:\/\/3dwarehouse.sketchup.com\/. Accessed 4 Mar 2023"},{"key":"7_CR2","unstructured":"Aggarwal, C.C., Kong, X., Gu, Q., Han, J., Yu, P.S.: Active learning: a survey. In: Data Classification: Algorithms and Applications, pp. 571\u2013597 (2014)"},{"key":"7_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"640","DOI":"10.1007\/978-3-642-33783-3_46","volume-title":"Computer Vision \u2013 ECCV 2012","author":"L Ballan","year":"2012","unstructured":"Ballan, L., Taneja, A., Gall, J., Van Gool, L., Pollefeys, M.: Motion capture of hands in action using discriminative salient points. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012. LNCS, vol. 7577, pp. 640\u2013653. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-33783-3_46"},{"key":"7_CR4","unstructured":"Casanova, A., Pinheiro, P.O., Rostamzadeh, N., Pal, C.J.: Reinforced active learning for image segmentation. In: International Conference on Learning Representations (2020)"},{"key":"7_CR5","unstructured":"Chang, A.X., et\u00a0al.: Shapenet: an information-rich 3d model repository. arXiv preprint arXiv:1512.03012 (2015)"},{"key":"7_CR6","doi-asserted-by":"crossref","unstructured":"Cheng, B., Misra, I., Schwing, A.G., Kirillov, A., Girdhar, R.: Masked-attention mask transformer for universal image segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1290\u20131299 (2022)","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"7_CR7","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: transformers for image recognition at scale. In: ICLR (2021)"},{"key":"7_CR8","unstructured":"Grounded-SAM Contributors: Grounded-Segment-Anything (2023). https:\/\/github.com\/IDEA-Research\/Grounded-Segment-Anything"},{"key":"7_CR9","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask r-cnn. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2961\u20132969 (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"7_CR10","doi-asserted-by":"crossref","unstructured":"Hu, R., Li, W., Van Kaick, O., Shamir, A., Zhang, H., Huang, H.: Learning to predict part mobility from a single static snapshot. ACM Trans. Graph. 36(6), 1\u201313 (2017)","DOI":"10.1145\/3130800.3130811"},{"key":"7_CR11","doi-asserted-by":"crossref","unstructured":"Huang, J., et al.: Multibodysync: multi-body segmentation and motion estimation via 3d scan synchronization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7108\u20137118 (2021)","DOI":"10.1109\/CVPR46437.2021.00703"},{"key":"7_CR12","doi-asserted-by":"crossref","unstructured":"Huang, Z., Xu, Y., Lassner, C., Li, H., Tung, T.: Arch: animatable reconstruction of clothed humans. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3093\u20133102 (2020)","DOI":"10.1109\/CVPR42600.2020.00316"},{"key":"7_CR13","unstructured":"Jantos, T., Hamdad, M., Granig, W., Weiss, S., Steinbrener, J.: PoET: pose estimation transformer for single-view, multi-object 6D pose estimation. In: 6th Annual Conference on Robot Learning (CoRL 2022) (2022)"},{"key":"7_CR14","doi-asserted-by":"publisher","unstructured":"Jiang, H., Mao, Y., Savva, M., Chang, A.X.: OPD: single-view 3D openable part detection. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022, Part XXXIX, pp. 410\u2013426. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19842-7_24","DOI":"10.1007\/978-3-031-19842-7_24"},{"key":"7_CR15","doi-asserted-by":"crossref","unstructured":"Kanazawa, A., Black, M.J., Jacobs, D.W., Malik, J.: End-to-end recovery of human shape and pose. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7122\u20137131 (2018)","DOI":"10.1109\/CVPR.2018.00744"},{"key":"7_CR16","unstructured":"Kirillov, A., et al.: Segment anything. arXiv preprint arXiv:2304.02643 (2023)"},{"key":"7_CR17","doi-asserted-by":"crossref","unstructured":"Li, X., Wang, H., Yi, L., Guibas, L.J., Abbott, A.L., Song, S.: Category-level articulated object pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3706\u20133715 (2020)","DOI":"10.1109\/CVPR42600.2020.00376"},{"key":"7_CR18","doi-asserted-by":"crossref","unstructured":"Liu, S., et\u00a0al.: Grounding Dino: marrying Dino with grounded pre-training for open-set object detection. arXiv preprint arXiv:2303.05499 (2023)","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"7_CR19","doi-asserted-by":"crossref","unstructured":"Mahendran, S., Ali, H., Vidal, R.: 3d pose regression using convolutional neural networks. In: Proceedings of the IEEE International Conference on Computer Vision Workshops, pp. 2174\u20132182 (2017)","DOI":"10.1109\/ICCVW.2017.254"},{"key":"7_CR20","doi-asserted-by":"crossref","unstructured":"Mehta, D., et al.: Vnect: real-time 3d human pose estimation with a single RGB camera. ACM Trans. Graph. 36(4), 1\u201314 (2017)","DOI":"10.1145\/3072959.3073596"},{"key":"7_CR21","doi-asserted-by":"crossref","unstructured":"Mo, K., et al.: PartNet: a large-scale benchmark for fine-grained and hierarchical part-level 3D object understanding. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2019)","DOI":"10.1109\/CVPR.2019.00100"},{"key":"7_CR22","doi-asserted-by":"crossref","unstructured":"Mueller, F., et al.: GANerated hands for real-time 3d hand tracking from monocular RGB. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 49\u201359 (2018)","DOI":"10.1109\/CVPR.2018.00013"},{"key":"7_CR23","doi-asserted-by":"crossref","unstructured":"Ning, M., et al.: Multi-anchor active domain adaptation for semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9112\u20139122 (2021)","DOI":"10.1109\/ICCV48922.2021.00898"},{"key":"7_CR24","unstructured":"Ren, P., et al.: A survey of deep active learning. arXiv preprint arXiv:2009.00236 (2020)"},{"key":"7_CR25","unstructured":"Sener, O., Savarese, S.: Active learning for convolutional neural networks: a core-set approach. In: International Conference on Learning Representations (2018)"},{"key":"7_CR26","doi-asserted-by":"crossref","unstructured":"Shin, G., Xie, W., Albanie, S.: All you need are a few pixels: semantic segmentation with pixelpick. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) Workshops, pp. 1687\u20131697 (2021)","DOI":"10.1109\/ICCVW54120.2021.00194"},{"key":"7_CR27","doi-asserted-by":"crossref","unstructured":"Sinha, S., Ebrahimi, S., Darrell, T.: Variational adversarial active learning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) (2019)","DOI":"10.1109\/ICCV.2019.00607"},{"key":"7_CR28","doi-asserted-by":"crossref","unstructured":"Sun, X., Jiang, H., Savva, M., Chang, A.X.: OPDMulti: openable part detection for multiple objects. In: Proceedings of 3D Vision (2024)","DOI":"10.1109\/3DV62453.2024.00100"},{"key":"7_CR29","doi-asserted-by":"crossref","unstructured":"Tang, C., Xie, L., Zhang, G., Zhang, X., Tian, Q., Hu, X.: Active pointly-supervised instance segmentation. In: ECCV 2022, Part XXVIII, pp. 606\u2013623. Springer (2022)","DOI":"10.1007\/978-3-031-19815-1_35"},{"key":"7_CR30","unstructured":"Wada, K.: labelme: Image Polygonal Annotation with Python (2016). https:\/\/github.com\/wkentaro\/labelme"},{"key":"7_CR31","doi-asserted-by":"crossref","unstructured":"Wang, J., Yuille, A.L.: Semantic part segmentation using compositional model combining shape and appearance. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1788\u20131797 (2015)","DOI":"10.1109\/CVPR.2015.7298788"},{"key":"7_CR32","doi-asserted-by":"crossref","unstructured":"Wang, P., Shen, X., Lin, Z., Cohen, S., Price, B., Yuille, A.L.: Joint object and part segmentation using deep learned potentials. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1573\u20131581 (2015)","DOI":"10.1109\/ICCV.2015.184"},{"key":"7_CR33","doi-asserted-by":"crossref","unstructured":"Wang, X., Zhou, B., Shi, Y., Chen, X., Zhao, Q., Xu, K.: Shape2motion: joint analysis of motion parts and attributes from 3d shapes. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8876\u20138884 (2019)","DOI":"10.1109\/CVPR.2019.00908"},{"key":"7_CR34","doi-asserted-by":"crossref","unstructured":"Wu, T.H., et al.: D 2 ada: Dynamic density-aware active domain adaptation for semantic segmentation. In: ECCV 2022, Part XXIX, pp. 449\u2013467. Springer (2022)","DOI":"10.1007\/978-3-031-19818-2_26"},{"key":"7_CR35","doi-asserted-by":"crossref","unstructured":"Xia, F., Wang, P., Chen, X., Yuille, A.L.: Joint multi-person pose estimation and semantic part segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6769\u20136778 (2017)","DOI":"10.1109\/CVPR.2017.644"},{"key":"7_CR36","doi-asserted-by":"crossref","unstructured":"Xiang, F., et al..: SAPIEN: a simulated part-based interactive environment. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2020)","DOI":"10.1109\/CVPR42600.2020.01111"},{"key":"7_CR37","doi-asserted-by":"crossref","unstructured":"Xie, B., Yuan, L., Li, S., Liu, C.H., Cheng, X.: Towards fewer annotations: active learning via region impurity and prediction uncertainty for domain adaptive semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8068\u20138078 (2022)","DOI":"10.1109\/CVPR52688.2022.00790"},{"key":"7_CR38","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"672","DOI":"10.1007\/978-3-030-69525-5_40","volume-title":"Computer Vision \u2013 ACCV 2020","author":"S Xie","year":"2021","unstructured":"Xie, S., Feng, Z., Chen, Y., Sun, S., Ma, C., Song, M.: DEAL: difficulty-aware active learning for semantic segmentation. In: Ishikawa, H., Liu, C.-L., Pajdla, T., Shi, J. (eds.) ACCV 2020. LNCS, vol. 12622, pp. 672\u2013688. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-69525-5_40"},{"key":"7_CR39","doi-asserted-by":"crossref","unstructured":"Yan, Z., et al.: Rpm-net: recurrent prediction of motion and parts from point cloud. ACM Trans. Graph. 38(6), 1\u201315 (2019)","DOI":"10.1145\/3355089.3356573"},{"key":"7_CR40","unstructured":"Zhan, X., Wang, Q., Huang, K.H., Xiong, H., Dou, D., Chan, A.B.: A comparative survey of deep active learning. arXiv preprint arXiv:2203.13450 (2022)"},{"key":"7_CR41","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Barnes, C., Lu, J., Yang, J., Li, H.: On the continuity of rotation representations in neural networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5745\u20135753 (2019)","DOI":"10.1109\/CVPR.2019.00589"},{"key":"7_CR42","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J.: Deformable detr: deformable transformers for end-to-end object detection. In: International Conference on Learning Representations (2020)"},{"key":"7_CR43","unstructured":"Zou, X., et al.: Segment everything everywhere all at once. arXiv preprint arXiv:2304.06718 (2023)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72754-2_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,30]],"date-time":"2024-11-30T14:07:09Z","timestamp":1732975629000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72754-2_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,31]]},"ISBN":["9783031727535","9783031727542"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72754-2_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,10,31]]},"assertion":[{"value":"31 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}