{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T14:27:55Z","timestamp":1766068075640,"version":"3.40.3"},"publisher-location":"Cham","reference-count":49,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031728891"},{"type":"electronic","value":"9783031728907"}],"license":[{"start":{"date-parts":[[2024,12,7]],"date-time":"2024-12-07T00:00:00Z","timestamp":1733529600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,7]],"date-time":"2024-12-07T00:00:00Z","timestamp":1733529600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72890-7_24","type":"book-chapter","created":{"date-parts":[[2024,12,6]],"date-time":"2024-12-06T19:45:51Z","timestamp":1733514351000},"page":"387-403","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Pro2SAM: Mask Prompt to\u00a0SAM with\u00a0Grid Points for\u00a0Weakly Supervised Object Localization"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5791-3674","authenticated-orcid":false,"given":"Xi","family":"Yang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2983-4044","authenticated-orcid":false,"given":"Songsong","family":"Duan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4695-6134","authenticated-orcid":false,"given":"Nannan","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1443-0776","authenticated-orcid":false,"given":"Xinbo","family":"Gao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,12,7]]},"reference":[{"key":"24_CR1","unstructured":"Achiam, O., Adler, S., Agarwal, S.: GPT-4 technical report (2023)"},{"key":"24_CR2","doi-asserted-by":"crossref","unstructured":"Bai, H., Zhang, R., Wang, J., Wan, X.: Weakly supervised object localization via transformer with implicit spatial calibration. In: Proceedings of the ECCV (2022)","DOI":"10.1007\/978-3-031-20077-9_36"},{"key":"24_CR3","doi-asserted-by":"crossref","unstructured":"Cao, X., et al.: LocLoc: low-level cues and local-area guides for weakly supervised object localization. In: Proceedings of the ACM MM, pp. 5655\u20135664 (2023)","DOI":"10.1145\/3581783.3612165"},{"key":"24_CR4","unstructured":"Cen, J., et\u00a0al.: Segment anything in 3D with NeRFs. In: Proceedings of the NeurIPS, vol.\u00a036 (2024)"},{"key":"24_CR5","doi-asserted-by":"crossref","unstructured":"Chen, Z., et al.: Category-aware allocation transformer for weakly supervised object localization. In: Proceedings of the ICCV, pp. 6643\u20136652 (2023)","DOI":"10.1109\/ICCV51070.2023.00611"},{"key":"24_CR6","doi-asserted-by":"crossref","unstructured":"Chen, Z., et al.: LCTR: on awakening the local continuity of transformer for weakly supervised object localization. In: Proceedings of the AAAI, pp. 410\u2013418 (2022)","DOI":"10.1609\/aaai.v36i1.19918"},{"key":"24_CR7","doi-asserted-by":"crossref","unstructured":"Choe, J., Oh, S.J., Lee, S., Chun, S., Akata, Z., Shim, H.: Evaluating weakly supervised object localization methods right. In: Proceedings of the CVPR, pp. 3133\u20133142 (2020)","DOI":"10.1109\/CVPR42600.2020.00320"},{"key":"24_CR8","doi-asserted-by":"crossref","unstructured":"Choe, J., Shim, H.: Attention-based dropout layer for weakly supervised object localization. In: Processing of the CVPR (2019)","DOI":"10.1109\/CVPR.2019.00232"},{"key":"24_CR9","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Houlsby, N.: An image is worth $$16 \\times 16$$ words: transformers for image recognition at scale. In: Proceedings of the ICLR (2021)"},{"key":"24_CR10","unstructured":"Feng, C.B., Lai, Q., Liu, K., Su, H., Vong, C.M.: Boosting few-shot semantic segmentation via segment anything model. arXiv preprint arXiv:2401.09826 (2024)"},{"key":"24_CR11","doi-asserted-by":"crossref","unstructured":"Gupta, S., Lakhotia, S., Rawat, A., Tallamraju, R.: Vitol: Vision transformer for weakly supervised object localization. In: Proceedings of the CVPR, pp. 4101\u20134110 (2022)","DOI":"10.1109\/CVPRW56347.2022.00455"},{"key":"24_CR12","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2023.103061","volume":"92","author":"Y Huang","year":"2024","unstructured":"Huang, Y., et al.: Segment anything model for medical images? Med. Image Anal. 92, 103061 (2024)","journal-title":"Med. Image Anal."},{"key":"24_CR13","unstructured":"Ke, L., et\u00a0al.: Segment anything in high quality. In: Proceedings of the NeurIPS, vol.\u00a036 (2024)"},{"key":"24_CR14","unstructured":"Kenton, J.D.M.W.C., Toutanova, L.K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of NAACL-HLT, vol.\u00a01, p.\u00a02 (2019)"},{"key":"24_CR15","doi-asserted-by":"crossref","unstructured":"Kirillov, A., et al.: Segment anything. In: Proceedings of the ICCV, pp. 3992\u20134003 (2023)","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"24_CR16","doi-asserted-by":"crossref","unstructured":"Lewis, M., et al.: Bart: denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. In: Proceedings of the ACL (2020)","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"24_CR17","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the ICCV, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"24_CR18","doi-asserted-by":"crossref","unstructured":"Lu, W., Jia, X., Xie, W., Shen, L., Zhou, Y., Duan, J.: Geometry constrained weakly supervised object localization. In: Proceedings of the ECCV, pp. 481\u2013496 (2020)","DOI":"10.1007\/978-3-030-58574-7_29"},{"key":"24_CR19","doi-asserted-by":"crossref","unstructured":"Mai, J., Yang, M., Luo, W.: Erasing integrated learning: a simple yet effective approach for weakly supervised object localization. In: Processing of the CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00879"},{"key":"24_CR20","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2023.102918","volume":"89","author":"MA Mazurowski","year":"2023","unstructured":"Mazurowski, M.A., Dong, H., Gu, H., Yang, J., Konz, N., Zhang, Y.: Segment anything model for medical image analysis: an experimental study. Med. Image Anal. 89, 102918 (2023)","journal-title":"Med. Image Anal."},{"key":"24_CR21","doi-asserted-by":"crossref","unstructured":"Meng, M., Zhang, T., Tian, Q., Zhang, Y., Wu, F.: Foreground activation maps for weakly supervised object localization. In: Processing of the ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00337"},{"key":"24_CR22","doi-asserted-by":"crossref","unstructured":"Pan, X., et al.: Unveiling the potential of structure preserving for weakly supervised object localization. In: Processing of the CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.01147"},{"key":"24_CR23","doi-asserted-by":"crossref","unstructured":"Pan, Y., Yao, Y., Cao, Y., Chen, C., Lu, X.: Coarse2fine: local consistency aware re-prediction for weakly supervised object localization. In: Proceedings of the AAAI, vol.\u00a037, pp. 2002\u20132010 (2023)","DOI":"10.1609\/aaai.v37i2.25292"},{"key":"24_CR24","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: Proceedings of the ICML, vol.\u00a0139, pp. 8748\u20138763 (2021)"},{"key":"24_CR25","unstructured":"Raji\u010d, F., Ke, L., Tai, Y.W., Tang, C.K., Danelljan, M., Yu, F.: Segment anything meets point tracking. arXiv preprint arXiv:2307.01197 (2023)"},{"key":"24_CR26","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., et al.: Imagenet large scale visual recognition challenge. Int. J. Comput. Vis. 115, 211\u2013252 (2015)","journal-title":"Int. J. Comput. Vis."},{"key":"24_CR27","doi-asserted-by":"crossref","unstructured":"Song, Y., Jang, S., Katabi, D., Son, J.: Unsupervised object localization with representer point selection. In: Proceedings of the ICCV, pp. 6534\u20136544 (2023)","DOI":"10.1109\/ICCV51070.2023.00601"},{"key":"24_CR28","unstructured":"Brown, T.B., et\u00a0al.: Language models are few-shot learners. In: Proceedings of the NeurIPS, pp. 1877\u20131901 (2020)"},{"key":"24_CR29","unstructured":"Touvron, H., Cord, M., Douze, M., Massa, F., Sablayrolles, A., J\u00e9gou, H.: Training data-efficient image transformers & distillation through attention. In: Proceedings of the ICML, pp. 10347\u201310357. PMLR (2021)"},{"key":"24_CR30","doi-asserted-by":"crossref","unstructured":"Wang, Y., Shen, X., Hu, S.X., Yuan, Y., Crowley, J.L., Vaufreydaz, D.: Self-supervised transformers for unsupervised object discovery using normalized cut. In: Proceedings of the CVPR, pp. 14543\u201314553 (2022)","DOI":"10.1109\/CVPR52688.2022.01414"},{"key":"24_CR31","doi-asserted-by":"crossref","unstructured":"Wei, J., Wang, Q., Li, Z., Wang, S., Zhou, S.K., Cui, S.: Shallow feature matters for weakly supervised object localization. In: Proceedings of the CVPR, pp. 5993\u20136001 (2021)","DOI":"10.1109\/CVPR46437.2021.00593"},{"key":"24_CR32","unstructured":"Welinder, P., et al.: Caltech-UCSD birds 200 (2010)"},{"key":"24_CR33","doi-asserted-by":"crossref","unstructured":"Wu, P., Zhai, W., Cao, Y.: Background activation suppression for weakly supervised object localization. In: Proceedings of the CVPR, pp. 14228\u201314237 (2022)","DOI":"10.1109\/CVPR52688.2022.01385"},{"key":"24_CR34","doi-asserted-by":"crossref","unstructured":"Wu, P., Zhai, W., Cao, Y., Luo, J., Zha, Z.J.: Spatial-aware token for weakly supervised object localization. In: Proceedings of the ICCV, pp. 1844\u20131854 (2023)","DOI":"10.1109\/ICCV51070.2023.00177"},{"key":"24_CR35","doi-asserted-by":"crossref","unstructured":"Xie, J., Luo, C., Zhu, X., Jin, Z., Lu, W., Shen, L.: Online refinement of low-level feature based activation map for weakly supervised object localization. In: Proceedings of the ICCV, pp. 132\u2013141 (2021)","DOI":"10.1109\/ICCV48922.2021.00020"},{"key":"24_CR36","doi-asserted-by":"crossref","unstructured":"Xie, J., Xiang, J., Chen, J., Hou, X., Zhao, X., Shen, L.: C2AM: contrastive learning of class-agnostic activation map for weakly supervised object localization and semantic segmentation. In: Proceedings of the CVPR, pp. 989\u2013998 (2022)","DOI":"10.1109\/CVPR52688.2022.00106"},{"key":"24_CR37","doi-asserted-by":"crossref","unstructured":"Xu, J., et al.: Cream: weakly supervised object localization via class re-activation mapping. In: Proceedings of the CVPR, pp. 9437\u20139446 (2022)","DOI":"10.1109\/CVPR52688.2022.00922"},{"key":"24_CR38","doi-asserted-by":"crossref","unstructured":"Xu, L., Ouyang, W., Bennamoun, M., Boussaid, F., Xu, D.: Learning multi-modal class-specific tokens for weakly supervised dense object localization. In: Proceedings of the CVPR, pp. 19596\u201319605 (2023)","DOI":"10.1109\/CVPR52729.2023.01877"},{"key":"24_CR39","unstructured":"Xu, M., Yin, X., Qiu, L., Liu, Y., Tong, X., Han, X.: Sampro3D: locating SAM prompts in 3D for zero-shot scene segmentation. arXiv preprint arXiv:2311.17707 (2023)"},{"key":"24_CR40","doi-asserted-by":"crossref","unstructured":"Xue, H., Liu, C., Wan, F., Jiao, J., Ji, X., Ye, Q.: DANet: divergent activation for weakly supervised object localization. In: Proceedings of the ICCV, pp. 6589\u20136598 (2019)","DOI":"10.1109\/ICCV.2019.00669"},{"key":"24_CR41","first-page":"1","volume":"61","author":"Z Yan","year":"2023","unstructured":"Yan, Z., et al.: RingMo-SAM: a foundation model for segment anything in multimodal remote-sensing images. IEEE Trans. Geosci. Remote Sens. 61, 1\u201316 (2023)","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"24_CR42","unstructured":"Yang, J., Gao, M., Li, Z., Gao, S., Wang, F., Zheng, F.: Track anything: segment anything meets videos. arXiv preprint arXiv:2304.11968 (2023)"},{"key":"24_CR43","unstructured":"Yao, Y., et al.: TS-CAM: token semantic coupled attention map for weakly supervised object localization. IEEE Trans. Neural Netw. Learn. Syst. 1\u201313 (2022)"},{"key":"24_CR44","unstructured":"Yu, T., et al.: Inpaint anything: segment anything meets image inpainting. arXiv preprint arXiv:2304.06790 (2023)"},{"key":"24_CR45","doi-asserted-by":"crossref","unstructured":"Zhang, C.L., Cao, Y.H., Wu, J.: Rethinking the route towards weakly supervised object localization. In: Proceedings of the CVPR, pp. 13460\u201313469 (2020)","DOI":"10.1109\/CVPR42600.2020.01347"},{"issue":"9","key":"24_CR46","first-page":"5866","volume":"44","author":"D Zhang","year":"2021","unstructured":"Zhang, D., Han, J., Cheng, G., Yang, M.H.: Weakly supervised object localization and detection: a survey. IEEE Trans. Pattern Anal. Mach. Intell. 44(9), 5866\u20135885 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"24_CR47","doi-asserted-by":"crossref","unstructured":"Zhang, X., Liu, Y., Lin, Y., Liao, Q., Li, Y.: UV-SAM: adapting segment anything model for urban village identification. In: Proceeding of the AAAI (2024)","DOI":"10.1609\/aaai.v38i20.30260"},{"key":"24_CR48","doi-asserted-by":"crossref","unstructured":"Zhou, B., Khosla, A., Lapedriza, A., Oliva, A., Torralba, A.: Learning deep features for discriminative localization. In: Processing of the CVPR (2016)","DOI":"10.1109\/CVPR.2016.319"},{"key":"24_CR49","doi-asserted-by":"crossref","unstructured":"Zhu, L., Chen, Q., Jin, L., You, Y., Lu, Y.: Bagging regional classification activation maps for weakly supervised object localization. In: Proceedings of the ECCV, pp. 176\u2013192 (2022)","DOI":"10.1007\/978-3-031-20080-9_11"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72890-7_24","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,6]],"date-time":"2024-12-06T20:07:46Z","timestamp":1733515666000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72890-7_24"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,7]]},"ISBN":["9783031728891","9783031728907"],"references-count":49,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72890-7_24","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,12,7]]},"assertion":[{"value":"7 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}