{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T01:35:18Z","timestamp":1743384918538,"version":"3.40.3"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031533013"},{"type":"electronic","value":"9783031533020"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-53302-0_2","type":"book-chapter","created":{"date-parts":[[2024,1,28]],"date-time":"2024-01-28T09:02:09Z","timestamp":1706432529000},"page":"17-31","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Training-Free Region Prediction with\u00a0Stable Diffusion"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5020-084X","authenticated-orcid":false,"given":"Yuma","family":"Honbu","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0431-183X","authenticated-orcid":false,"given":"Keiji","family":"Yanai","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,1,29]]},"reference":[{"key":"2_CR1","unstructured":"Bucher, M., Vu, T., Cord, M., P\u00e9rez, P.: Zero-shot semantic segmentation. In: Proceedings of CVF\/IEEE Computer Vision and Pattern Recognition (2019)"},{"key":"2_CR2","unstructured":"Burgert, R., Ranasinghe, K., Li, X., Ryoo, M.S.: Peekaboo: text to image diffusion models are zero-shot segmentors. In: Proceedings of arXiv:2211.13224 (2022)"},{"issue":"4","key":"2_CR3","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"L Chen","year":"2018","unstructured":"Chen, L., Papandreou, G., Kokkinos, I., Murphy, K., Yuille, A.L.: Deeplab: smantic image segmentation with deep convolutional nets, atrous convolution, and fully connected CRFs. IEEE Trans. Pattern Anal. Mach. Intell. 40(4), 834\u2013848 (2018)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"2_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"833","DOI":"10.1007\/978-3-030-01234-2_49","volume-title":"Computer Vision \u2013 ECCV 2018","author":"L-C Chen","year":"2018","unstructured":"Chen, L.-C., Zhu, Y., Papandreou, G., Schroff, F., Adam, H.: Encoder-decoder with atrous separable convolution for semantic image segmentation. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11211, pp. 833\u2013851. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01234-2_49"},{"issue":"1","key":"2_CR5","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1007\/s11263-014-0733-5","volume":"111","author":"M Everingham","year":"2015","unstructured":"Everingham, M., Eslami, S.M.A., Van Gool, L., Williams, C.K.I., Winn, J., Zisserman, A.: The pascal visual object classes challenge: a retrospective. Int. J. Comput. Vision 111(1), 98\u2013136 (2015)","journal-title":"Int. J. Comput. Vision"},{"key":"2_CR6","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"540","DOI":"10.1007\/978-3-031-20059-5_31","volume-title":"ECCV 2022","author":"G Ghiasi","year":"2022","unstructured":"Ghiasi, G., Gu, X., Cui, Y., Lin, T.: Scaling open-vocabulary image segmentation with image-level labels. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13696, pp. 540\u2013557. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20059-5_31"},{"key":"2_CR7","unstructured":"Hertz, A., Mokady, R., Tenenbaum, J., Aberman, K., Pritch, Y., Cohen-Or, D.: Prompt-to-prompt image editing with cross attention control. arXiv preprint arXiv:2208.01626 (2022)"},{"key":"2_CR8","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: Advances in Neural Information Processing Systems, vol. 33 (2020)"},{"key":"2_CR9","unstructured":"Kingma, P., Welling, M.: Auto-encoding variational Bayes. In: Proceedings of International Conference on Machine Learning (2014)"},{"key":"2_CR10","unstructured":"Kr\u00e4henb\u00fchl, P., Koltun, V.: Efficient inference in fully connected CRFs with gaussian edge potentials. In: Advances in Neural Information Processing Systems (2011)"},{"key":"2_CR11","unstructured":"Li, B., Weinberger, K.Q., Belongie, S., Koltun, V., Ranftl, R.: Language-driven semantic segmentation. In: Proceedings of International Conference on Learning Representation (2022)"},{"key":"2_CR12","doi-asserted-by":"crossref","unstructured":"Liang, F., et al.: Open-vocabulary semantic segmentation with mask-adapted clip. arXiv preprint arXiv:2210.04150 (2022)","DOI":"10.1109\/CVPR52729.2023.00682"},{"key":"2_CR13","doi-asserted-by":"crossref","unstructured":"L\u00fcddecke, T., Ecker, A.S.: Image segmentation using text and image prompts. In: Proceedings of CVF\/IEEE Computer Vision and Pattern Recognition, pp. 7086\u20137096 (2022)","DOI":"10.1109\/CVPR52688.2022.00695"},{"key":"2_CR14","doi-asserted-by":"crossref","unstructured":"Mottaghi, R., et al.: The role of context for object detection and semantic segmentation in the wild. In: Proceedings of CVF\/IEEE Computer Vision and Pattern Recognition (2014)","DOI":"10.1109\/CVPR.2014.119"},{"key":"2_CR15","doi-asserted-by":"crossref","unstructured":"Okamoto, K., Yanai, K.: UEC-FoodPIX complete: a large-scale food image segmentation dataset. In: Proceedings of ICPR Workshop on Multimedia Assisted Dietary Management (2021)","DOI":"10.1007\/978-3-030-68821-9_51"},{"key":"2_CR16","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. arXiv preprint arXiv:2103.00020 (2021)"},{"key":"2_CR17","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip Latents. arXiv preprint arXiv:2204.06125 (2022)"},{"key":"2_CR18","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of CVF\/IEEE Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"2_CR19","unstructured":"Saharia, C., et al.: Photorealistic text-to-image diffusion models with deep language understanding. arXiv preprint arXiv:2205.11487 (2022)"},{"key":"2_CR20","unstructured":"Schuhmann, C., et al.: Laion-5b: an open large-scale dataset for training next generation image-text models. arXiv preprint arXiv:2210.08402 (2022)"},{"key":"2_CR21","doi-asserted-by":"crossref","unstructured":"Wu, C., Lin, Z., Cohen, S., Bui, T., Maji, S.: Phrasecut: language-based image segmentation in the wild. In: Proceedings of CVF\/IEEE Computer Vision and Pattern Recognition, pp. 7086\u20137096 (2020)","DOI":"10.1109\/CVPR42600.2020.01023"},{"key":"2_CR22","unstructured":"Xiongwei, W., Xin, F., Ying, L., Ee-Peng, L., Steven, H., Qianru, S.: A large-scale benchmark for food image segmentation. arXiv preprint arXiv:2105.05409 (2021)"},{"key":"2_CR23","doi-asserted-by":"crossref","unstructured":"Xu, J., et al.: GroupViT: semantic segmentation emerges from text supervision. In: Proceedings of CVF\/IEEE Computer Vision and Pattern Recognition, pp. 18134\u201318144 (2022)","DOI":"10.1109\/CVPR52688.2022.01760"},{"key":"2_CR24","doi-asserted-by":"crossref","unstructured":"Zhou, B., Zhao, H., Puig, X., Fidler, S., Barriuso, A.: Scene parsing through ade20k dataset. In: Proceedings of CVF\/IEEE Computer Vision and Pattern Recognition (2017)","DOI":"10.1109\/CVPR.2017.544"},{"key":"2_CR25","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"696","DOI":"10.1007\/978-3-031-19815-1_40","volume-title":"Computer Vision \u2013 ECCV 2022","author":"C Zhou","year":"2022","unstructured":"Zhou, C., Loy, C.C., Dai, B.: Extract free dense labels from clip. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13688, pp. 696\u2013712. Springer, Cham (2022)"}],"container-title":["Lecture Notes in Computer Science","MultiMedia Modeling"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-53302-0_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,7]],"date-time":"2024-03-07T11:54:58Z","timestamp":1709812498000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-53302-0_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031533013","9783031533020"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-53302-0_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"29 January 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MMM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Multimedia Modeling","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Amsterdam","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"The Netherlands","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 January 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 February 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"mmm2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"ConfTool Pro","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"297","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"112","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"38% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.2","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.2","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}