{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T15:22:31Z","timestamp":1742916151938,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":31,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819736256"},{"type":"electronic","value":"9789819736263"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-97-3626-3_21","type":"book-chapter","created":{"date-parts":[[2024,6,20]],"date-time":"2024-06-20T10:07:45Z","timestamp":1718878065000},"page":"284-296","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Exploiting Diffusion Model as\u00a0Prompt Generator for\u00a0Object Localization"],"prefix":"10.1007","author":[{"given":"Yuqi","family":"Jiang","sequence":"first","affiliation":[]},{"given":"Qiankun","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Yichen","family":"Li","sequence":"additional","affiliation":[]},{"given":"Hao","family":"Jia","sequence":"additional","affiliation":[]},{"given":"Ying","family":"Fu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,6,21]]},"reference":[{"unstructured":"Abstreiter, K., Mittal, S., Bauer, S., Sch\u00f6lkopf, B., Mehrjou, A.: Diffusion-based representation learning. arXiv preprint arXiv:2105.14257 (2021)","key":"21_CR1"},{"unstructured":"Amit, T., Shaharbany, T., Nachmani, E., Wolf, L.: Segdiff: image segmentation with diffusion probabilistic models. arXiv preprint arXiv:2112.00390 (2021)","key":"21_CR2"},{"issue":"4","key":"21_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3592116","volume":"42","author":"H Chefer","year":"2023","unstructured":"Chefer, H., Alaluf, Y., Vinker, Y., Wolf, L., Cohen-Or, D.: Attend-and-excite: attention-based semantic guidance for text-to-image diffusion models. ACM Trans. Graph. 42(4), 1\u201310 (2023)","journal-title":"ACM Trans. Graph."},{"doi-asserted-by":"crossref","unstructured":"Chen, S., Sun, P., Song, Y., Luo, P.: Diffusiondet: diffusion model for object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 19830\u201319843 (2023)","key":"21_CR4","DOI":"10.1109\/ICCV51070.2023.01816"},{"unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat GANs on image synthesis. In: Proceedings of Advances in Neural Information Processing Systems, vol.\u00a034, pp. 8780\u20138794 (2021)","key":"21_CR5"},{"unstructured":"He, J., et al.: Partimagenet: a large, high-quality dataset of parts. arXiv preprint arXiv:2112.00933 (2021)","key":"21_CR6"},{"doi-asserted-by":"crossref","unstructured":"He, K., Chen, X., Xie, S., Li, Y., Doll\u00e1r, P., Girshick, R.: Masked autoencoders are scalable vision learners. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 16000\u201316009 (2022)","key":"21_CR7","DOI":"10.1109\/CVPR52688.2022.01553"},{"doi-asserted-by":"crossref","unstructured":"He, Z., Sun, T., Wang, K., Huang, X., Qiu, X.: DiffusionBERT: improving generative masked language models with diffusion models. arXiv preprint arXiv:2211.15029 (2022)","key":"21_CR8","DOI":"10.18653\/v1\/2023.acl-long.248"},{"unstructured":"Hertz, A., Mokady, R., Tenenbaum, J., Aberman, K., Pritch, Y., Cohen-Or, D.: Prompt-to-prompt image editing with cross attention control. arXiv preprint arXiv:2208.01626 (2022)","key":"21_CR9"},{"unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: Proceedings of Advances in Neural Information Processing Systems, vol.\u00a033, pp. 6840\u20136851 (2020)","key":"21_CR10"},{"unstructured":"Ho, J., Salimans, T.: Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598 (2022)","key":"21_CR11"},{"unstructured":"Kirillov, A., et\u00a0al.: Segment anything. arXiv preprint arXiv:2304.02643 (2023)","key":"21_CR12"},{"unstructured":"Kolesnikov, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. In: Proceedings of International Conference on Learning Representations (2020)","key":"21_CR13"},{"key":"21_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"280","DOI":"10.1007\/978-3-031-20077-9_17","volume-title":"Computer Vision - ECCV 2022","author":"Y Li","year":"2022","unstructured":"Li, Y., Mao, H., Girshick, R., He, K.: Exploring plain vision transformer backbones for object detection. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13669, pp. 280\u2013296. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20077-9_17"},{"doi-asserted-by":"crossref","unstructured":"Mokady, R., Hertz, A., Aberman, K., Pritch, Y., Cohen-Or, D.: Null-text inversion for editing real images using guided diffusion models. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6038\u20136047 (2023)","key":"21_CR15","DOI":"10.1109\/CVPR52729.2023.00585"},{"unstructured":"Nichol, A., Dhariwal, P., Ramesh, A., Shyam, P., Mishkin, P., McGrew, B., Sutskever, I., Chen, M.: Glide: towards photorealistic image generation and editing with text-guided diffusion models. arXiv preprint arXiv:2112.10741 (2021)","key":"21_CR16"},{"unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: Proceedings of International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)","key":"21_CR17"},{"unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip Latents. arXiv preprint arXiv:2204.06125, 1(2), 3 (2022)","key":"21_CR18"},{"doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","key":"21_CR19","DOI":"10.1109\/CVPR52688.2022.01042"},{"unstructured":"Saharia, C., et\u00a0al.: Photorealistic text-to-image diffusion models with deep language understanding. In: Proceedings of Advances in Neural Information Processing Systems, vol.\u00a035, pp. 36479\u201336494 (2022)","key":"21_CR20"},{"unstructured":"Schuhmann, C., et al.: Laion-400m: open dataset of clip-filtered 400 million image-text pairs. arXiv preprint arXiv:2111.02114 (2021)","key":"21_CR21"},{"unstructured":"Sohl-Dickstein, J., Weiss, E., Maheswaranathan, N., Ganguli, S.: Deep unsupervised learning using nonequilibrium thermodynamics. In: Proceedings of International Conference on Machine Learning, pp. 2256\u20132265. PMLR (2015)","key":"21_CR22"},{"unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502 (2020)","key":"21_CR23"},{"unstructured":"Vaswani, A., et al.: Attention is all you need. In: Proceedings of Advances in Neural Information Processing Systems, vol.\u00a030 (2017)","key":"21_CR24"},{"key":"21_CR25","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1007\/978-3-031-16452-1_4","volume-title":"Medical Image Computing and Computer Assisted Intervention - MICCAI 2022","author":"J Wolleb","year":"2022","unstructured":"Wolleb, J., Bieder, F., Sandk\u00fchler, R., Cattin, P.C.: Diffusion models for medical anomaly detection. In: Wang, L., Dou, Q., Fletcher, P.T., Speidel, S., Li, S. (eds.) MICCAI 2022. LNCS, vol. 13438, pp. 35\u201345. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-16452-1_4"},{"doi-asserted-by":"crossref","unstructured":"Xia, B., et al.: Diffir: efficient diffusion model for image restoration. arXiv preprint arXiv:2303.09472 (2023)","key":"21_CR26","DOI":"10.1109\/ICCV51070.2023.01204"},{"doi-asserted-by":"crossref","unstructured":"Xu, J., Liu, S., Vahdat, A., Byeon, W., Wang, X., De\u00a0Mello, S.: Open-vocabulary panoptic segmentation with text-to-image diffusion models. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2955\u20132966 (2023)","key":"21_CR27","DOI":"10.1109\/CVPR52729.2023.00289"},{"doi-asserted-by":"crossref","unstructured":"Xu, X., Xiong, T., Ding, Z., Tu, Z.: MasQCLIP for open-vocabulary universal image segmentation. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 887\u2013898 (2023)","key":"21_CR28","DOI":"10.1109\/ICCV51070.2023.00088"},{"doi-asserted-by":"crossref","unstructured":"Yang, X., Wang, X.: Diffusion model as representation learner. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 18938\u201318949 (2023)","key":"21_CR29","DOI":"10.1109\/ICCV51070.2023.01736"},{"doi-asserted-by":"crossref","unstructured":"Zhang, H., et al.: A simple framework for open-vocabulary segmentation and detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1020\u20131031 (2023)","key":"21_CR30","DOI":"10.1109\/ICCV51070.2023.00100"},{"unstructured":"Zhang, Z., Zhao, Z., Lin, Z.: Unsupervised representation learning from pre-trained diffusion probabilistic models. In: Proceedings of Advances in Neural Information Processing Systems, vol.\u00a035, pp. 22117\u201322130 (2022)","key":"21_CR31"}],"container-title":["Communications in Computer and Information Science","Digital Multimedia Communications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-3626-3_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,20]],"date-time":"2024-06-20T10:20:30Z","timestamp":1718878830000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-3626-3_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9789819736256","9789819736263"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-3626-3_21","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"21 June 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"IFTC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Forum on Digital TV and Wireless Multimedia Communications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Beijing","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 December 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 December 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iftc2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.siga.org.cn\/xshd\/iftc2023.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}