{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T20:53:17Z","timestamp":1743022397457,"version":"3.40.3"},"publisher-location":"Cham","reference-count":46,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031781711"},{"type":"electronic","value":"9783031781728"}],"license":[{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-78172-8_6","type":"book-chapter","created":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T09:48:07Z","timestamp":1733132887000},"page":"83-97","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Mask-ControlNet: Higher-Quality Image Generation with\u00a0an Additional Mask Prompt"],"prefix":"10.1007","author":[{"given":"Zhiqi","family":"Huang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Huixin","family":"Xiong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haoyu","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Longguang","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiheng","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,12,3]]},"reference":[{"issue":"4","key":"6_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3592450","volume":"42","author":"O Avrahami","year":"2023","unstructured":"Avrahami, O., Fried, O., Lischinski, D.: Blended latent diffusion. ACM Trans. Graph. (TOG) 42(4), 1\u201311 (2023)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"6_CR2","unstructured":"Brock, A., Donahue, J., Simonyan, K.: Large scale GAN training for high fidelity natural image synthesis. arXiv preprint arXiv:1809.11096 (2018)"},{"key":"6_CR3","doi-asserted-by":"crossref","unstructured":"Brooks, T., Holynski, A., Efros, A.A.: InstructPix2Pix: learning to follow image editing instructions (2023)","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"6_CR4","doi-asserted-by":"crossref","unstructured":"Cai, S., et al.: DiffDreamer: towards consistent unsupervised single-view scene extrapolation with conditional diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 2139\u20132150, October 2023","DOI":"10.1109\/ICCV51070.2023.00204"},{"key":"6_CR5","doi-asserted-by":"crossref","unstructured":"Caron, M.,et al.: Emerging properties in self-supervised vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9650\u20139660 (2021)","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"6_CR6","doi-asserted-by":"crossref","unstructured":"Corneanu, C., Gadde, R., Martinez, A.M.: LatentPaint: image inpainting in latent space with diffusion models. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 4334\u20134343 (2024)","DOI":"10.1109\/WACV57701.2024.00428"},{"key":"6_CR7","doi-asserted-by":"crossref","unstructured":"Esser, P., Rombach, R., Ommer, B.: Taming transformers for high-resolution image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12873\u201312883 (2021)","DOI":"10.1109\/CVPR46437.2021.01268"},{"key":"6_CR8","unstructured":"Gal, R., et al.: An image is worth one word: personalizing text-to-image generation using textual inversion (2022)"},{"issue":"11","key":"6_CR9","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1145\/3422622","volume":"63","author":"I Goodfellow","year":"2020","unstructured":"Goodfellow, I., et al.: Generative adversarial networks. Commun. ACM 63(11), 139\u2013144 (2020)","journal-title":"Commun. ACM"},{"key":"6_CR10","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural. Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"6_CR11","unstructured":"Hu, E.J., et al.: LoRa: low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)"},{"key":"6_CR12","unstructured":"Huang, L., Chen, D., Liu, Y., Shen, Y., Zhao, D., Zhou, J.: Composer: creative and controllable image synthesis with composable conditions. arXiv preprint arXiv:2302.09778 (2023)"},{"key":"6_CR13","doi-asserted-by":"crossref","unstructured":"Ju, X., Liu, X., Wang, X., Bian, Y., Shan, Y., Xu, Q.: BrushNet: a plug-and-play image inpainting model with decomposed dual-branch diffusion (2024). https:\/\/arxiv.org\/abs\/2403.06976","DOI":"10.1007\/978-3-031-72661-3_9"},{"key":"6_CR14","doi-asserted-by":"crossref","unstructured":"Ju, X., Zeng, A., Zhao, C., Wang, J., Zhang, L., Xu, Q.: HumanSD: a native skeleton-guided diffusion model for human image generation. arXiv preprint arXiv:2304.04269 (2023)","DOI":"10.1109\/ICCV51070.2023.01465"},{"key":"6_CR15","doi-asserted-by":"crossref","unstructured":"Kang, M., et al.: Scaling up GANs for text-to-image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10124\u201310134 (2023)","DOI":"10.1109\/CVPR52729.2023.00976"},{"key":"6_CR16","unstructured":"Karras, T., Aila, T., Laine, S., Lehtinen, J.: Progressive growing of GANs for improved quality, stability, and variation. arXiv preprint arXiv:1710.10196 (2017)"},{"key":"6_CR17","doi-asserted-by":"crossref","unstructured":"Kawar, B., et al.: Imagic: text-based real image editing with diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6007\u20136017 (2023)","DOI":"10.1109\/CVPR52729.2023.00582"},{"key":"6_CR18","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)"},{"key":"6_CR19","unstructured":"Kingma, D.P., Dhariwal, P.: Glow: generative flow with invertible 1x1 convolutions. Adv. Neural Inf. Process. Syst. 31 (2018)"},{"key":"6_CR20","unstructured":"Kirillov, A., et\u00a0al.: Segment anything. arXiv preprint arXiv:2304.02643 (2023)"},{"key":"6_CR21","doi-asserted-by":"crossref","unstructured":"Kumari, N., Zhang, B., Wang, S.Y., Shechtman, E., Zhang, R., Zhu, J.Y.: Ablating concepts in text-to-image diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 22691\u201322702 (2023)","DOI":"10.1109\/ICCV51070.2023.02074"},{"key":"6_CR22","doi-asserted-by":"crossref","unstructured":"Kumari, N., Zhang, B., Zhang, R., Shechtman, E., Zhu, J.Y.: Multi-concept customization of text-to-image diffusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1931\u20131941 (2023)","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"6_CR23","unstructured":"Li, J., Li, D., Xiong, C., Hoi, S.: BLIP: bootstrapping language-image pre-training for unified vision-language understanding and generation. In: Chaudhuri, K., Jegelka, S., Song, L., Szepesvari, C., Niu, G., Sabato, S. (eds.) Proceedings of the 39th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol.\u00a0162, pp. 12888\u201312900. PMLR, 17\u201323 July 2022. https:\/\/proceedings.mlr.press\/v162\/li22n.html"},{"key":"6_CR24","doi-asserted-by":"crossref","unstructured":"Li, Y., et al.: GLIGEN: open-set grounded text-to-image generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22511\u201322521 (2023)","DOI":"10.1109\/CVPR52729.2023.02156"},{"key":"6_CR25","doi-asserted-by":"crossref","unstructured":"Liu, X., et al.: More control for free! Image synthesis with semantic diffusion guidance. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 289\u2013299 (2023)","DOI":"10.1109\/WACV56688.2023.00037"},{"key":"6_CR26","doi-asserted-by":"crossref","unstructured":"Lugmayr, A., Danelljan, M., Romero, A., Yu, F., Timofte, R., Van\u00a0Gool, L.: Repaint: inpainting using denoising diffusion probabilistic models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11461\u201311471 (2022)","DOI":"10.1109\/CVPR52688.2022.01117"},{"key":"6_CR27","doi-asserted-by":"crossref","unstructured":"Mou, C., et al.: T2I-adapter: learning adapters to dig out more controllable ability for text-to-image diffusion models. arXiv preprint arXiv:2302.08453 (2023)","DOI":"10.1609\/aaai.v38i5.28226"},{"key":"6_CR28","unstructured":"Van\u00a0den Oord, A., Kalchbrenner, N., Espeholt, L., Vinyals, O., Graves, A., et\u00a0al.: Conditional image generation with pixelCNN decoders. Adv. Neural Inf. Process. Syst. 29 (2016)"},{"key":"6_CR29","doi-asserted-by":"crossref","unstructured":"Peebles, W., Xie, S.: Scalable diffusion models with transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4195\u20134205 (2023)","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"6_CR30","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"issue":"14","key":"6_CR31","first-page":"71","volume":"8","author":"MDM Reddy","year":"2021","unstructured":"Reddy, M.D.M., Basha, M.S.M., Hari, M.M.C., Penchalaiah, M.N.: DALL-E: creating images from text. UGC Care Group I J. 8(14), 71\u201375 (2021)","journal-title":"UGC Care Group I J."},{"key":"6_CR32","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"6_CR33","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: DreamBooth: fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22500\u201322510 (2023)","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"6_CR34","unstructured":"Seitzer, M.: PyTorch-fid: fid score for pytorch (2020)"},{"key":"6_CR35","unstructured":"Sohl-Dickstein, J., Weiss, E., Maheswaranathan, N., Ganguli, S.: Deep unsupervised learning using nonequilibrium thermodynamics. In: International Conference on Machine Learning, pp. 2256\u20132265. PMLR (2015)"},{"key":"6_CR36","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502 (2020)"},{"issue":"5","key":"6_CR37","doi-asserted-by":"publisher","first-page":"874","DOI":"10.1016\/j.jvcir.2014.01.008","volume":"25","author":"A Tanchenko","year":"2014","unstructured":"Tanchenko, A.: Visual-PSNR measure of image quality. J. Vis. Commun. Image Represent. 25(5), 874\u2013878 (2014)","journal-title":"J. Vis. Commun. Image Represent."},{"key":"6_CR38","unstructured":"Van Den\u00a0Oord, A., Kalchbrenner, N., Kavukcuoglu, K.: Pixel recurrent neural networks. In: International Conference on Machine Learning, pp. 1747\u20131756. PMLR (2016)"},{"key":"6_CR39","doi-asserted-by":"crossref","unstructured":"Voynov, A., Aberman, K., Cohen-Or, D.: Sketch-guided text-to-image diffusion models. In: ACM SIGGRAPH 2023 Conference Proceedings, pp. 1\u201311 (2023)","DOI":"10.1145\/3588432.3591560"},{"issue":"4","key":"6_CR40","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang, Z., Bovik, A.C., Sheikh, H.R., Simoncelli, E.P.: Image quality assessment: from error visibility to structural similarity. IEEE Trans. Image Process. 13(4), 600\u2013612 (2004)","journal-title":"IEEE Trans. Image Process."},{"key":"6_CR41","doi-asserted-by":"crossref","unstructured":"Xie, S., Zhang, Z., Lin, Z., Hinz, T., Zhang, K.: SmartBrush: text and shape guided object inpainting with diffusion model. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22428\u201322437 (2023)","DOI":"10.1109\/CVPR52729.2023.02148"},{"key":"6_CR42","unstructured":"Yu, T., et al.: Inpaint anything: segment anything meets image inpainting (2023). https:\/\/arxiv.org\/abs\/2304.06790"},{"key":"6_CR43","doi-asserted-by":"crossref","unstructured":"Zhang, L., Rao, A., Agrawala, M.: Adding conditional control to text-to-image diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3836\u20133847 (2023)","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"6_CR44","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., Wang, O.: The unreasonable effectiveness of deep features as a perceptual metric. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 586\u2013595 (2018)","DOI":"10.1109\/CVPR.2018.00068"},{"key":"6_CR45","doi-asserted-by":"crossref","unstructured":"Zheng, G., Zhou, X., Li, X., Qi, Z., Shan, Y., Li, X.: LayoutDiffusion: controllable diffusion model for layout-to-image generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22490\u201322499 (2023)","DOI":"10.1109\/CVPR52729.2023.02154"},{"key":"6_CR46","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Liu, B., Zhu, Y., Yang, X., Chen, C., Xu, J.: Shifted diffusion for text-to-image generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10157\u201310166 (2023)","DOI":"10.1109\/CVPR52729.2023.00979"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-78172-8_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T10:04:14Z","timestamp":1733133854000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-78172-8_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,3]]},"ISBN":["9783031781711","9783031781728"],"references-count":46,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-78172-8_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,12,3]]},"assertion":[{"value":"3 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kolkata","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"India","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icpr2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icpr2024.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}