{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T18:03:05Z","timestamp":1774720985897,"version":"3.50.1"},"reference-count":54,"publisher":"Springer Science and Business Media LLC","issue":"10-11","license":[{"start":{"date-parts":[[2020,2,24]],"date-time":"2020-02-24T00:00:00Z","timestamp":1582502400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,2,24]],"date-time":"2020-02-24T00:00:00Z","timestamp":1582502400000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"name":"NSERC Discovery"},{"name":"NSERC DAS"},{"name":"NSERC CFI"},{"name":"NVIDIA GPU Grant"},{"name":"Mitacs Globalink Research Award"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2020,11]]},"DOI":"10.1007\/s11263-020-01300-7","type":"journal-article","created":{"date-parts":[[2020,2,24]],"date-time":"2020-02-24T11:07:22Z","timestamp":1582542442000},"page":"2418-2435","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":35,"title":["Layout2image: Image Generation from Layout"],"prefix":"10.1007","volume":"128","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2120-2571","authenticated-orcid":false,"given":"Bo","family":"Zhao","sequence":"first","affiliation":[]},{"given":"Weidong","family":"Yin","sequence":"additional","affiliation":[]},{"given":"Lili","family":"Meng","sequence":"additional","affiliation":[]},{"given":"Leonid","family":"Sigal","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,2,24]]},"reference":[{"key":"1300_CR1","unstructured":"Caesar, H., Uijlings, J., & Ferrari, V. (2016). Coco-stuff: Thing and stuff classes in context. arXiv:1612.03716."},{"key":"1300_CR2","unstructured":"Chen, X., Duan, Y., Houthooft, R., Schulman, J., Sutskever, I., & Abbeel, P. (2016). InfoGAN: Interpretable representation learning by information maximizing generative adversarial nets. In NIPS."},{"key":"1300_CR3","unstructured":"Cheung, B., Livezey, J. A., Bansal, A. K., & Olshausen, B.A. (2015). Discovering hidden factors of variation in deep networks. In ICLR workshop."},{"key":"1300_CR4","unstructured":"de\u00a0Vries, H., Strub, F., Mary, J., Larochelle, H., Pietquin, O., & Courville, A. (2017). Modulating early visual processing by language. In NIPS."},{"key":"1300_CR5","unstructured":"Denton, E., & Birodkar, V. (2017). Unsupervised learning of disentangled representations from video. In NIPS."},{"key":"1300_CR6","doi-asserted-by":"crossref","unstructured":"Dosovitskiy, A., Tobias\u00a0Springenberg, J., & Brox, T. (2015). Learning to generate chairs with convolutional neural networks. In CVPR.","DOI":"10.1109\/CVPR.2015.7298761"},{"key":"1300_CR7","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In CVPR.","DOI":"10.1109\/CVPR.2016.90"},{"key":"1300_CR8","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., & Hochreiter, S. (2017). GANs trained by a two time-scale update rule converge to a local Nash equilibrium. In NIPS."},{"issue":"8","key":"1300_CR9","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., & Schmidhuber, J. (1997). Long short-term memory. Neural Computing, 9(8), 1735\u20131780.","journal-title":"Neural Computing"},{"key":"1300_CR10","doi-asserted-by":"crossref","unstructured":"Hong, S., Yang, D., Choi, J., & Lee, H. (2018). Inferring semantic layout for hierarchical text-to-image synthesis. In CVPR.","DOI":"10.1109\/CVPR.2018.00833"},{"key":"1300_CR11","unstructured":"Ioffe, S., & Szegedy, C. (2015). Batch normalization: accelerating deep network training by reducing internal covariate shift. arXiv:1502.03167."},{"key":"1300_CR12","doi-asserted-by":"crossref","unstructured":"Isola, P., Zhu, J. Y., Zhou, T., & Efros, A. A. (2017). Image-to-image translation with conditional adversarial networks. In CVPR.","DOI":"10.1109\/CVPR.2017.632"},{"key":"1300_CR13","doi-asserted-by":"crossref","unstructured":"Johnson, J., Gupta, A., & Fei-Fei, L. (2018). Image generation from scene graphs. In CVPR.","DOI":"10.1109\/CVPR.2018.00133"},{"key":"1300_CR14","unstructured":"Karacan, L., Akata, Z., Erdem, A., & Erdem, E. (2016). Learning to generate images of outdoor scenes from attributes and semantic layouts. arXiv:1612.00215."},{"key":"1300_CR15","unstructured":"Kim, J. H., Parikh, D., Batra, D., Zhang, B. T., & Tian, Y. (2017). Codraw: visual dialog for collaborative drawing. arXiv:1712.05558."},{"key":"1300_CR16","unstructured":"Kingma, D. P., & Ba, J. (2014). Adam: A method for stochastic optimization. arXiv:1412.6980."},{"key":"1300_CR17","unstructured":"Kingma, D. P., & Welling, M. (2014). Auto-encoding variational bayes. In ICLR."},{"key":"1300_CR18","doi-asserted-by":"crossref","unstructured":"Krishna, R., Zhu, Y., Groth, O., Johnson, J., Hata, K., Kravitz, J., Chen, S., Kalantidis, Y., Li, L. J., & Shamma, D. A., et\u00a0al. (2017). Visual genome: Connecting language and vision using crowdsourced dense image annotations. In IJCV.","DOI":"10.1007\/s11263-016-0981-7"},{"key":"1300_CR19","unstructured":"Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). Imagenet classification with deep convolutional neural networks. In NIPS."},{"key":"1300_CR20","doi-asserted-by":"crossref","unstructured":"Lai, W. S., Huang, J. B., Ahuja, N., & Yang, M. H. (2017). Deep Laplacian pyramid networks for fast and accurate super-resolution. In CVPR.","DOI":"10.1109\/CVPR.2017.618"},{"key":"1300_CR21","doi-asserted-by":"crossref","unstructured":"Lee, H. Y., Tseng, H. Y., Huang, J. B., Singh, M., & Yang, M. H. (2018). Diverse image-to-image translation via disentangled representations. In ECCV.","DOI":"10.1007\/978-3-030-01246-5_3"},{"key":"1300_CR22","unstructured":"Liu, M. Y., Breuel, T., & Kautz, J. (2017). Unsupervised image-to-image translation networks. In NIPS."},{"key":"1300_CR23","doi-asserted-by":"crossref","unstructured":"Ma, L., Sun, Q., Georgoulis, S., Gool, L. V., Schiele, B., & Fritz, M. (2018a). Disentangled person image generation. In CVPR.","DOI":"10.1109\/CVPR.2018.00018"},{"key":"1300_CR24","doi-asserted-by":"crossref","unstructured":"Ma, L., Sun, Q., Georgoulis, S., Van\u00a0Gool, L., Schiele, B., & Fritz, M. (2018b). Disentangled person image generation. In IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2018.00018"},{"key":"1300_CR25","unstructured":"Mansimov, E., Parisotto, E., Ba, J. L., & Salakhutdinov, R. (2015). Generating images from captions with attention. arXiv:1511.02793."},{"key":"1300_CR26","unstructured":"Mathieu, M., Zhao, J., Sprechmann, P., Ramesh, A., & LeCun, Y. (2016). Disentangling factors of variation in deep representations using adversarial training. In NIPS."},{"key":"1300_CR27","unstructured":"Mirza, M., & Osindero, S. (2014). Conditional generative adversarial nets. arXiv:1411.1784."},{"key":"1300_CR28","unstructured":"Miyato, T., Kataoka, T., Koyama, M., & Yoshida, Y. (2018). Spectral normalization for generative adversarial networks. In ICLR."},{"key":"1300_CR29","doi-asserted-by":"crossref","unstructured":"Murez, Z., Kolouri, S., Kriegman, D., Ramamoorthi, R., & Kim, K. (2018). Image to image translation for domain adaptation. In CVPR.","DOI":"10.1109\/CVPR.2018.00473"},{"key":"1300_CR30","doi-asserted-by":"crossref","unstructured":"Nilsback, M., & Zisserman, A. (2008). Automated flower classification over a large number of classes. In Indian conference on computer vision, graphics image processing.","DOI":"10.1109\/ICVGIP.2008.47"},{"key":"1300_CR31","unstructured":"Oord, A. v. d., Kalchbrenner, N., & Kavukcuoglu, K. (2016). Pixel recurrent neural networks. arXiv:1601.06759."},{"key":"1300_CR32","doi-asserted-by":"crossref","unstructured":"Park, T., Liu, M. Y., Wang, T. C., & Zhu, J. Y. (2019). GauGAN: semantic image synthesis with spatially adaptive normalization. In SIGGRAPH \u201919.","DOI":"10.1145\/3306305.3332370"},{"key":"1300_CR33","doi-asserted-by":"crossref","unstructured":"Pathak, D., Krahenbuhl, P., Donahue, J., Darrell, T., & Efros, A. A. (2016). Context encoders: feature learning by inpainting. In CVPR.","DOI":"10.1109\/CVPR.2016.278"},{"key":"1300_CR34","unstructured":"Reed, S., Oord, A. v. d., Kalchbrenner, N., Colmenarejo, S. G., Wang, Z., Belov, D., & de\u00a0Freitas, N. (2017). Parallel multiscale autoregressive density estimation. arXiv:1703.03664."},{"key":"1300_CR35","unstructured":"Reed, S. E., Akata, Z., Mohan, S., Tenka, S., Schiele, B., & Lee, H. (2016) Learning what and where to draw. In NIPS ."},{"key":"1300_CR36","unstructured":"Salimans, T., Goodfellow, I., Zaremba, W., Cheung, V., Radford, A., & Chen, X. (2016). Improved techniques for training GANs. In NIPS."},{"key":"1300_CR37","doi-asserted-by":"crossref","unstructured":"Sangkloy, P., Lu, J., Fang, C., Yu, F., & Hays, J. (2017). Scribbler: Controlling deep image synthesis with sketch and color. In The IEEE conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2017.723"},{"key":"1300_CR38","unstructured":"Sharma, S., Suhubdy, D., Michalski, V., Kahou, S. E., & Bengio, Y. (2018). ChatPainter: Improving text to image generation using dialogue. arXiv:1802.08216."},{"key":"1300_CR39","unstructured":"Shi, X., Chen, Z., Wang, H., Yeung, D. Y., Wong, W. K., & Woo, W. C. (2015). Convolutional LSTM network: a machine learning approach for precipitation nowcasting. In NIPS."},{"key":"1300_CR40","unstructured":"Simonyan, K., & Zisserman, A. (2014). Very deep convolutional networks for large-scale image recognition. arXiv:1409.1556."},{"key":"1300_CR41","unstructured":"Sohn, K., Lee, H., & Yan, X. (2015). Learning structured output representation using deep conditional generative models. In NIPS."},{"key":"1300_CR42","unstructured":"Tan, F., Feng, S., & Ordonez, V. (2018). Text2scene: generating abstract scenes from textual descriptions. arXiv:1809.01110."},{"key":"1300_CR43","unstructured":"van\u00a0den Oord, A., Kalchbrenner, N., Espeholt, L., Vinyals, O., & Graves, A., et\u00a0al. (2016). Conditional image generation with pixelcnn decoders. In NIPS."},{"key":"1300_CR44","unstructured":"Wang, T. C., Liu, M. Y., Zhu, J. Y., Tao, A., Kautz, J., & Catanzaro, B. (2017). High-resolution image synthesis and semantic manipulation with conditional GANs. In 2018 IEEE\/CVF conference on computer vision and pattern recognition (pp. 8798\u20138807)."},{"key":"1300_CR45","doi-asserted-by":"crossref","unstructured":"Wang, T. C., Liu, M. Y., Zhu, J. Y., Tao, A., Kautz, J., & Catanzaro, B. (2018). High-resolution image synthesis and semantic manipulation with conditional GANs. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 8798\u20138807).","DOI":"10.1109\/CVPR.2018.00917"},{"key":"1300_CR46","unstructured":"Welinder, P., Branson, S., Mita, T., Wah, C., Schroff, F., Belongie, S., & Perona, P. (2010). Caltech-UCSD birds 200. Technical Report, CNS-TR-2010-001, California Institute of Technology."},{"key":"1300_CR47","doi-asserted-by":"crossref","unstructured":"Xian, W., Sangkloy, P., Agrawal, V., Raj, A., Lu, J., Fang, C., Yu, F., & Hays, J. (2018). TextureGAN: Controlling deep image synthesis with texture patches. In CVPR.","DOI":"10.1109\/CVPR.2018.00882"},{"key":"1300_CR48","doi-asserted-by":"crossref","unstructured":"Yang, C., Lu, X., Lin, Z., Shechtman, E., Wang, O., & Li, H. (2017). High-resolution image inpainting using multi-scale neural patch synthesis. In CVPR.","DOI":"10.1109\/CVPR.2017.434"},{"key":"1300_CR49","doi-asserted-by":"crossref","unstructured":"Zhang, H., Xu, T., Li, H., Zhang, S., Huang, X., Wang, X., & Metaxas, D. (2017). StackGAN: Text to photo-realistic image synthesis with stacked generative adversarial networks. In ICCV.","DOI":"10.1109\/ICCV.2017.629"},{"key":"1300_CR50","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A. A., Shechtman, E., & Wang, O. (2018). The unreasonable effectiveness of deep features as a perceptual metric. In CVPR.","DOI":"10.1109\/CVPR.2018.00068"},{"key":"1300_CR51","doi-asserted-by":"crossref","unstructured":"Zhang, W., Sun, J., & Tang, X. (2008). Cat head detection\u2014How to effectively exploit shape and texture features. In ECCV.","DOI":"10.1007\/978-3-540-88693-8_59"},{"key":"1300_CR52","doi-asserted-by":"crossref","unstructured":"Zhao, B., Chang, B., Jie, Z., & Sigal, L. (2018). Modular generative adversarial networks. In ECCV.","DOI":"10.1007\/978-3-030-01264-9_10"},{"key":"1300_CR53","doi-asserted-by":"crossref","unstructured":"Zhu, J. Y., Park, T., Isola, P., & Efros, A. A. (2017a). Unpaired image-to-image translation using cycle-consistent adversarial networks. In ICCV.","DOI":"10.1109\/ICCV.2017.244"},{"key":"1300_CR54","unstructured":"Zhu, J. Y., Zhang, R., Pathak, D., Darrell, T., Efros, A. A., Wang, O., & Shechtman, E. (2017b). Toward multimodal image-to-image translation. In NIPS."}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-020-01300-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11263-020-01300-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-020-01300-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,2,23]],"date-time":"2021-02-23T00:11:12Z","timestamp":1614039072000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11263-020-01300-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,2,24]]},"references-count":54,"journal-issue":{"issue":"10-11","published-print":{"date-parts":[[2020,11]]}},"alternative-id":["1300"],"URL":"https:\/\/doi.org\/10.1007\/s11263-020-01300-7","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,2,24]]},"assertion":[{"value":"14 April 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 February 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 February 2020","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}