{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T18:06:40Z","timestamp":1774030000803,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":49,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,10,12]],"date-time":"2020-10-12T00:00:00Z","timestamp":1602460800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,10,12]]},"DOI":"10.1145\/3394171.3413551","type":"proceedings-article","created":{"date-parts":[[2020,10,12]],"date-time":"2020-10-12T12:27:38Z","timestamp":1602505658000},"page":"4383-4391","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":55,"title":["Sequential Attention GAN for Interactive Image Editing"],"prefix":"10.1145","author":[{"given":"Yu","family":"Cheng","sequence":"first","affiliation":[{"name":"Microsoft Dynamics 365 AI Research, Redmond, WA, USA"}]},{"given":"Zhe","family":"Gan","sequence":"additional","affiliation":[{"name":"Microsoft Dynamics 365 AI Research, Redmond, WA, USA"}]},{"given":"Yitong","family":"Li","sequence":"additional","affiliation":[{"name":"Duke University, Durham, NC, USA"}]},{"given":"Jingjing","family":"Liu","sequence":"additional","affiliation":[{"name":"Microsoft Dynamics 365 AI Research, Redmond, WA, USA"}]},{"given":"Jianfeng","family":"Gao","sequence":"additional","affiliation":[{"name":"Microsoft Research, Redmond, WA, USA"}]}],"member":"320","published-online":{"date-parts":[[2020,10,12]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00466"},{"key":"e_1_3_2_2_3_1","volume-title":"The neural painter: Multi-turn image generation. arXiv preprint arXiv:1806.06183","author":"Benmalek R. Y.","year":"2018","unstructured":"R. Y. Benmalek , C. Cardie , S. J. Belongie , X. He , and J. Gao . The neural painter: Multi-turn image generation. arXiv preprint arXiv:1806.06183 , 2018 . R. Y. Benmalek, C. Cardie, S. J. Belongie, X. He, and J. Gao. The neural painter: Multi-turn image generation. arXiv preprint arXiv:1806.06183, 2018."},{"key":"e_1_3_2_2_4_1","volume-title":"ICLR","author":"Bordes A.","year":"2017","unstructured":"A. Bordes , Y.-L. Boureau , and J. Weston . Learning end-to-end goal-oriented dialog . In ICLR , 2017 . A. Bordes, Y.-L. Boureau, and J. Weston. Learning end-to-end goal-oriented dialog. In ICLR, 2017."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1177\/1745691610393980"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00909"},{"key":"e_1_3_2_2_7_1","volume-title":"NeurIPS Workshop","author":"Chung J.","year":"2014","unstructured":"J. Chung , C. Gulcehre , K. Cho , and Y. Bengio . Empirical evaluation of gated recurrent neural networks on sequence modeling . In NeurIPS Workshop , 2014 . J. Chung, C. Gulcehre, K. Cho, and Y. Bengio. Empirical evaluation of gated recurrent neural networks on sequence modeling. In NeurIPS Workshop, 2014."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.121"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.475"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.355"},{"key":"e_1_3_2_2_12_1","volume-title":"Neural approaches to conversational ai. arXiv preprint arXiv:1809.08267","author":"Gao J.","year":"2018","unstructured":"J. Gao , M. Galley , and L. Li . Neural approaches to conversational ai. arXiv preprint arXiv:1809.08267 , 2018 . J. Gao, M. Galley, and L. Li. Neural approaches to conversational ai. arXiv preprint arXiv:1809.08267, 2018."},{"key":"e_1_3_2_2_13_1","volume-title":"NeurIPS","author":"Goodfellow I.","year":"2014","unstructured":"I. Goodfellow , J. Pouget-Abadie , M. Mirza , B. Xu , D. Warde-Farley , S. Ozair , A. Courville , and Y. Bengio . Generative adversarial nets . In NeurIPS , 2014 . I. Goodfellow, J. Pouget-Abadie, M. Mirza, B. Xu, D. Warde-Farley, S. Ozair, A. Courville, and Y. Bengio. Generative adversarial nets. In NeurIPS, 2014."},{"key":"e_1_3_2_2_14_1","volume-title":"NeurIPS","author":"Guo X.","year":"2018","unstructured":"X. Guo , H. Wu , Y. Cheng , S. Rennie , and R. S. Feris . Dialog-based interactive image retrieval . In NeurIPS , 2018 . X. Guo, H. Wu, Y. Cheng, S. Rennie, and R. S. Feris. Dialog-based interactive image retrieval. In NeurIPS, 2018."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_2_16_1","author":"He X.","year":"2008","unstructured":"X. He , L. Deng , and W. Chou . Discriminative learning in sequential pattern recognition. IEEE Signal Processing Magazine , 2008 . X. He, L. Deng, and W. Chou. Discriminative learning in sequential pattern recognition. IEEE Signal Processing Magazine, 2008.","journal-title":"Discriminative learning in sequential pattern recognition. IEEE Signal Processing Magazine"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_7"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.632"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00133"},{"key":"e_1_3_2_2_20_1","volume-title":"Codraw: Visual dialog for collaborative drawing. CoRR, abs\/1712.05558","author":"Kim J.","year":"2017","unstructured":"J. Kim , D. Parikh , D. Batra , B. Zhang , and Y. Tian . Codraw: Visual dialog for collaborative drawing. CoRR, abs\/1712.05558 , 2017 . J. Kim, D. Parikh, D. Batra, B. Zhang, and Y. Tian. Codraw: Visual dialog for collaborative drawing. CoRR, abs\/1712.05558, 2017."},{"key":"e_1_3_2_2_21_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma D. P.","year":"2014","unstructured":"D. P. Kingma and J. Ba . Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 , 2014 . D. P. Kingma and J. Ba. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980, 2014."},{"key":"e_1_3_2_2_22_1","first-page":"2203","volume-title":"NeurIPS","author":"Li C.-L.","year":"2017","unstructured":"C.-L. Li , W.-C. Chang , Y. Cheng , Y. Yang , and B. Poczos . Mmd gan: Towards deeper understanding of moment matching network . In NeurIPS , pages 2203 -- 2213 , 2017 . C.-L. Li, W.-C. Chang, Y. Cheng, Y. Yang, and B. Poczos. Mmd gan: Towards deeper understanding of moment matching network. In NeurIPS, pages 2203--2213, 2017."},{"key":"e_1_3_2_2_23_1","volume-title":"Bachgan: High-resolution image synthesis from salient object layout. arXiv preprint arXiv:2003.11690","author":"Li Y.","year":"2020","unstructured":"Y. Li , Y. Cheng , Z. Gan , L. Yu , L. Wang , and J. Liu . Bachgan: High-resolution image synthesis from salient object layout. arXiv preprint arXiv:2003.11690 , 2020 . Y. Li, Y. Cheng, Z. Gan, L. Yu, L. Wang, and J. Liu. Bachgan: High-resolution image synthesis from salient object layout. arXiv preprint arXiv:2003.11690, 2020."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00649"},{"key":"e_1_3_2_2_25_1","volume-title":"NeurIPS","author":"Liu M.-Y.","year":"2017","unstructured":"M.-Y. Liu , T. Breuel , and J. Kautz . Unsupervised image-to-image translation networks . In NeurIPS , 2017 . M.-Y. Liu, T. Breuel, and J. Kautz. Unsupervised image-to-image translation networks. In NeurIPS, 2017."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.124"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-5033"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-5539"},{"key":"e_1_3_2_2_29_1","volume-title":"Conditional generative adversarial nets. arXiv preprint arXiv:1411.1784","author":"Mirza M.","year":"2014","unstructured":"M. Mirza and S. Osindero . Conditional generative adversarial nets. arXiv preprint arXiv:1411.1784 , 2014 . M. Mirza and S. Osindero. Conditional generative adversarial nets. arXiv preprint arXiv:1411.1784, 2014."},{"key":"e_1_3_2_2_30_1","volume-title":"IJCNLP","author":"Mostafazadeh N.","year":"2017","unstructured":"N. Mostafazadeh , C. Brockett , B. Dolan , M. Galley , J. Gao , G. Spithourakis , and L. Vanderwende . Image-grounded conversations: Multimodal context for natural question and response generation . In IJCNLP , 2017 . N. Mostafazadeh, C. Brockett, B. Dolan, M. Galley, J. Gao, G. Spithourakis, and L. Vanderwende. Image-grounded conversations: Multimodal context for natural question and response generation. In IJCNLP, 2017."},{"key":"e_1_3_2_2_31_1","volume-title":"NeurIPS","author":"Nam S.","year":"2018","unstructured":"S. Nam , Y. Kim , and S. J. Kim . Text-adaptive generative adversarial networks: Manipulating images with natural language . In NeurIPS , 2018 . S. Nam, Y. Kim, and S. J. Kim. Text-adaptive generative adversarial networks: Manipulating images with natural language. In NeurIPS, 2018."},{"key":"e_1_3_2_2_32_1","volume-title":"ICML","author":"Odena A.","year":"2017","unstructured":"A. Odena , C. Olah , and J. Shlens . Conditional image synthesis with auxiliary classifier GANs . In ICML , 2017 . A. Odena, C. Olah, and J. Shlens. Conditional image synthesis with auxiliary classifier GANs. In ICML, 2017."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.278"},{"key":"e_1_3_2_2_34_1","volume-title":"ICML","author":"Reed S.","year":"2016","unstructured":"S. Reed , Z. Akata , X. Yan , L. Logeswaran , B. Schiele , and H. Lee . Generative adversarial text to image synthesis . In ICML , 2016 . S. Reed, Z. Akata, X. Yan, L. Logeswaran, B. Schiele, and H. Lee. Generative adversarial text to image synthesis. In ICML, 2016."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_49"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.723"},{"key":"e_1_3_2_2_37_1","volume-title":"AAAI","author":"Serban I. V.","year":"2016","unstructured":"I. V. Serban , A. Sordoni , Y. Bengio , A. Courville , and J. Pineau . Building end-to-end dialogue systems using generative hierarchical neural network models . In AAAI , 2016 . I. V. Serban, A. Sordoni, Y. Bengio, A. Courville, and J. Pineau. Building end-to-end dialogue systems using generative hierarchical neural network models. In AAAI, 2016."},{"key":"e_1_3_2_2_38_1","volume-title":"Chatpainter: Improving text to image generation using dialogue. CoRR, abs\/1802.08216","author":"Sharma S.","year":"2018","unstructured":"S. Sharma , D. Suhubdy , V. Michalski , S. E. Kahou , and Y. Bengio . Chatpainter: Improving text to image generation using dialogue. CoRR, abs\/1802.08216 , 2018 . S. Sharma, D. Suhubdy, V. Michalski, S. E. Kahou, and Y. Bengio. Chatpainter: Improving text to image generation using dialogue. CoRR, abs\/1802.08216, 2018."},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.541"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_20"},{"key":"e_1_3_2_2_41_1","volume-title":"Texturegan: Controlling deep image synthesis with texture patches. arXiv preprint arXiv:1706.02823","author":"Xian W.","year":"2017","unstructured":"W. Xian , P. Sangkloy , V. Agrawal , A. Raj , J. Lu , C. Fang , F. Yu , and J. Hays . Texturegan: Controlling deep image synthesis with texture patches. arXiv preprint arXiv:1706.02823 , 2017 . W. Xian, P. Sangkloy, V. Agrawal, A. Raj, J. Lu, C. Fang, F. Yu, and J. Hays. Texturegan: Controlling deep image synthesis with texture patches. arXiv preprint arXiv:1706.02823, 2017."},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00143"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.32"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.629"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.652"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00878"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.244"},{"key":"e_1_3_2_2_48_1","volume-title":"NeurIPS","author":"Zhu J.-Y.","year":"2017","unstructured":"J.-Y. Zhu , R. Zhang , D. Pathak , T. Darrell , A. A. Efros , O. Wang , and E. Shechtman . Toward multimodal image-to-image translation . In NeurIPS , 2017 . J.-Y. Zhu, R. Zhang, D. Pathak, T. Darrell, A. A. Efros, O. Wang, and E. Shechtman. Toward multimodal image-to-image translation. In NeurIPS, 2017."},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.186"}],"event":{"name":"MM '20: The 28th ACM International Conference on Multimedia","location":"Seattle WA USA","acronym":"MM '20","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 28th ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3394171.3413551","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3394171.3413551","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:47:14Z","timestamp":1750193234000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3394171.3413551"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,10,12]]},"references-count":49,"alternative-id":["10.1145\/3394171.3413551","10.1145\/3394171"],"URL":"https:\/\/doi.org\/10.1145\/3394171.3413551","relation":{},"subject":[],"published":{"date-parts":[[2020,10,12]]},"assertion":[{"value":"2020-10-12","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}