{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:27:45Z","timestamp":1750220865481,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":66,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,3,17]],"date-time":"2020-03-17T00:00:00Z","timestamp":1584403200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,3,17]]},"DOI":"10.1145\/3377325.3377487","type":"proceedings-article","created":{"date-parts":[[2020,3,4]],"date-time":"2020-03-04T23:14:49Z","timestamp":1583363689000},"page":"335-346","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Generating need-adapted multimodal fragments"],"prefix":"10.1145","author":[{"given":"Gaurav","family":"Verma","sequence":"first","affiliation":[{"name":"Adobe Research, Bangalore, India"}]},{"given":"Suryateja","family":"BV","sequence":"additional","affiliation":[{"name":"IIT Kanpur, Kanpur, India"}]},{"given":"Samagra","family":"Sharma","sequence":"additional","affiliation":[{"name":"IIT Roorkee, Roorkee, India"}]},{"given":"Balaji Vasan","family":"Srinivasan","sequence":"additional","affiliation":[{"name":"Adobe Research, Bangalore, India"}]}],"member":"320","published-online":{"date-parts":[[2020,3,17]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Contextual String Embeddings for Sequence Labeling. In COLING 2018, 27th International Conference on Computational Linguistics. 1638--1649","author":"Akbik Alan","year":"2018","unstructured":"Alan Akbik , Duncan Blythe , and Roland Vollgraf . 2018 . Contextual String Embeddings for Sequence Labeling. In COLING 2018, 27th International Conference on Computational Linguistics. 1638--1649 . Alan Akbik, Duncan Blythe, and Roland Vollgraf. 2018. Contextual String Embeddings for Sequence Labeling. In COLING 2018, 27th International Conference on Computational Linguistics. 1638--1649."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.59"},{"key":"e_1_3_2_1_3_1","volume-title":"Gerard de Melo, and Matthew Stone.","author":"Alikhani Malihe","year":"2019","unstructured":"Malihe Alikhani , Sreyasi Nag Chowdhury , Gerard de Melo, and Matthew Stone. 2019 . CITE : A Corpus of Image-Text Discourse Relations . arXiv preprint arXiv:1904.06286 (2019). Malihe Alikhani, Sreyasi Nag Chowdhury, Gerard de Melo, and Matthew Stone. 2019. CITE: A Corpus of Image-Text Discourse Relations. arXiv preprint arXiv:1904.06286 (2019)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2798607"},{"volume-title":"Text and image: A critical introduction to the visual\/verbal divide","author":"Bateman John","key":"e_1_3_2_1_5_1","unstructured":"John Bateman . 2014. Text and image: A critical introduction to the visual\/verbal divide . Routledge . John Bateman. 2014. Text and image: A critical introduction to the visual\/verbal divide. Routledge."},{"key":"e_1_3_2_1_6_1","volume-title":"Large scale gan training for high fidelity natural image synthesis. arXiv preprint arXiv:1809.11096","author":"Brock Andrew","year":"2018","unstructured":"Andrew Brock , Jeff Donahue , and Karen Simonyan . 2018. Large scale gan training for high fidelity natural image synthesis. arXiv preprint arXiv:1809.11096 ( 2018 ). Andrew Brock, Jeff Donahue, and Karen Simonyan. 2018. Large scale gan training for high fidelity natural image synthesis. arXiv preprint arXiv:1809.11096 (2018)."},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the 2013 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. 673--679","author":"Brooke Julian","year":"2013","unstructured":"Julian Brooke and Graeme Hirst . 2013 . A multi-dimensional Bayesian approach to lexical style . In Proceedings of the 2013 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. 673--679 . Julian Brooke and Graeme Hirst. 2013. A multi-dimensional Bayesian approach to lexical style. In Proceedings of the 2013 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. 673--679."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.321"},{"key":"e_1_3_2_1_9_1","volume-title":"Combining Learned Lyrical Structures and Vocabulary for Improved Lyric Generation. arXiv preprint arXiv:1811.04651","author":"Castro Pablo Samuel","year":"2018","unstructured":"Pablo Samuel Castro and Maria Attarian . 2018. Combining Learned Lyrical Structures and Vocabulary for Improved Lyric Generation. arXiv preprint arXiv:1811.04651 ( 2018 ). Pablo Samuel Castro and Maria Attarian. 2018. Combining Learned Lyrical Structures and Vocabulary for Improved Lyric Generation. arXiv preprint arXiv:1811.04651 (2018)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3126686.3126723"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/1571941.1572102"},{"key":"e_1_3_2_1_12_1","unstructured":"Edgar Dale. 1969. Audiovisual methods in teaching. (1969).  Edgar Dale. 1969. Audiovisual methods in teaching. (1969)."},{"key":"e_1_3_2_1_13_1","volume-title":"NeurIPS Workshop on Machine Learning for Creativity and Design","author":"Dehmamy Nima","year":"2018","unstructured":"Nima Dehmamy , Luca Stornaiuolo , and Mauro Martino . 2018 . Vox2Net: From 3D Shapes to Network Sculptures . In NeurIPS Workshop on Machine Learning for Creativity and Design 2018. 1--3. Nima Dehmamy, Luca Stornaiuolo, and Mauro Martino. 2018. Vox2Net: From 3D Shapes to Network Sculptures. In NeurIPS Workshop on Machine Learning for Creativity and Design 2018. 1--3."},{"key":"e_1_3_2_1_14_1","volume-title":"GILT: Generating images from long text. arXiv preprint arXiv:1901.02404","author":"El Ori Bar","year":"2019","unstructured":"Ori Bar El , Ori Licht , and Netanel Yosephian . 2019 . GILT: Generating images from long text. arXiv preprint arXiv:1901.02404 (2019). Ori Bar El, Ori Licht, and Netanel Yosephian. 2019. GILT: Generating images from long text. arXiv preprint arXiv:1901.02404 (2019)."},{"key":"e_1_3_2_1_15_1","volume-title":"Jamie Ryan Kiros, and Sanja Fidler","author":"Faghri Fartash","year":"2017","unstructured":"Fartash Faghri , David J Fleet , Jamie Ryan Kiros, and Sanja Fidler . 2017 . Vse++: Improved visual-semantic embeddings. arXiv preprint arXiv:1707.05612 2, 7 (2017), 8. Fartash Faghri, David J Fleet, Jamie Ryan Kiros, and Sanja Fidler. 2017. Vse++: Improved visual-semantic embeddings. arXiv preprint arXiv:1707.05612 2, 7 (2017), 8."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Rebecca Anne Fiebrink. 2011. Real-time human interaction with supervised learning algorithms for music composition and performance. Citeseer.  Rebecca Anne Fiebrink. 2011. Real-time human interaction with supervised learning algorithms for music composition and performance. Citeseer.","DOI":"10.1145\/1753846.1753889"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.108"},{"key":"e_1_3_2_1_18_1","volume-title":"Unsupervised visual sense disambiguation for verbs using multimodal embeddings. arXiv preprint arXiv:1603.09188","author":"Gella Spandana","year":"2016","unstructured":"Spandana Gella , Mirella Lapata , and Frank Keller . 2016. Unsupervised visual sense disambiguation for verbs using multimodal embeddings. arXiv preprint arXiv:1603.09188 ( 2016 ). Spandana Gella, Mirella Lapata, and Frank Keller. 2016. Unsupervised visual sense disambiguation for verbs using multimodal embeddings. arXiv preprint arXiv:1603.09188 (2016)."},{"key":"e_1_3_2_1_19_1","unstructured":"Katy Ilonka Gero Giannis Karamanolakis and Lydia Chilton. [n.d.]. Transfer Learning for Style-Specific Text Generation. ([n. d.]).  Katy Ilonka Gero Giannis Karamanolakis and Lydia Chilton. [n.d.]. Transfer Learning for Style-Specific Text Generation. ([n. d.])."},{"key":"e_1_3_2_1_20_1","unstructured":"Ian Goodfellow Jean Pouget-Abadie Mehdi Mirza Bing Xu David Warde-Farley Sherjil Ozair Aaron Courville and Yoshua Bengio. 2014. Generative adversarial nets. In Advances in neural information processing systems. 2672--2680.  Ian Goodfellow Jean Pouget-Abadie Mehdi Mirza Bing Xu David Warde-Farley Sherjil Ozair Aaron Courville and Yoshua Bengio. 2014. Generative adversarial nets. In Advances in neural information processing systems. 2672--2680."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01237-3_37"},{"key":"e_1_3_2_1_22_1","volume-title":"Multi-Sentence Documents. arXiv preprint arXiv:1904.07826","author":"Hessel Jack","year":"2019","unstructured":"Jack Hessel , Lillian Lee , and David Mimno . 2019. Unsupervised Discovery of Multimodal Links in Multi-Image , Multi-Sentence Documents. arXiv preprint arXiv:1904.07826 ( 2019 ). Jack Hessel, Lillian Lee, and David Mimno. 2019. Unsupervised Discovery of Multimodal Links in Multi-Image, Multi-Sentence Documents. arXiv preprint arXiv:1904.07826 (2019)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.5555\/3305381.3305545"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.123"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"Phillip Isola Devi Parikh Antonio Torralba and Aude Oliva. 2011. Understanding the intrinsic memorability of images. In Advances in Neural Information Processing Systems. 2429--2437.  Phillip Isola Devi Parikh Antonio Torralba and Aude Oliva. 2011. Understanding the intrinsic memorability of images. In Advances in Neural Information Processing Systems. 2429--2437.","DOI":"10.21236\/ADA554133"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.632"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/WCSP.2016.7752571"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.494"},{"key":"e_1_3_2_1_29_1","volume-title":"Practical Text Classification With Large Pre-Trained Language Models. arXiv preprint arXiv:1812.01207","author":"Kant Neel","year":"2018","unstructured":"Neel Kant , Raul Puri , Nikolai Yakovenko , and Bryan Catanzaro . 2018. Practical Text Classification With Large Pre-Trained Language Models. arXiv preprint arXiv:1812.01207 ( 2018 ). Neel Kant, Raul Puri, Nikolai Yakovenko, and Bryan Catanzaro. 2018. Practical Text Classification With Large Pre-Trained Language Models. arXiv preprint arXiv:1812.01207 (2018)."},{"key":"e_1_3_2_1_30_1","volume-title":"News Article Teaser Tweets and How to Generate Them. arXiv preprint arXiv:1807.11535","author":"Karn Sanjeev Kumar","year":"2018","unstructured":"Sanjeev Kumar Karn , Mark Buckley , Ulli Waltinger , and Hinrich Sch\u00fctze . 2018. News Article Teaser Tweets and How to Generate Them. arXiv preprint arXiv:1807.11535 ( 2018 ). Sanjeev Kumar Karn, Mark Buckley, Ulli Waltinger, and Hinrich Sch\u00fctze. 2018. News Article Teaser Tweets and How to Generate Them. arXiv preprint arXiv:1807.11535 (2018)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33016610"},{"key":"e_1_3_2_1_32_1","volume-title":"Integrating Text and Image: Determining Multimodal Document Intent in Instagram Posts. arXiv preprint arXiv:1904.09073","author":"Kruk Julia","year":"2019","unstructured":"Julia Kruk , Jonah Lubin , Karan Sikka , Xiao Lin , Dan Jurafsky , and Ajay Divakaran . 2019. Integrating Text and Image: Determining Multimodal Document Intent in Instagram Posts. arXiv preprint arXiv:1904.09073 ( 2019 ). Julia Kruk, Jonah Lubin, Karan Sikka, Xiao Lin, Dan Jurafsky, and Ajay Divakaran. 2019. Integrating Text and Image: Determining Multimodal Document Intent in Instagram Posts. arXiv preprint arXiv:1904.09073 (2019)."},{"key":"e_1_3_2_1_33_1","unstructured":"Marie Anna Lee. [n.d.]. Relationship between words and images. ([n. d.]). http:\/\/marieannalee.com\/arts091\/lectures\/text&image.pdf.  Marie Anna Lee. [n.d.]. Relationship between words and images. ([n. d.]). http:\/\/marieannalee.com\/arts091\/lectures\/text&image.pdf."},{"key":"e_1_3_2_1_34_1","volume-title":"Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC-2018)","author":"Lefakis Leonidas","year":"2018","unstructured":"Leonidas Lefakis , Alan Akbik , and Roland Vollgraf . 2018 . FEIDEGGER: A Multimodal Corpus of Fashion Images and Descriptions in German . In Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC-2018) . Leonidas Lefakis, Alan Akbik, and Roland Vollgraf. 2018. FEIDEGGER: A Multimodal Corpus of Fashion Images and Descriptions in German. In Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC-2018)."},{"key":"e_1_3_2_1_35_1","volume-title":"Manzil Zaheer, and Barnabas Poczos.","author":"Li Chun-Liang","year":"2018","unstructured":"Chun-Liang Li , Eunsu Kang , Songwei Ge , Lingyao Zhang , Austin Dill , Manzil Zaheer, and Barnabas Poczos. 2018 . Hallucinating Point Cloud into 3D Sculptural Object . arXiv preprint arXiv:1811.05389 (2018). Chun-Liang Li, Eunsu Kang, Songwei Ge, Lingyao Zhang, Austin Dill, Manzil Zaheer, and Barnabas Poczos. 2018. Hallucinating Point Cloud into 3D Sculptural Object. arXiv preprint arXiv:1811.05389 (2018)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01219-9_28"},{"key":"e_1_3_2_1_37_1","volume-title":"Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74--81.","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin . 2004 . Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74--81. Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74--81."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.3115\/1073445.1073465"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.5555\/2898607.2898830"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2927476"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.264"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"e_1_3_2_1_43_1","volume-title":"Proceedings ASCILITE 2010: 27th annual conference of the Australasian Society for Computers in Learning in Tertiary Education: curriculum, technology and transformation for an unknown future","author":"Sankey Michael","year":"2010","unstructured":"Michael Sankey , Dawn Birch , and Michael Gardiner . 2010 . Engaging students through multimodal learning environments: The journey continues . In Proceedings ASCILITE 2010: 27th annual conference of the Australasian Society for Computers in Learning in Tertiary Education: curriculum, technology and transformation for an unknown future . University of Queensland, 852--863. Michael Sankey, Dawn Birch, and Michael Gardiner. 2010. Engaging students through multimodal learning environments: The journey continues. In Proceedings ASCILITE 2010: 27th annual conference of the Australasian Society for Computers in Learning in Tertiary Education: curriculum, technology and transformation for an unknown future. University of Queensland, 852--863."},{"key":"e_1_3_2_1_44_1","first-page":"8","article-title":"Affective analysis of professional and amateur abstract paintings using statistical analysis and art theory","volume":"5","author":"Sartori Andreza","year":"2015","unstructured":"Andreza Sartori , Victoria Yanulevskaya , Almila Akdag Salah , Jasper Uijlings , Elia Bruni , and Nicu Sebe . 2015 . Affective analysis of professional and amateur abstract paintings using statistical analysis and art theory . ACM Transactions on Interactive Intelligent Systems (TiiS) 5 , 2 (2015), 8 . Andreza Sartori, Victoria Yanulevskaya, Almila Akdag Salah, Jasper Uijlings, Elia Bruni, and Nicu Sebe. 2015. Affective analysis of professional and amateur abstract paintings using statistical analysis and art theory. ACM Transactions on Interactive Intelligent Systems (TiiS) 5, 2 (2015), 8.","journal-title":"ACM Transactions on Interactive Intelligent Systems (TiiS)"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.445"},{"key":"e_1_3_2_1_46_1","volume-title":"Asian Conference on Computer Vision. Springer, 483--498","author":"Siarohin Aliaksandr","year":"2018","unstructured":"Aliaksandr Siarohin , Gloria Zen , Nicu Sebe , and Elisa Ricci . 2018 . Enhancing perceptual attributes with bayesian style generation . In Asian Conference on Computer Vision. Springer, 483--498 . Aliaksandr Siarohin, Gloria Zen, Nicu Sebe, and Elisa Ricci. 2018. Enhancing perceptual attributes with bayesian style generation. In Asian Conference on Computer Vision. Springer, 483--498."},{"key":"e_1_3_2_1_47_1","volume-title":"Lucas Van Bramer, and Ajay Divakaran","author":"Sikka Karan","year":"2019","unstructured":"Karan Sikka , Lucas Van Bramer, and Ajay Divakaran . 2019 . Deep Unified Multimodal Embeddings for Understanding both Content and Users in Social Media Networks . arXiv preprint arXiv:1905.07075 (2019). Karan Sikka, Lucas Van Bramer, and Ajay Divakaran. 2019. Deep Unified Multimodal Embeddings for Understanding both Content and Users in Social Media Networks. arXiv preprint arXiv:1905.07075 (2019)."},{"key":"e_1_3_2_1_48_1","unstructured":"Jonathan A Simon. [n.d.]. Entendrepreneur: Generating Humorous Portmanteaus using Word-Embeddings. ([n. d.]).  Jonathan A Simon. [n.d.]. Entendrepreneur: Generating Humorous Portmanteaus using Word-Embeddings. ([n. d.])."},{"key":"e_1_3_2_1_49_1","unstructured":"Xavier Snelgrove and Matthew Tesfaldet. [n.d.]. Interactive CPPNs in GLSL. ([n.d.]).  Xavier Snelgrove and Matthew Tesfaldet. [n.d.]. Interactive CPPNs in GLSL. ([n.d.])."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/TG.2018.2846639"},{"volume-title":"Two-Stream Convolutional Networks for Dynamic Texture Synthesis. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Tesfaldet Matthew","key":"e_1_3_2_1_51_1","unstructured":"Matthew Tesfaldet , Marcus A. Brubaker , and Konstantinos G. Derpanis . 2018 . Two-Stream Convolutional Networks for Dynamic Texture Synthesis. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR). Matthew Tesfaldet, Marcus A. Brubaker, and Konstantinos G. Derpanis. 2018. Two-Stream Convolutional Networks for Dynamic Texture Synthesis. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1111\/1467-9868.00196"},{"key":"e_1_3_2_1_53_1","volume-title":"Multimodal Review Generation for Recommender Systems. In The World Wide Web Conference. ACM","author":"Truong Quoc-Tuan","year":"2019","unstructured":"Quoc-Tuan Truong and Hady Lauw . 2019 . Multimodal Review Generation for Recommender Systems. In The World Wide Web Conference. ACM , 1864--1874. Quoc-Tuan Truong and Hady Lauw. 2019. Multimodal Review Generation for Recommender Systems. In The World Wide Web Conference. ACM, 1864--1874."},{"key":"e_1_3_2_1_54_1","volume-title":"The blessings of multiple causes. arXiv preprint arXiv:1805.06826","author":"Wang Yixin","year":"2018","unstructured":"Yixin Wang and David M Blei . 2018. The blessings of multiple causes. arXiv preprint arXiv:1805.06826 ( 2018 ). Yixin Wang and David M Blei. 2018. The blessings of multiple causes. arXiv preprint arXiv:1805.06826 (2018)."},{"key":"e_1_3_2_1_55_1","volume-title":"or\" Paper on Computational Humor Accepted Despite Making Serious Advances\". arXiv preprint arXiv:1901.03253","author":"West Robert","year":"2019","unstructured":"Robert West and Eric Horvitz . 2019. Reverse-Engineering Satire , or\" Paper on Computational Humor Accepted Despite Making Serious Advances\". arXiv preprint arXiv:1901.03253 ( 2019 ). Robert West and Eric Horvitz. 2019. Reverse-Engineering Satire, or\" Paper on Computational Humor Accepted Despite Making Serious Advances\". arXiv preprint arXiv:1901.03253 (2019)."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.136"},{"key":"e_1_3_2_1_57_1","volume-title":"Learning neural templates for text generation. arXiv preprint arXiv:1808.10122","author":"Wiseman Sam","year":"2018","unstructured":"Sam Wiseman , Stuart M Shieber , and Alexander M Rush . 2018. Learning neural templates for text generation. arXiv preprint arXiv:1808.10122 ( 2018 ). Sam Wiseman, Stuart M Shieber, and Alexander M Rush. 2018. Learning neural templates for text generation. arXiv preprint arXiv:1808.10122 (2018)."},{"key":"e_1_3_2_1_58_1","unstructured":"Mike Wu and Noah Goodman. 2018. Multimodal generative models for scalable weakly-supervised learning. In Advances in Neural Information Processing Systems. 5575--5585.  Mike Wu and Noah Goodman. 2018. Multimodal generative models for scalable weakly-supervised learning. In Advances in Neural Information Processing Systems. 5575--5585."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00143"},{"key":"e_1_3_2_1_60_1","volume-title":"Recipeqa: A challenge dataset for multimodal comprehension of cooking recipes. arXiv preprint arXiv:1809.00812","author":"Yagcioglu Semih","year":"2018","unstructured":"Semih Yagcioglu , Aykut Erdem , Erkut Erdem , and Nazli Ikizler-Cinbis . 2018 . Recipeqa: A challenge dataset for multimodal comprehension of cooking recipes. arXiv preprint arXiv:1809.00812 (2018). Semih Yagcioglu, Aykut Erdem, Erkut Erdem, and Nazli Ikizler-Cinbis. 2018. Recipeqa: A challenge dataset for multimodal comprehension of cooking recipes. arXiv preprint arXiv:1809.00812 (2018)."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01267-0_51"},{"key":"e_1_3_2_1_62_1","volume-title":"Equal but not the same: Understanding the implicit relationship between persuasive images and text. arXiv preprint arXiv:1807.08205","author":"Zhang Mingda","year":"2018","unstructured":"Mingda Zhang , Rebecca Hwa , and Adriana Kovashka . 2018. Equal but not the same: Understanding the implicit relationship between persuasive images and text. arXiv preprint arXiv:1807.08205 ( 2018 ). Mingda Zhang, Rebecca Hwa, and Adriana Kovashka. 2018. Equal but not the same: Understanding the implicit relationship between persuasive images and text. arXiv preprint arXiv:1807.08205 (2018)."},{"key":"e_1_3_2_1_63_1","volume-title":"Neural latent extractive document summarization. arXiv preprint arXiv:1808.07187","author":"Zhang Xingxing","year":"2018","unstructured":"Xingxing Zhang , Mirella Lapata , Furu Wei , and Ming Zhou . 2018. Neural latent extractive document summarization. arXiv preprint arXiv:1808.07187 ( 2018 ). Xingxing Zhang, Mirella Lapata, Furu Wei, and Ming Zhou. 2018. Neural latent extractive document summarization. arXiv preprint arXiv:1808.07187 (2018)."},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240536"},{"key":"e_1_3_2_1_65_1","volume-title":"Integrating Transformer and Paraphrase Rules for Sentence Simplification. arXiv preprint arXiv:1810.11193","author":"Zhao Sanqiang","year":"2018","unstructured":"Sanqiang Zhao , Rui Meng , Daqing He , Saptono Andi , and Parmanto Bambang . 2018. Integrating Transformer and Paraphrase Rules for Sentence Simplification. arXiv preprint arXiv:1810.11193 ( 2018 ). Sanqiang Zhao, Rui Meng, Daqing He, Saptono Andi, and Parmanto Bambang. 2018. Integrating Transformer and Paraphrase Rules for Sentence Simplification. arXiv preprint arXiv:1810.11193 (2018)."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1448"}],"event":{"name":"IUI '20: 25th International Conference on Intelligent User Interfaces","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence","SIGCHI ACM Special Interest Group on Computer-Human Interaction"],"location":"Cagliari Italy","acronym":"IUI '20"},"container-title":["Proceedings of the 25th International Conference on Intelligent User Interfaces"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3377325.3377487","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3377325.3377487","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:23:42Z","timestamp":1750202622000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3377325.3377487"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,3,17]]},"references-count":66,"alternative-id":["10.1145\/3377325.3377487","10.1145\/3377325"],"URL":"https:\/\/doi.org\/10.1145\/3377325.3377487","relation":{},"subject":[],"published":{"date-parts":[[2020,3,17]]},"assertion":[{"value":"2020-03-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}