{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:27:00Z","timestamp":1750220820472,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":34,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,10,15]],"date-time":"2019-10-15T00:00:00Z","timestamp":1571097600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Australia Research Council","award":["DE190100539"],"award-info":[{"award-number":["DE190100539"]}]},{"name":"National Program on Key Basic Research Project","award":["2015CB352300"],"award-info":[{"award-number":["2015CB352300"]}]},{"name":"Shenzhen Nanshan District Ling-Hang Team Grant","award":["LHTD20170005"],"award-info":[{"award-number":["LHTD20170005"]}]},{"name":"National Natural Science Foundation of China Major Project","award":["U1611461"],"award-info":[{"award-number":["U1611461"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,10,15]]},"DOI":"10.1145\/3343031.3351094","type":"proceedings-article","created":{"date-parts":[[2019,10,21]],"date-time":"2019-10-21T16:32:26Z","timestamp":1571675546000},"page":"1543-1551","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Watch, Reason and Code"],"prefix":"10.1145","author":[{"given":"Xuguang","family":"Duan","sequence":"first","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"given":"Qi","family":"Wu","sequence":"additional","affiliation":[{"name":"The University of Adelaide, Adelaide, Australia"}]},{"given":"Chuang","family":"Gan","sequence":"additional","affiliation":[{"name":"MIT-Watson AI Lab, Watson, China"}]},{"given":"Yiwei","family":"Zhang","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"given":"Wenbing","family":"Huang","sequence":"additional","affiliation":[{"name":"Tencent AI Lab, Shenzhen, China"}]},{"given":"Anton","family":"van den Hengel","sequence":"additional","affiliation":[{"name":"The University of Adelaide, Adelaide, Australia"}]},{"given":"Wenwu","family":"Zhu","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2019,10,15]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2008.284"},{"volume-title":"International Conference on Learning Representations (ICLR). OpenReviews. net.","year":"2017","author":"Balog M","key":"e_1_3_2_1_2_1"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2014.344"},{"volume-title":"Leveraging Grammar and Reinforcement Learning for Neural Program Synthesis. International Conference on Learning Representations (ICLR)","year":"2018","author":"Bunel Rudy","key":"e_1_3_2_1_4_1"},{"key":"e_1_3_2_1_5_1","unstructured":"Jacob Devlin Rudy R Bunel Rishabh Singh Matthew Hausknecht and Pushmeet Kohli. 2017. Neural Program Meta-Induction. In Advances in Neural Information Processing Systems (NIPS). 2080--2088.  Jacob Devlin Rudy R Bunel Rishabh Singh Matthew Hausknecht and Pushmeet Kohli. 2017. Neural Program Meta-Induction. In Advances in Neural Information Processing Systems (NIPS). 2080--2088."},{"key":"e_1_3_2_1_6_1","unstructured":"Xuguang Duan Wenbing Huang Chuang Gan Jingdong Wang Wenwu Zhu and Junzhou Huang. 2018. Weakly supervised dense event captioning in videos. In NIPS. 3059--3069.  Xuguang Duan Wenbing Huang Chuang Gan Jingdong Wang Wenwu Zhu and Junzhou Huang. 2018. Weakly supervised dense event captioning in videos. In NIPS. 3059--3069."},{"volume-title":"Learning to Infer Graphics Programs from Hand-Drawn Images. arXiv preprint arXiv:1707.09627","year":"2017","author":"Ellis Kevin","key":"e_1_3_2_1_7_1"},{"volume-title":"Stylenet: Generating attractive visual captions with styles. In CVPR . 3137--3146.","year":"2017","author":"Gan Chuang","key":"e_1_3_2_1_8_1"},{"key":"e_1_3_2_1_9_1","unstructured":"Chuang Gan Boqing Gong Kun Liu Hao Su and Leonidas J Guibas. 2018. Geometry guided convolutional neural networks for self-supervised video representation learning. In CVPR. 5589--5597.  Chuang Gan Boqing Gong Kun Liu Hao Su and Leonidas J Guibas. 2018. Geometry guided convolutional neural networks for self-supervised video representation learning. In CVPR. 5589--5597."},{"key":"e_1_3_2_1_10_1","unstructured":"Chuang Gan Naiyan Wang Yi Yang Dit-Yan Yeung and Alex G Hauptmann. 2015. DevNet: A deep event network for multimedia event detection and evidence recounting. In CVPR . 2568--2577.  Chuang Gan Naiyan Wang Yi Yang Dit-Yan Yeung and Alex G Hauptmann. 2015. DevNet: A deep event network for multimedia event detection and evidence recounting. In CVPR . 2568--2577."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"Zhe Gan Chuang Gan Xiaodong He Yunchen Pu Kenneth Tran Jianfeng Gao Lawrence Carin and Li Deng. 2017b. Semantic compositional networks for visual captioning. In CVPR . 5630--5639.  Zhe Gan Chuang Gan Xiaodong He Yunchen Pu Kenneth Tran Jianfeng Gao Lawrence Carin and Li Deng. 2017b. Semantic compositional networks for visual captioning. In CVPR . 5630--5639.","DOI":"10.1109\/CVPR.2017.127"},{"volume-title":"Neural turing machines. arXiv preprint arXiv:1410.5401","year":"2014","author":"Graves Alex","key":"e_1_3_2_1_12_1"},{"volume-title":"Long short-term memory. Neural computation","year":"1997","author":"Hochreiter Sepp","key":"e_1_3_2_1_13_1"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.450"},{"key":"e_1_3_2_1_15_1","unstructured":"Armand Joulin and Tomas Mikolov. 2015. Inferring algorithmic patterns with stack-augmented recurrent nets. In Advances in neural information processing systems (NIPS). 190--198.  Armand Joulin and Tomas Mikolov. 2015. Inferring algorithmic patterns with stack-augmented recurrent nets. In Advances in neural information processing systems (NIPS). 190--198."},{"volume-title":"Neural gpus learn algorithms. arXiv preprint arXiv:1511.08228","year":"2015","author":"Kaiser \u0141ukasz","key":"e_1_3_2_1_16_1"},{"volume-title":"ViZDoom: A Doom-based AI Research Platform for Visual Reinforcement Learning. In IEEE Conference on Computational Intelligence and Games . IEEE","year":"2016","author":"Kempka Micha\u0142","key":"e_1_3_2_1_17_1"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.83"},{"volume-title":"Neural random-access machines. arXiv preprint arXiv:1511.06392","year":"2015","author":"Kurach Karol","key":"e_1_3_2_1_19_1"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00013"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.111"},{"volume-title":"International Conference on Learning Representations (ICLR)","year":"2017","author":"Parisotto Emilio","key":"e_1_3_2_1_22_1"},{"volume-title":"Karel the robot: a gentle introduction to the art of programming","author":"Pattis Richard E","key":"e_1_3_2_1_23_1"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126349"},{"key":"e_1_3_2_1_25_1","unstructured":"Adam Santoro David Raposo David G Barrett Mateusz Malinowski Razvan Pascanu Peter Battaglia and Tim Lillicrap. 2017. A simple neural network module for relational reasoning. In Advances in neural information processing systems (NIPS). 4967--4976.  Adam Santoro David Raposo David G Barrett Mateusz Malinowski Razvan Pascanu Peter Battaglia and Tim Lillicrap. 2017. A simple neural network module for relational reasoning. In Advances in neural information processing systems (NIPS). 4967--4976."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.548"},{"key":"e_1_3_2_1_27_1","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Two-stream convolutional networks for action recognition in videos. In Advances in neural information processing systems (NIPS). 568--576.  Karen Simonyan and Andrew Zisserman. 2014. Two-stream convolutional networks for action recognition in videos. In Advances in neural information processing systems (NIPS). 568--576."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/2983323.2983349"},{"volume-title":"International Conference on Machine Learning (ICML). 4797--4806","year":"2018","author":"Sun Shao-Hua","key":"e_1_3_2_1_29_1"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.515"},{"volume-title":"Deliberation Networks: Sequence Generation Beyond One-Pass Decoding. In Advances in Neural Information Processing Systems (NIPS)","year":"2017","author":"Xia Yingce","key":"e_1_3_2_1_31_1"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.571"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.496"},{"volume-title":"To Find Where You Talk: Temporal Sentence Localization in Video with Attention Based Location Regression. arXiv preprint arXiv:1804.07014","year":"2018","author":"Yuan Yitian","key":"e_1_3_2_1_34_1"}],"event":{"name":"MM '19: The 27th ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Nice France","acronym":"MM '19"},"container-title":["Proceedings of the 27th ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3343031.3351094","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3343031.3351094","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:13:12Z","timestamp":1750201992000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3343031.3351094"}},"subtitle":["Learning to Represent Videos Using Program"],"short-title":[],"issued":{"date-parts":[[2019,10,15]]},"references-count":34,"alternative-id":["10.1145\/3343031.3351094","10.1145\/3343031"],"URL":"https:\/\/doi.org\/10.1145\/3343031.3351094","relation":{},"subject":[],"published":{"date-parts":[[2019,10,15]]},"assertion":[{"value":"2019-10-15","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}