{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:34:58Z","timestamp":1750221298662,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":15,"publisher":"ACM","license":[{"start":{"date-parts":[[2018,10,15]],"date-time":"2018-10-15T00:00:00Z","timestamp":1539561600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Next-Generation Information Computing Development Program through the National Research Foundation of Korea(NRF) funded by the Ministry of Science, ICT (NRF-2017M3C4A7069370)","award":["NRF-2017M3C4A7069370"],"award-info":[{"award-number":["NRF-2017M3C4A7069370"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2018,10,15]]},"DOI":"10.1145\/3265987.3265988","type":"proceedings-article","created":{"date-parts":[[2018,10,17]],"date-time":"2018-10-17T12:18:31Z","timestamp":1539778711000},"page":"27-30","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Multi-task Joint Learning for Videos in the Wild"],"prefix":"10.1145","author":[{"given":"Yong Won","family":"Hong","sequence":"first","affiliation":[{"name":"Yonsei University, Seoul, Rebublic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hoseong","family":"Kim","sequence":"additional","affiliation":[{"name":"Yonsei University, Seoul, Rebublic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hyeran","family":"Byun","sequence":"additional","affiliation":[{"name":"Yonsei University, Seoul, Rebublic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2018,10,15]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"YouTube- 8M: A Large-Scale Video Classification Benchmark. CoRR abs\/1609.08675","author":"Abu-El-Haija Sami","year":"2016","unstructured":"Sami Abu-El-Haija , Nisarg Kothari , Joonseok Lee , Paul Natsev , George Toderici , Balakrishnan Varadarajan , and Sudheendra Vijayanarasimhan . 2016. YouTube- 8M: A Large-Scale Video Classification Benchmark. CoRR abs\/1609.08675 ( 2016 ). arXiv:1609.08675 http:\/\/arxiv.org\/abs\/1609.08675 Sami Abu-El-Haija, Nisarg Kothari, Joonseok Lee, Paul Natsev, George Toderici, Balakrishnan Varadarajan, and Sudheendra Vijayanarasimhan. 2016. YouTube- 8M: A Large-Scale Video Classification Benchmark. CoRR abs\/1609.08675 (2016). arXiv:1609.08675 http:\/\/arxiv.org\/abs\/1609.08675"},{"key":"e_1_3_2_1_2_1","volume-title":"Survey of Visual Question Answering: Datasets and Techniques. CoRR abs\/1705.03865","author":"Gupta Akshay Kumar","year":"2017","unstructured":"Akshay Kumar Gupta . 2017. Survey of Visual Question Answering: Datasets and Techniques. CoRR abs\/1705.03865 ( 2017 ). arXiv:1705.03865 http:\/\/arxiv.org\/abs\/1705.03865 Akshay Kumar Gupta. 2017. Survey of Visual Question Answering: Datasets and Techniques. CoRR abs\/1705.03865 (2017). arXiv:1705.03865 http:\/\/arxiv.org\/abs\/1705.03865"},{"key":"e_1_3_2_1_3_1","volume-title":"Deep Residual Learning for Image Recognition. CoRR abs\/1512.03385","author":"He Kaiming","year":"2015","unstructured":"Kaiming He , Xiangyu Zhang , Shaoqing Ren , and Jian Sun . 2015. Deep Residual Learning for Image Recognition. CoRR abs\/1512.03385 ( 2015 ). arXiv:1512.03385 http:\/\/arxiv.org\/abs\/1512.03385 Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 2015. Deep Residual Learning for Image Recognition. CoRR abs\/1512.03385 (2015). arXiv:1512.03385 http:\/\/arxiv.org\/abs\/1512.03385"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.223"},{"key":"e_1_3_2_1_5_1","volume-title":"The Kinetics Human Action Video Dataset. CoRR abs\/1705.06950","author":"Kay Will","year":"2017","unstructured":"Will Kay , Joao Carreira , Karen Simonyan , Brian Zhang , Chloe Hillier , Sudheendra Vijayanarasimhan , Fabio Viola , Tim Green , Trevor Back , Paul Natsev , Mustafa Suleyman , and Andrew Zisserman . 2017. The Kinetics Human Action Video Dataset. CoRR abs\/1705.06950 ( 2017 ). arXiv:1705.06950 http:\/\/arxiv.org\/abs\/1705.06950 Will Kay, Joao Carreira, Karen Simonyan, Brian Zhang, Chloe Hillier, Sudheendra Vijayanarasimhan, Fabio Viola, Tim Green, Trevor Back, Paul Natsev, Mustafa Suleyman, and Andrew Zisserman. 2017. The Kinetics Human Action Video Dataset. CoRR abs\/1705.06950 (2017). arXiv:1705.06950 http:\/\/arxiv.org\/abs\/1705.06950"},{"volume-title":"Advances in Neural Information Processing Systems 25","author":"Krizhevsky Alex","key":"e_1_3_2_1_6_1","unstructured":"Alex Krizhevsky , Ilya Sutskever , and Geoffrey E Hinton . 2012. ImageNet Classification with Deep Convolutional Neural Networks . In Advances in Neural Information Processing Systems 25 , F. Pereira, C. J. C. Burges, L. Bottou, and K. Q.Weinberger (Eds.). Curran Associates, Inc. , 1097--1105. http:\/\/papers.nips.cc\/paper\/ 4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf Alex Krizhevsky, Ilya Sutskever, and Geoffrey E Hinton. 2012. ImageNet Classification with Deep Convolutional Neural Networks. In Advances in Neural Information Processing Systems 25, F. Pereira, C. J. C. Burges, L. Bottou, and K. Q.Weinberger (Eds.). Curran Associates, Inc., 1097--1105. http:\/\/papers.nips.cc\/paper\/ 4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_1_9_1","volume-title":"Two-Stream Convolutional Networks for Action Recognition in Videos. CoRR abs\/1406.2199","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman . 2014. Two-Stream Convolutional Networks for Action Recognition in Videos. CoRR abs\/1406.2199 ( 2014 ). arXiv:1406.2199 http:\/\/arxiv.org\/abs\/1406.2199 Karen Simonyan and Andrew Zisserman. 2014. Two-Stream Convolutional Networks for Action Recognition in Videos. CoRR abs\/1406.2199 (2014). arXiv:1406.2199 http:\/\/arxiv.org\/abs\/1406.2199"},{"key":"e_1_3_2_1_10_1","volume-title":"Amir Roshan Zamir, and Mubarak Shah","author":"Soomro Khurram","year":"2012","unstructured":"Khurram Soomro , Amir Roshan Zamir, and Mubarak Shah . 2012 . UCF101: A Dataset of 101 Human Actions Classes From Videos in The Wild. CoRRabs\/ 1212.0402 (2012). arXiv:1212.0402 http:\/\/arxiv.org\/abs\/1212.0402 Khurram Soomro, Amir Roshan Zamir, and Mubarak Shah. 2012. UCF101: A Dataset of 101 Human Actions Classes From Videos in The Wild. CoRRabs\/1212.0402 (2012). arXiv:1212.0402 http:\/\/arxiv.org\/abs\/1212.0402"},{"key":"e_1_3_2_1_11_1","volume-title":"Going Deeper with Convolutions. CoRR abs\/1409.4842","author":"Szegedy Christian","year":"2014","unstructured":"Christian Szegedy , Wei Liu , Yangqing Jia , Pierre Sermanet , Scott E. Reed , Dragomir Anguelov , Dumitru Erhan , Vincent Vanhoucke , and Andrew Rabinovich . 2014. Going Deeper with Convolutions. CoRR abs\/1409.4842 ( 2014 ). arXiv:1409.4842 http:\/\/arxiv.org\/abs\/1409.4842 Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott E. Reed, Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, and Andrew Rabinovich. 2014. Going Deeper with Convolutions. CoRR abs\/1409.4842 (2014). arXiv:1409.4842 http:\/\/arxiv.org\/abs\/1409.4842"},{"key":"e_1_3_2_1_12_1","volume-title":"C3D: Generic Features for Video Analysis. CoRR abs\/1412.0767","author":"Tran Du","year":"2014","unstructured":"Du Tran , Lubomir D. Bourdev , Rob Fergus , Lorenzo Torresani , and Manohar Paluri . 2014. C3D: Generic Features for Video Analysis. CoRR abs\/1412.0767 ( 2014 ). arXiv:1412.0767 http:\/\/arxiv.org\/abs\/1412.0767 Du Tran, Lubomir D. Bourdev, Rob Fergus, Lorenzo Torresani, and Manohar Paluri. 2014. C3D: Generic Features for Video Analysis. CoRR abs\/1412.0767 (2014). arXiv:1412.0767 http:\/\/arxiv.org\/abs\/1412.0767"},{"key":"e_1_3_2_1_13_1","volume-title":"A Closer Look at Spatiotemporal Convolutions for Action Recognition. CoRR abs\/1711.11248","author":"Tran Du","year":"2017","unstructured":"Du Tran , Heng Wang , Lorenzo Torresani , Jamie Ray , Yann LeCun , and Manohar Paluri . 2017. A Closer Look at Spatiotemporal Convolutions for Action Recognition. CoRR abs\/1711.11248 ( 2017 ). arXiv:1711.11248 http:\/\/arxiv.org\/abs\/1711.11248 Du Tran, Heng Wang, Lorenzo Torresani, Jamie Ray, Yann LeCun, and Manohar Paluri. 2017. A Closer Look at Spatiotemporal Convolutions for Action Recognition. CoRR abs\/1711.11248 (2017). arXiv:1711.11248 http:\/\/arxiv.org\/abs\/1711.11248"},{"key":"e_1_3_2_1_14_1","volume-title":"Show and Tell: A Neural Image Caption Generator. CoRR abs\/1411.4555","author":"Vinyals Oriol","year":"2014","unstructured":"Oriol Vinyals , Alexander Toshev , Samy Bengio , and Dumitru Erhan . 2014. Show and Tell: A Neural Image Caption Generator. CoRR abs\/1411.4555 ( 2014 ). arXiv:1411.4555 http:\/\/arxiv.org\/abs\/1411.4555 Oriol Vinyals, Alexander Toshev, Samy Bengio, and Dumitru Erhan. 2014. Show and Tell: A Neural Image Caption Generator. CoRR abs\/1411.4555 (2014). arXiv:1411.4555 http:\/\/arxiv.org\/abs\/1411.4555"},{"key":"e_1_3_2_1_15_1","volume-title":"Nonlocal Neural Networks. CoRR abs\/1711.07971","author":"Wang Xiaolong","year":"2017","unstructured":"Xiaolong Wang , Ross B. Girshick , Abhinav Gupta , and Kaiming He. 2017. Nonlocal Neural Networks. CoRR abs\/1711.07971 ( 2017 ). arXiv:1711.07971 http:\/\/arxiv.org\/abs\/1711.07971 Xiaolong Wang, Ross B. Girshick, Abhinav Gupta, and Kaiming He. 2017. Nonlocal Neural Networks. CoRR abs\/1711.07971 (2017). arXiv:1711.07971 http:\/\/arxiv.org\/abs\/1711.07971"}],"event":{"name":"MM '18: ACM Multimedia Conference","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Seoul Republic of Korea","acronym":"MM '18"},"container-title":["Proceedings of the 1st Workshop and Challenge on Comprehensive Video Understanding in the Wild"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3265987.3265988","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3265987.3265988","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T02:13:15Z","timestamp":1750212795000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3265987.3265988"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,10,15]]},"references-count":15,"alternative-id":["10.1145\/3265987.3265988","10.1145\/3265987"],"URL":"https:\/\/doi.org\/10.1145\/3265987.3265988","relation":{},"subject":[],"published":{"date-parts":[[2018,10,15]]},"assertion":[{"value":"2018-10-15","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}