{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T21:47:35Z","timestamp":1766267255443,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":38,"publisher":"ACM","license":[{"start":{"date-parts":[[2015,10,13]],"date-time":"2015-10-13T00:00:00Z","timestamp":1444694400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2015,10,13]]},"DOI":"10.1145\/2733373.2806226","type":"proceedings-article","created":{"date-parts":[[2016,2,26]],"date-time":"2016-02-26T19:09:21Z","timestamp":1456513761000},"page":"371-380","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":96,"title":["Temporal Localization of Fine-Grained Actions in Videos by Domain Transfer from Web Images"],"prefix":"10.1145","author":[{"given":"Chen","family":"Sun","sequence":"first","affiliation":[{"name":"University of Southern California, Los Angeles, CA, USA"}]},{"given":"Sanketh","family":"Shetty","sequence":"additional","affiliation":[{"name":"Google, Inc., Mountain View, CA, USA"}]},{"given":"Rahul","family":"Sukthankar","sequence":"additional","affiliation":[{"name":"Google, Inc., Mountain View, CA, USA"}]},{"given":"Ram","family":"Nevatia","sequence":"additional","affiliation":[{"name":"University of Southern California, Los Angeles, CA, USA"}]}],"member":"320","published-online":{"date-parts":[[2015,10,13]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/2578726.2578729"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.412"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"e_1_3_2_1_6_1","volume-title":"NIPS","author":"Graves A.","year":"2008","unstructured":"A. Graves and J. Schmidhuber . Offline handwriting recognition with multidimensional recurrent neural networks . In NIPS , 2008 . A. Graves and J. Schmidhuber. Offline handwriting recognition with multidimensional recurrent neural networks. In NIPS, 2008."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2461466.2461482"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.100"},{"key":"e_1_3_2_1_10_1","unstructured":"Y.-G. Jiang J. Liu A. Roshan Zamir G. Toderici I. Laptev M. Shah and R. Sukthankar. THUMOS challenge: Action recognition with a large number of classes. http:\/\/crcv.ucf.edu\/THUMOS14\/ 2014.  Y.-G. Jiang J. Liu A. Roshan Zamir G. Toderici I. Laptev M. Shah and R. Sukthankar. THUMOS challenge: Action recognition with a large number of classes. http:\/\/crcv.ucf.edu\/THUMOS14\/ 2014."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.223"},{"key":"e_1_3_2_1_12_1","volume-title":"TACL","author":"Kiros R.","year":"2015","unstructured":"R. Kiros , R. Salakhutdinov , and R. S. Zemel . Unifying visual-semantic embeddings with multimodal neural language models . TACL , 2015 . R. Kiros, R. Salakhutdinov, and R. S. Zemel. Unifying visual-semantic embeddings with multimodal neural language models. TACL, 2015."},{"key":"e_1_3_2_1_13_1","volume-title":"NIPS","author":"Krizhevsky A.","year":"2012","unstructured":"A. Krizhevsky , I. Sutskever , and G. E. Hinton . Imagenet classification with deep convolutional neural networks . In NIPS , 2012 . A. Krizhevsky, I. Sutskever, and G. E. Hinton. Imagenet classification with deep convolutional neural networks. In NIPS, 2012."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000029664.99615.94"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.228"},{"key":"e_1_3_2_1_17_1","volume-title":"TRECVID 2013 -- an overview of the goals, tasks, data, evaluation mechanisms and metrics. In TRECVID","author":"Over P.","year":"2013","unstructured":"P. Over , G. Awad , M. Michel , J. Fiscus , G. Sanders , W. Kraaij , A. F. Smeaton , and G. Queenot . TRECVID 2013 -- an overview of the goals, tasks, data, evaluation mechanisms and metrics. In TRECVID , 2013 . P. Over, G. Awad, M. Michel, J. Fiscus, G. Sanders, W. Kraaij, A. F. Smeaton, and G. Queenot. TRECVID 2013 -- an overview of the goals, tasks, data, evaluation mechanisms and metrics. In TRECVID, 2013."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2007.383266"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10599-4_35"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6247801"},{"key":"e_1_3_2_1_21_1","volume-title":"ImageNet Large Scale Visual Recognition Challenge","author":"Russakovsky O.","year":"2014","unstructured":"O. Russakovsky , J. Deng , H. Su , J. Krause , S. Satheesh , S. Ma , Z. Huang , A. Karpathy , A. Khosla , M. Bernstein , A. C. Berg , and L. Fei-Fei . ImageNet Large Scale Visual Recognition Challenge , 2014 . O. Russakovsky, J. Deng, H. Su, J. Krause, S. Satheesh, S. Ma, Z. Huang, A. Karpathy, A. Khosla, M. Bernstein, A. C. Berg, and L. Fei-Fei. ImageNet Large Scale Visual Recognition Challenge, 2014."},{"key":"e_1_3_2_1_22_1","volume-title":"Long short-term memory based recurrent neural network architectures for large vocabulary speech recognition. CoRR, abs\/1402.1128","author":"Sak H.","year":"2014","unstructured":"H. Sak , A. Senior , and F. Beaufays . Long short-term memory based recurrent neural network architectures for large vocabulary speech recognition. CoRR, abs\/1402.1128 , 2014 . H. Sak, A. Senior, and F. Beaufays. Long short-term memory based recurrent neural network architectures for large vocabulary speech recognition. CoRR, abs\/1402.1128, 2014."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2004.747"},{"key":"e_1_3_2_1_24_1","volume-title":"NIPS","author":"Simonyan K.","year":"2014","unstructured":"K. Simonyan and A. Zisserman . Two-stream convolutional networks for action recognition in videos . In NIPS , 2014 . K. Simonyan and A. Zisserman. Two-stream convolutional networks for action recognition in videos. In NIPS, 2014."},{"key":"e_1_3_2_1_25_1","unstructured":"K. Soomro A. R. Zamir and M. Shah. UCF101: A dataset of 101 human actions classes from videos in the wild. CRCV-TR-12-01.  K. Soomro A. R. Zamir and M. Shah. UCF101: A dataset of 101 human actions classes from videos in the wild. CRCV-TR-12-01."},{"key":"e_1_3_2_1_26_1","volume-title":"ICML","author":"Srivastava N.","year":"2015","unstructured":"N. Srivastava , E. Mansimov , and R. Salakhutdinov . Unsupervised learning of video representations using LSTMs . In ICML , 2015 . N. Srivastava, E. Mansimov, and R. Salakhutdinov. Unsupervised learning of video representations using LSTMs. In ICML, 2015."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/2578726.2578757"},{"key":"e_1_3_2_1_28_1","volume-title":"NIPS","author":"Sutskever I.","year":"2014","unstructured":"I. Sutskever , O. Vinyals , and Q. V. Le . Sequence to sequence learning with neural networks . In NIPS , 2014 . I. Sutskever, O. Vinyals, and Q. V. Le. Sequence to sequence learning with neural networks. In NIPS, 2014."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.341"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-012-0594-8"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.441"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_37"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1990.2.4.490"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995402"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/2457450.2457456"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2012.67"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299101"}],"event":{"name":"MM '15: ACM Multimedia Conference","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Brisbane Australia","acronym":"MM '15"},"container-title":["Proceedings of the 23rd ACM international conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2733373.2806226","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2733373.2806226","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T06:16:47Z","timestamp":1750227407000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2733373.2806226"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,10,13]]},"references-count":38,"alternative-id":["10.1145\/2733373.2806226","10.1145\/2733373"],"URL":"https:\/\/doi.org\/10.1145\/2733373.2806226","relation":{},"subject":[],"published":{"date-parts":[[2015,10,13]]},"assertion":[{"value":"2015-10-13","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}