{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,10]],"date-time":"2026-07-10T11:24:32Z","timestamp":1783682672239,"version":"3.55.0"},"reference-count":47,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2022,8,8]],"date-time":"2022-08-08T00:00:00Z","timestamp":1659916800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,8,8]],"date-time":"2022-08-08T00:00:00Z","timestamp":1659916800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Front. Comput. Sci."],"published-print":{"date-parts":[[2023,4]]},"DOI":"10.1007\/s11704-022-1167-9","type":"journal-article","created":{"date-parts":[[2022,8,8]],"date-time":"2022-08-08T11:02:52Z","timestamp":1659956572000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Weakly supervised action anticipation without object annotations"],"prefix":"10.1007","volume":"17","author":[{"given":"Yi","family":"Zhong","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jia-Hui","family":"Pan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Haoxin","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wei-Shi","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2022,8,8]]},"reference":[{"key":"1167_CR1","doi-asserted-by":"crossref","unstructured":"Mahmud T, Hasan M, Roy-Chowdhury A K. Joint prediction of activity labels and starting times in untrimmed videos. In: Proceedings of the IEEE International Conference on Computer Vision. 2017, 5784\u20135793","DOI":"10.1109\/ICCV.2017.616"},{"key":"1167_CR2","unstructured":"Mahmud T, Billah M, Hasan M, Roy-Chowdhury A K. Captioning near-future activity sequences. 2019, arXiv preprint arXiv: 1908.00943"},{"key":"1167_CR3","doi-asserted-by":"crossref","unstructured":"Rohrbach M, Amin S, Andriluka M, Schiele B. A database for fine grained activity detection of cooking activities. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2012, 1194\u20131201","DOI":"10.1109\/CVPR.2012.6247801"},{"key":"1167_CR4","doi-asserted-by":"crossref","unstructured":"Baradel F, Neverova N, Wolf C, Mille J, Mori G. Object level visual reasoning in videos. In: Proceedings of the 15th European Conference on Computer Vision. 2018, 106\u2013122","DOI":"10.1007\/978-3-030-01261-8_7"},{"key":"1167_CR5","doi-asserted-by":"crossref","unstructured":"Ryoo M S. Human activity prediction: early recognition of ongoing activities from streaming videos. In: Proceedings of the IEEE International Conference on Computer Vision. 2011, 1036\u20131043","DOI":"10.1109\/ICCV.2011.6126349"},{"key":"1167_CR6","doi-asserted-by":"crossref","unstructured":"Xu Z, Qing L, Miao J. Activity auto-completion: predicting human activities from partial videos. In: Proceedings of the IEEE International Conference on Computer Vision. 2015, 3191\u20133199","DOI":"10.1109\/ICCV.2015.365"},{"issue":"9","key":"1167_CR7","doi-asserted-by":"publisher","first-page":"1844","DOI":"10.1109\/TPAMI.2015.2491928","volume":"38","author":"Y Kong","year":"2016","unstructured":"Kong Y, Fu Y. Max-margin action prediction machine. IEEE Transactions on Pattern Analysis and Machine Intelligence, 2016, 38(9): 1844\u20131858","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1167_CR8","doi-asserted-by":"crossref","unstructured":"Lan T, Chen T C, Savarese S. A hierarchical representation for future action prediction. In: Proceedings of the 13th European Conference on Computer Vision. 2014, 689\u2013704","DOI":"10.1007\/978-3-319-10578-9_45"},{"key":"1167_CR9","doi-asserted-by":"crossref","unstructured":"Hu J F, Zheng W S, Ma L, Wang G, Lai J. Real-time RGB-D activity prediction by soft regression. In: Proceedings of the 14th European Conference on Computer Vision. 2016, 280\u2013296","DOI":"10.1007\/978-3-319-46448-0_17"},{"key":"1167_CR10","doi-asserted-by":"crossref","unstructured":"Tran D, Bourdev L, Fergus R, Torresani L, Paluri M. Learning spatiotemporal features with 3D convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision. 2015, 4489\u20134497","DOI":"10.1109\/ICCV.2015.510"},{"key":"1167_CR11","doi-asserted-by":"crossref","unstructured":"Carreira J, Zisserman A. Quo vadis, action recognition? A new model and the kinetics dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2017, 4724\u20134733","DOI":"10.1109\/CVPR.2017.502"},{"key":"1167_CR12","doi-asserted-by":"crossref","unstructured":"Kong Y, Tao Z, Fu Y. Deep sequential context networks for action prediction. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2017, 3662\u20133670","DOI":"10.1109\/CVPR.2017.390"},{"key":"1167_CR13","doi-asserted-by":"crossref","unstructured":"Qin J, Liu L, Shao L, Ni B, Chen C, Shen F, Wang Y. Binary coding for partial action analysis with limited observation ratios. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2017, 6728\u20136737","DOI":"10.1109\/CVPR.2017.712"},{"key":"1167_CR14","doi-asserted-by":"publisher","first-page":"198","DOI":"10.1016\/j.patcog.2018.08.006","volume":"85","author":"D G Lee","year":"2019","unstructured":"Lee D G, Lee S W. Prediction of partially observed human activity based on pre-trained deep representation. Pattern Recognition, 2019, 85: 198\u2013206","journal-title":"Pattern Recognition"},{"key":"1167_CR15","doi-asserted-by":"crossref","unstructured":"Zolfaghari M, Singh K, Brox T. ECO: efficient convolutional network for online video understanding. In: Proceedings of the 15th European Conference on Computer Vision. 2018, 713\u2013730","DOI":"10.1007\/978-3-030-01216-8_43"},{"key":"1167_CR16","doi-asserted-by":"crossref","unstructured":"Singh G, Saha S, Sapienza M, Torr P, Cuzzolin F. Online real-time multiple spatiotemporal action localisation and prediction. In: Proceedings of the IEEE International Conference on Computer Vision. 2017, 3657\u20133666","DOI":"10.1109\/ICCV.2017.393"},{"issue":"5","key":"1167_CR17","doi-asserted-by":"publisher","first-page":"2272","DOI":"10.1109\/TIP.2017.2751145","volume":"27","author":"S Lai","year":"2018","unstructured":"Lai S, Zheng W S, Hu J F, Zhang J. Global-local temporal saliency action prediction. IEEE Transactions on Image Processing, 2018, 27(5): 2272\u20132285","journal-title":"IEEE Transactions on Image Processing"},{"key":"1167_CR18","doi-asserted-by":"crossref","unstructured":"Kong Y, Gao S, Sun B, Fu Y. Action prediction from videos via memorizing hard-to-predict samples. In: Proceedings of the 32nd AAAI Conference on Artificial Intelligence. 2018, 7000\u20137007","DOI":"10.1609\/aaai.v32i1.12324"},{"key":"1167_CR19","doi-asserted-by":"crossref","unstructured":"Vondrick C, Pirsiavash H, Torralba A. Anticipating visual representations from unlabeled video. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2016, 98\u2013106","DOI":"10.1109\/CVPR.2016.18"},{"key":"1167_CR20","doi-asserted-by":"crossref","unstructured":"Zhong Y, Zheng W S. Unsupervised learning for forecasting action representations. In: Proceedings of the 25th IEEE International Conference on Image Processing. 2018, 1073\u20131077","DOI":"10.1109\/ICIP.2018.8451428"},{"key":"1167_CR21","doi-asserted-by":"crossref","unstructured":"Furnari A, Farinella G M. What would you expect? Anticipating egocentric actions with rolling-unrolling LSTMs and modality attention. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. 2019, 6251\u20136260","DOI":"10.1109\/ICCV.2019.00635"},{"key":"1167_CR22","doi-asserted-by":"crossref","unstructured":"Gao J, Yang Z, Nevatia R. RED: reinforced encoder-decoder networks for action anticipation. In: Proceedings of the British Machine Vision Conference. 2017","DOI":"10.5244\/C.31.92"},{"key":"1167_CR23","doi-asserted-by":"crossref","unstructured":"Zeng K H, Shen W B, Huang D A, Sun M, Niebles J C. Visual forecasting by imitating dynamics in natural sequences. In: Proceedings of the IEEE International Conference on Computer Vision. 2017, 3018\u20133027","DOI":"10.1109\/ICCV.2017.326"},{"key":"1167_CR24","unstructured":"Ng Y B, Fernando B. Forecasting future action sequences with attention: a new approach to weakly supervised action forecasting. 2019, arXiv preprint arXiv: 1912.04608"},{"key":"1167_CR25","unstructured":"Pirri F, Mauro L, Alati E, Ntouskos V, Izadpanahkakhk M, Omrani E. Anticipation and next action forecasting in video: an end-to-end model with memory. 2019, arXiv preprint arXiv: 1901.03728"},{"key":"1167_CR26","unstructured":"Snell J, Swersky K, Zemel R. Prototypical networks for few-shot learning. In: Proceedings of the 31st Conference on Neural Information Processing Systems. 2017, 4080\u20134090"},{"key":"1167_CR27","doi-asserted-by":"crossref","unstructured":"Farha Y A, Richard A, Gall J. When will you do what?-Anticipating temporal occurrences of activities. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2018, 5343\u20135352","DOI":"10.1109\/CVPR.2018.00560"},{"key":"1167_CR28","doi-asserted-by":"crossref","unstructured":"Ke Q, Fritz M, Schiele B. Time-conditioned action anticipation in one shot. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2019, 9917\u20139926","DOI":"10.1109\/CVPR.2019.01016"},{"key":"1167_CR29","doi-asserted-by":"crossref","unstructured":"Wu T Y, Chien T A, Chan C S, Hu C W, Sun M. Anticipating daily intention using on-wrist motion triggered sensing. In: Proceedings of the IEEE International Conference on Computer Vision. 2017, 48\u201356","DOI":"10.1109\/ICCV.2017.15"},{"key":"1167_CR30","doi-asserted-by":"crossref","unstructured":"Sun C, Shrivastava A, Vondrick C, Sukthankar R, Murphy K, Schmid C. Relational action forecasting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2019, 273\u2013283","DOI":"10.1109\/CVPR.2019.00036"},{"key":"1167_CR31","doi-asserted-by":"crossref","unstructured":"Zhang J, Elhoseiny M, Cohen S, Chang W, Elgammal A. Relationship proposal networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2017, 5226\u20135234","DOI":"10.1109\/CVPR.2017.555"},{"key":"1167_CR32","doi-asserted-by":"crossref","unstructured":"Hu H, Gu J, Zhang Z, Dai J, Wei Y. Relation networks for object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2018, 3588\u20133597","DOI":"10.1109\/CVPR.2018.00378"},{"key":"1167_CR33","doi-asserted-by":"crossref","unstructured":"Gkioxari G, Girshick R, Doll\u00e1r P, He K. Detecting and recognizing human-object interactions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2018, 8359\u20138367","DOI":"10.1109\/CVPR.2018.00872"},{"key":"1167_CR34","unstructured":"Veli\u010dkovi\u0107 P, Casanova A, Lio P, Cucurull G, Romero A, Bengio Y. Graph attention networks. In: Proceedings of the 6th International Conference on Learning Representations. 2018"},{"key":"1167_CR35","doi-asserted-by":"crossref","unstructured":"Kato K, Li Y, Gupta A. Compositional learning for human object interaction. In: Proceedings of the 15th European Conference on Computer Vision. 2018, 247\u2013264","DOI":"10.1007\/978-3-030-01264-9_15"},{"key":"1167_CR36","doi-asserted-by":"crossref","unstructured":"Wang X, Gupta A. Videos as space-time region graphs. In: Proceedings of the 15th European Conference on Computer Vision. 2018, 413\u2013431","DOI":"10.1007\/978-3-030-01228-1_25"},{"key":"1167_CR37","doi-asserted-by":"crossref","unstructured":"Qi S, Wang W, Jia B, Shen J, Zhu S C. Learning human-object interactions by graph parsing neural networks. In: Proceedings of the 15th European Conference on Computer Vision. 2018, 407\u2013423","DOI":"10.1007\/978-3-030-01240-3_25"},{"key":"1167_CR38","doi-asserted-by":"publisher","first-page":"308","DOI":"10.1016\/j.patcog.2019.06.012","volume":"95","author":"Q Zhang","year":"2019","unstructured":"Zhang Q, Chang J, Meng G, Xu S, Xiang S, Pan C. Learning graph structure via graph convolutional networks. Pattern Recognition, 2019, 95: 308\u2013318","journal-title":"Pattern Recognition"},{"key":"1167_CR39","doi-asserted-by":"crossref","unstructured":"Chao Y W, Yang J, Price B, Cohen S, Deng J. Forecasting human dynamics from static images. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2017, 3643\u20133651","DOI":"10.1109\/CVPR.2017.388"},{"key":"1167_CR40","doi-asserted-by":"crossref","unstructured":"Li C, Zhang Z, Lee W S, Lee G H. Convolutional sequence to sequence model for human dynamics. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2018, 5226\u20135234","DOI":"10.1109\/CVPR.2018.00548"},{"key":"1167_CR41","doi-asserted-by":"crossref","unstructured":"Martinez J, Black M J, Romero J. On human motion prediction using recurrent neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2017, 4674\u20134683","DOI":"10.1109\/CVPR.2017.497"},{"key":"1167_CR42","doi-asserted-by":"crossref","unstructured":"B\u00fctepage J, Black M J, Kragic D, Kjellstr\u00f6m H. Deep representation learning for human motion prediction and classification. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2017, 1591\u20131599","DOI":"10.1109\/CVPR.2017.173"},{"key":"1167_CR43","doi-asserted-by":"publisher","first-page":"532","DOI":"10.1016\/j.patcog.2017.07.003","volume":"72","author":"V Bloom","year":"2017","unstructured":"Bloom V, Argyriou V, Makris D. Linear latent low dimensional space for online early action recognition and prediction. Pattern Recognition, 2017, 72: 532\u2013547","journal-title":"Pattern Recognition"},{"key":"1167_CR44","unstructured":"Redmon J, Farhadi A. YOLOv3: an incremental improvement. 2018, arXiv preprint arXiv: 1804.02767"},{"key":"1167_CR45","doi-asserted-by":"crossref","unstructured":"Fang H S, Xie S, Tai Y W, Lu C. RMPE: regional multi-person pose estimation. In: Proceedings of the IEEE International Conference on Computer Vision. 2017, 2353\u20132362","DOI":"10.1109\/ICCV.2017.256"},{"key":"1167_CR46","unstructured":"Xiu Y, Li J, Wang H, Fang Y, Lu C. Pose flow: efficient online pose tracking. In: Proceedings of the British Machine Vision Conference. 2018"},{"key":"1167_CR47","doi-asserted-by":"crossref","unstructured":"Dalal N, Triggs B, Schmid C. Human detection using oriented histograms of flow and appearance. In: Proceedings of the 9th European Conference on Computer Vision. 2006, 428\u2013441","DOI":"10.1007\/11744047_33"}],"container-title":["Frontiers of Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-022-1167-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11704-022-1167-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-022-1167-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,19]],"date-time":"2024-05-19T20:34:54Z","timestamp":1716150894000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11704-022-1167-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,8]]},"references-count":47,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2023,4]]}},"alternative-id":["1167"],"URL":"https:\/\/doi.org\/10.1007\/s11704-022-1167-9","relation":{},"ISSN":["2095-2228","2095-2236"],"issn-type":[{"value":"2095-2228","type":"print"},{"value":"2095-2236","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,8,8]]},"assertion":[{"value":"10 April 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 November 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 August 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"172313"}}