{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T21:52:13Z","timestamp":1769637133081,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","license":[{"start":{"date-parts":[[2018,10,15]],"date-time":"2018-10-15T00:00:00Z","timestamp":1539561600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100011002","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61525204,61732010"],"award-info":[{"award-number":["61525204,61732010"]}],"id":[{"id":"10.13039\/501100011002","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2018,10,15]]},"DOI":"10.1145\/3240508.3240638","type":"proceedings-article","created":{"date-parts":[[2018,10,18]],"date-time":"2018-10-18T17:52:08Z","timestamp":1539885128000},"page":"941-949","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["Tracking-assisted Weakly Supervised Online Visual Object Segmentation in Unconstrained Videos"],"prefix":"10.1145","author":[{"given":"Zongpu","family":"Zhang","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yang","family":"Hua","sequence":"additional","affiliation":[{"name":"Queen's University Belfast, Belfast, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tao","family":"Song","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhengui","family":"Xue","sequence":"additional","affiliation":[{"name":"Ulster University &amp; Shanghai Jiao Tong University, Belfast, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruhui","family":"Ma","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Neil","family":"Robertson","sequence":"additional","affiliation":[{"name":"Queen's University Belfast, Belfast, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haibing","family":"Guan","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2018,10,15]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"crossref","unstructured":"Khoreva Anna Benenson Rodrigo Hosang Jan Hein Matthias and Schiele Bernt. 2017. Simple Does It: Weakly Supervised Instance and Semantic Segmentation. In CVPR .  Khoreva Anna Benenson Rodrigo Hosang Jan Hein Matthias and Schiele Bernt. 2017. Simple Does It: Weakly Supervised Instance and Semantic Segmentation. In CVPR .","DOI":"10.1109\/CVPR.2017.181"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2006.48"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.55"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1531326.1531376"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Sergi Caelles Kevis-Kokitsi Maninis Jordi Pont-Tuset Laura Leal-Taixe Daniel Cremers and Luc Van Gool. 2017. One-Shot Video Object Segmentation. In CVPR .  Sergi Caelles Kevis-Kokitsi Maninis Jordi Pont-Tuset Laura Leal-Taixe Daniel Cremers and Luc Van Gool. 2017. One-Shot Video Object Segmentation. In CVPR .","DOI":"10.1109\/CVPR.2017.565"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.267"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.312"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","unstructured":"Jingchun Cheng Yi-Hsuan Tsai Shengjin Wang and Ming-Hsuan Yang. 2017. SegFlow: Joint Learning for Video Object Segmentation and Optical Flow. In ICCV .  Jingchun Cheng Yi-Hsuan Tsai Shengjin Wang and Ming-Hsuan Yang. 2017. SegFlow: Joint Learning for Video Object Segmentation and Optical Flow. In ICCV .","DOI":"10.1109\/ICCV.2017.81"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.191"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"Martin Danelljan Goutam Bhat Fahad Shahbaz Khan and Michael Felsberg. 2017. ECO: Efficient Convolution Operators for Tracking. CVPR .  Martin Danelljan Goutam Bhat Fahad Shahbaz Khan and Michael Felsberg. 2017. ECO: Efficient Convolution Operators for Tracking. CVPR .","DOI":"10.1109\/CVPR.2017.733"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.490"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"Martin Danelljan Andreas Robinson Fahad Shahbaz Khan and Michael Felsberg. 2016. Beyond correlation filters: Learning continuous convolution operators for visual tracking. In ECCV .  Martin Danelljan Andreas Robinson Fahad Shahbaz Khan and Michael Felsberg. 2016. Beyond correlation filters: Learning continuous convolution operators for visual tracking. In ECCV .","DOI":"10.1007\/978-3-319-46454-1_29"},{"key":"e_1_3_2_1_13_1","unstructured":"Suyog Dutt Jain Bo Xiong and Kristen Grauman. 2017. FusionSeg: Learning to Combine Motion and Appearance for Fully Automatic Segmentation of Generic Objects in Videos. In CVPR .  Suyog Dutt Jain Bo Xiong and Kristen Grauman. 2017. FusionSeg: Learning to Combine Motion and Appearance for Fully Automatic Segmentation of Generic Objects in Videos. In CVPR ."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/2964284.2967279"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2816795.2818105"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Andreas Geiger Philip Lenz and Raquel Urtasun. 2012. Are we ready for autonomous driving? the kitti vision benchmark suite. In CVPR .   Andreas Geiger Philip Lenz and Raquel Urtasun. 2012. Are we ready for autonomous driving? the kitti vision benchmark suite. In CVPR .","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"Matthias Grundmann Vivek Kwatra Mei Han and Irfan Essa. 2010. Efficient hierarchical graph-based video segmentation. In CVPR .  Matthias Grundmann Vivek Kwatra Mei Han and Irfan Essa. 2010. Efficient hierarchical graph-based video segmentation. In CVPR .","DOI":"10.1109\/CVPR.2010.5539893"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2509974"},{"key":"e_1_3_2_1_19_1","unstructured":"Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2016. Deep residual learning for image recognition. In CVPR .  Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2016. Deep residual learning for image recognition. In CVPR ."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.354"},{"key":"e_1_3_2_1_21_1","unstructured":"Eddy Ilg Nikolaus Mayer Tonmoy Saikia Margret Keuper Alexey Dosovitskiy and Thomas Brox. Flownet 2.0: Evolution of optical flow estimation with deep networks. In CVPR .  Eddy Ilg Nikolaus Mayer Tonmoy Saikia Margret Keuper Alexey Dosovitskiy and Thomas Brox. Flownet 2.0: Evolution of optical flow estimation with deep networks. In CVPR ."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","unstructured":"Varun Jampani Raghudeep Gadde and Peter V Gehler. 2017. Video propagation networks. In CVPR .  Varun Jampani Raghudeep Gadde and Peter V Gehler. 2017. Video propagation networks. In CVPR .","DOI":"10.1109\/CVPR.2017.336"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2014.2368273"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.374"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/76.988659"},{"key":"e_1_3_2_1_26_1","volume-title":"The Visual Object Tracking VOT2017 Challenge Results. In ICCV Workshop on Visual Object Tracking Challenge .","author":"Kristan Matej"},{"key":"e_1_3_2_1_27_1","volume-title":"The Visual Object Tracking VOT2016 Challenge Results. In ECCV Workshop on Visual Object Tracking Challenge .","author":"Kristan Matej"},{"key":"e_1_3_2_1_28_1","unstructured":"Tsung-Yi Lin Michael Maire Serge Belongie James Hays Pietro Perona Deva Ramanan Piotr Doll\u00e1r and C Lawrence Zitnick. 2014. Microsoft coco: Common objects in context. In ECCV .  Tsung-Yi Lin Michael Maire Serge Belongie James Hays Pietro Perona Deva Ramanan Piotr Doll\u00e1r and C Lawrence Zitnick. 2014. Microsoft coco: Common objects in context. In ECCV ."},{"key":"e_1_3_2_1_29_1","unstructured":"Hyeonseob Nam and Bohyung Han. 2016. Learning multi-domain convolutional neural networks for visual tracking. In CVPR .  Hyeonseob Nam and Bohyung Han. 2016. Learning multi-domain convolutional neural networks for visual tracking. In CVPR ."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.223"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"crossref","unstructured":"Federico Perazzi Anna Khoreva Rodrigo Benenson Bernt Schiele and Alexander Sorkine-Hornung. 2017. Learning Video Object Segmentation From Static Images. In CVPR .  Federico Perazzi Anna Khoreva Rodrigo Benenson Bernt Schiele and Alexander Sorkine-Hornung. 2017. Learning Video Object Segmentation From Static Images. In CVPR .","DOI":"10.1109\/CVPR.2017.372"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"crossref","unstructured":"Federico Perazzi Jordi Pont-Tuset Brian McWilliams Luc Van Gool Markus Gross and Alexander Sorkine-Hornung. 2016. A Benchmark Dataset and Evaluation Methodology for Video Object Segmentation. In CVPR .  Federico Perazzi Jordi Pont-Tuset Brian McWilliams Luc Van Gool Markus Gross and Alexander Sorkine-Hornung. 2016. A Benchmark Dataset and Evaluation Methodology for Video Object Segmentation. In CVPR .","DOI":"10.1109\/CVPR.2016.85"},{"key":"e_1_3_2_1_33_1","unstructured":"X. Liu N.-G. Cho S.-W. Lee S. Fidler R. Urtasun R. Mottaghi X. Chen and A. Yuille. 2014. The role of context for object detection and semantic segmentation in the wild. In CVPR .  X. Liu N.-G. Cho S.-W. Lee S. Fidler R. Urtasun R. Mottaghi X. Chen and A. Yuille. 2014. The role of context for object detection and semantic segmentation in the wild. In CVPR ."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/1015706.1015720"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_1_36_1","unstructured":"Karen Simonyan and Andrew Zisserman. 2015. Very Deep Convolutional Networks for Large-Scale Image Recognition. (2015).  Karen Simonyan and Andrew Zisserman. 2015. Very Deep Convolutional Networks for Large-Scale Image Recognition. (2015)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2005.304"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"crossref","unstructured":"Pavel Tokmakov Karteek Alahari and Cordelia Schmid. 2017a. Learning motion patterns in videos. In CVPR .  Pavel Tokmakov Karteek Alahari and Cordelia Schmid. 2017a. Learning motion patterns in videos. In CVPR .","DOI":"10.1109\/CVPR.2017.64"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"crossref","unstructured":"Pavel Tokmakov Karteek Alahari and Cordelia Schmid. 2017b. Learning Video Object Segmentation with Visual Memory. In ICCV .  Pavel Tokmakov Karteek Alahari and Cordelia Schmid. 2017b. Learning Video Object Segmentation with Visual Memory. In ICCV .","DOI":"10.1109\/ICCV.2017.480"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"crossref","unstructured":"Yi-Hsuan Tsai Ming-Hsuan Yang and Michael J Black. 2016. Video segmentation via object flow. In CVPR .  Yi-Hsuan Tsai Ming-Hsuan Yang and Michael J Black. 2016. Video segmentation via object flow. In CVPR .","DOI":"10.1109\/CVPR.2016.423"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.444"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2014.2388226"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33783-3_45"},{"key":"e_1_3_2_1_45_1","unstructured":"JY Zhou Ee Ping Ong and Chi Chung Ko. Video object segmentation and tracking for content-based video coding. In ICME .  JY Zhou Ee Ping Ong and Chi Chung Ko. Video object segmentation and tracking for content-based video coding. In ICME ."}],"event":{"name":"MM '18: ACM Multimedia Conference","location":"Seoul Republic of Korea","acronym":"MM '18","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 26th ACM international conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3240508.3240638","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3240508.3240638","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T00:57:34Z","timestamp":1750208254000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3240508.3240638"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,10,15]]},"references-count":44,"alternative-id":["10.1145\/3240508.3240638","10.1145\/3240508"],"URL":"https:\/\/doi.org\/10.1145\/3240508.3240638","relation":{},"subject":[],"published":{"date-parts":[[2018,10,15]]},"assertion":[{"value":"2018-10-15","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}