{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T15:38:31Z","timestamp":1758123511844,"version":"3.41.0"},"publisher-location":"Cham","reference-count":36,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031918551","type":"print"},{"value":"9783031918568","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-91856-8_21","type":"book-chapter","created":{"date-parts":[[2025,5,23]],"date-time":"2025-05-23T10:06:01Z","timestamp":1747994761000},"page":"361-377","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["PVUW 2024 Challenge on\u00a0Complex Video Understanding: Methods and\u00a0Results"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4868-6526","authenticated-orcid":false,"given":"Henghui","family":"Ding","sequence":"first","affiliation":[]},{"given":"Chang","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2812-8781","authenticated-orcid":false,"given":"Yunchao","family":"Wei","sequence":"additional","affiliation":[]},{"given":"Nikhila","family":"Ravi","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1582-5684","authenticated-orcid":false,"given":"Shuting","family":"He","sequence":"additional","affiliation":[]},{"given":"Song","family":"Bai","sequence":"additional","affiliation":[]},{"given":"Philip","family":"Torr","sequence":"additional","affiliation":[]},{"given":"Deshui","family":"Miao","sequence":"additional","affiliation":[]},{"given":"Xin","family":"Li","sequence":"additional","affiliation":[]},{"given":"Zhenyu","family":"He","sequence":"additional","affiliation":[]},{"given":"Yaowei","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Ming-Hsuan","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Zhensong","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Jiangtao","family":"Yao","sequence":"additional","affiliation":[]},{"given":"Chengjing","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Ting","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Luoqi","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Xinyu","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Jing","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Kexin","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yuting","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Licheng","family":"Jiao","sequence":"additional","affiliation":[]},{"given":"Shuyuan","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Mingqi","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Jingnan","family":"Luo","sequence":"additional","affiliation":[]},{"given":"Jinyu","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Jungong","family":"Han","sequence":"additional","affiliation":[]},{"given":"Feng","family":"Zheng","sequence":"additional","affiliation":[]},{"given":"Bin","family":"Cao","sequence":"additional","affiliation":[]},{"given":"Yisi","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Xuanxu","family":"Lin","sequence":"additional","affiliation":[]},{"given":"Xingjian","family":"He","sequence":"additional","affiliation":[]},{"given":"Bo","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Jing","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Feiyu","family":"Pan","sequence":"additional","affiliation":[]},{"given":"Hao","family":"Fang","sequence":"additional","affiliation":[]},{"given":"Xiankai","family":"Lu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,12]]},"reference":[{"key":"21_CR1","unstructured":"Cao, B., Zhang, Y., Lin, X., He, X., Zhao, B., Liu, J.: 2nd place solution for MeViS track in CVPR 2024 PVUW workshop: motion expression guided video segmentation. arXiv preprint arXiv:2406.13939 (2024)"},{"key":"21_CR2","doi-asserted-by":"crossref","unstructured":"Cheng, B., Misra, I., Schwing, A.G., Kirillov, A., Girdhar, R.: Masked-attention mask transformer for universal image segmentation. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"21_CR3","doi-asserted-by":"crossref","unstructured":"Cheng, H.K., Oh, S.W., Price, B., Lee, J.Y., Schwing, A.: Putting the object back into video object segmentation. arXiv preprint arXiv:2310.12982 (2023)","DOI":"10.1109\/CVPR52733.2024.00304"},{"key":"21_CR4","doi-asserted-by":"crossref","unstructured":"Cheng, H.K., Schwing, A.G.: Xmem: long-term video object segmentation with an atkinson-shiffrin memory model. In: ECCV (2022)","DOI":"10.1007\/978-3-031-19815-1_37"},{"key":"21_CR5","doi-asserted-by":"crossref","unstructured":"Ding, H., Cohen, S., Price, B., Jiang, X.: Phraseclick: toward achieving flexible interactive segmentation by phrase and click. In: ECCV (2020)","DOI":"10.1007\/978-3-030-58580-8_25"},{"key":"21_CR6","unstructured":"Ding, H., et al.: LSVOS challenge report: Large-scale complex and long video object segmentation. In: ECCV Workshop (2024)"},{"key":"21_CR7","doi-asserted-by":"crossref","unstructured":"Ding, H., Jiang, X., Shuai, B., Liu, A.Q., Wang, G.: Context contrasted feature and gated multi-scale aggregation for scene segmentation. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00254"},{"key":"21_CR8","doi-asserted-by":"crossref","unstructured":"Ding, H., Liu, C., He, S., Jiang, X., Loy, C.C.: MeViS: a large-scale benchmark for video segmentation with motion expressions. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.00254"},{"key":"21_CR9","doi-asserted-by":"crossref","unstructured":"Ding, H., Liu, C., He, S., Jiang, X., Torr, P.H., Bai, S.: MOSE: a new dataset for video object segmentation in complex scenes. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.01850"},{"key":"21_CR10","doi-asserted-by":"crossref","unstructured":"Ding, H., Liu, C., Wang, S., Jiang, X.: Vision-language transformer and query generation for referring segmentation. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01601"},{"key":"21_CR11","doi-asserted-by":"crossref","unstructured":"Ding, H., Liu, C., Wang, S., Jiang, X.: VLT: vision-language transformer and query generation for referring segmentation. IEEE TPAMI (2023)","DOI":"10.1109\/TPAMI.2022.3217852"},{"key":"21_CR12","doi-asserted-by":"crossref","unstructured":"Fang, H., Zhang, T., Zhou, X., Zhang, X.: Learning better video query with SAM for video instance segmentation. IEEE TCSVT (2024)","DOI":"10.1109\/TCSVT.2024.3361076"},{"key":"21_CR13","unstructured":"Gao, M., Luo, J., Yang, J., Han, J., Zheng, F.: 1st place solution for MeViS track in CVPR 2024 PVUW workshop: motion expression guided video segmentation. arXiv preprint arXiv:2406.07043 (2024)"},{"key":"21_CR14","doi-asserted-by":"crossref","unstructured":"He, S., Ding, H.: Decoupling static and hierarchical motion perception for referring video segmentation. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.01266"},{"key":"21_CR15","unstructured":"Ke, L., et al.: Segment anything in high quality. In: NeurIPS (2024)"},{"key":"21_CR16","doi-asserted-by":"crossref","unstructured":"Kirillov, A., et al.: Segment anything. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"21_CR17","doi-asserted-by":"crossref","unstructured":"Li, X., et al.: Transformer-based visual segmentation: a survey. IEEE TPAMI (2024)","DOI":"10.1109\/TPAMI.2024.3434373"},{"key":"21_CR18","doi-asserted-by":"crossref","unstructured":"Liu, C., Ding, H., Jiang, X.: GRES: generalized referring expression segmentation. In: CVPR, pp. 23592\u201323601 (2023)","DOI":"10.1109\/CVPR52729.2023.02259"},{"key":"21_CR19","doi-asserted-by":"crossref","unstructured":"Liu, C., Ding, H., Zhang, Y., Jiang, X.: Multi-modal mutual attention and iterative interaction for referring image segmentation. IEEE TIP (2023)","DOI":"10.1109\/TIP.2023.3277791"},{"key":"21_CR20","doi-asserted-by":"crossref","unstructured":"Liu, C., Jiang, X., Ding, H.: Instance-specific feature propagation for referring segmentation. IEEE TMM (2023)","DOI":"10.1109\/TMM.2022.3163578"},{"issue":"1","key":"21_CR21","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1007\/s44267-024-00049-8","volume":"2","author":"C Liu","year":"2024","unstructured":"Liu, C., Jiang, X., Ding, H.: PrimitiveNet: decomposing the global constraints for referring segmentation. Vis. Intell. 2(1), 16 (2024)","journal-title":"Vis. Intell."},{"key":"21_CR22","unstructured":"Liu, X., Zhang, J., Zhang, K., Yang, Y., Jiao, L., Yang, S.: 3rd place solution for MOSE track in CVPR 2024 PVUW workshop: complex video object segmentation. arXiv preprint arXiv:2406.03668 (2024)"},{"key":"21_CR23","doi-asserted-by":"crossref","unstructured":"Liu, Z., Mao, H., Wu, C.Y., Feichtenhofer, C., Darrell, T., Xie, S.: A convnet for the 2020s. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"21_CR24","doi-asserted-by":"crossref","unstructured":"Mao, J., Huang, J., Toshev, A., Camburu, O., Yuille, A.L., Murphy, K.: Generation and comprehension of unambiguous object descriptions. In: CVPR, pp. 11\u201320 (2016)","DOI":"10.1109\/CVPR.2016.9"},{"key":"21_CR25","unstructured":"Miao, D., Li, X., He, Z., Wang, Y., Yang, M.H.: 1st place solution for MOSE track in CVPR 2024 PVUW workshop: complex video object segmentation. arXiv preprint arXiv:2406.04600 (2024)"},{"key":"21_CR26","unstructured":"Pan, F., Fang, H., Lu, X.: 3rd place solution for MeViS track in CVPR 2024 PVUW workshop: motion expression guided video segmentation. arXiv preprint arXiv:2406.04842 (2024)"},{"key":"21_CR27","unstructured":"Pavao, A., et al.: Codalab competitions: an open source platform to organize scientific challenges. J. Mach. Learn. Res. (2023). http:\/\/jmlr.org\/papers\/v24\/21-1436.html"},{"key":"21_CR28","doi-asserted-by":"crossref","unstructured":"Perazzi, F., Pont-Tuset, J., McWilliams, B., Van Gool, L., Gross, M., Sorkine-Hornung, A.: A benchmark dataset and evaluation methodology for video object segmentation. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.85"},{"key":"21_CR29","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: ICML (2021)"},{"key":"21_CR30","doi-asserted-by":"crossref","unstructured":"Seo, S., Lee, J.Y., Han, B.: Urvos: unified referring video object segmentation network with a large-scale benchmark. In: ECCV (2020)","DOI":"10.1007\/978-3-030-58555-6_13"},{"key":"21_CR31","doi-asserted-by":"crossref","unstructured":"Wu, J., et al.: Towards open vocabulary learning: a survey. IEEE TPAMI (2024)","DOI":"10.1109\/TPAMI.2024.3361862"},{"key":"21_CR32","unstructured":"Xu, N., et al.: Youtube-vos: a large-scale video object segmentation benchmark. CoRR abs\/1809.03327 (2018). http:\/\/arxiv.org\/abs\/1809.03327"},{"key":"21_CR33","unstructured":"Xu, Z., Yao, J., Wu, C., Liu, T., Liu, L.: 2nd place solution for MOSE track in CVPR 2024 PVUW workshop: complex video object segmentation. arXiv preprint arXiv:2406.08192 (2024)"},{"key":"21_CR34","doi-asserted-by":"crossref","unstructured":"Yan, S., et al.: Referred by multi-modality: a unified temporal transformer for video object segmentation. In: AAAI (2024)","DOI":"10.1609\/aaai.v38i6.28465"},{"key":"21_CR35","doi-asserted-by":"crossref","unstructured":"Yu, L., Poirson, P., Yang, S., Berg, A.C., Berg, T.L.: Modeling context in referring expressions. In: ECCV (2016)","DOI":"10.1007\/978-3-319-46475-6_5"},{"key":"21_CR36","doi-asserted-by":"crossref","unstructured":"Zhang, T., et al.: DVIS: decoupled video instance segmentation framework. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.00124"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-91856-8_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,23]],"date-time":"2025-05-23T10:06:14Z","timestamp":1747994774000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-91856-8_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031918551","9783031918568"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-91856-8_21","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"12 May 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}