{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T13:27:23Z","timestamp":1773840443600,"version":"3.50.1"},"publisher-location":"Cham","reference-count":66,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031729393","type":"print"},{"value":"9783031729409","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,17]],"date-time":"2024-11-17T00:00:00Z","timestamp":1731801600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,17]],"date-time":"2024-11-17T00:00:00Z","timestamp":1731801600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72940-9_24","type":"book-chapter","created":{"date-parts":[[2024,11,16]],"date-time":"2024-11-16T21:31:24Z","timestamp":1731792684000},"page":"421-438","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["$$\\mathrm R^2$$-Tuning: Efficient Image-to-Video Transfer Learning for\u00a0Video Temporal Grounding"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9597-0525","authenticated-orcid":false,"given":"Ye","family":"Liu","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0006-0543-4475","authenticated-orcid":false,"given":"Jixuan","family":"He","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2730-0543","authenticated-orcid":false,"given":"Wanhua","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2555-5232","authenticated-orcid":false,"given":"Junsik","family":"Kim","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2329-5484","authenticated-orcid":false,"given":"Donglai","family":"Wei","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3620-2582","authenticated-orcid":false,"given":"Hanspeter","family":"Pfister","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6720-234X","authenticated-orcid":false,"given":"Chang Wen","family":"Chen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,17]]},"reference":[{"key":"24_CR1","doi-asserted-by":"crossref","unstructured":"Anne\u00a0Hendricks, L., Wang, O., Shechtman, E., Sivic, J., Darrell, T., Russell, B.: Localizing moments in video with natural language. In: ICCV, pp. 5803\u20135812 (2017)","DOI":"10.1109\/ICCV.2017.618"},{"issue":"11","key":"24_CR2","doi-asserted-by":"publisher","first-page":"1838","DOI":"10.1109\/JPROC.2021.3117472","volume":"109","author":"E Apostolidis","year":"2021","unstructured":"Apostolidis, E., Adamantidou, E., Metsai, A.I., Mezaris, V., Patras, I.: Video summarization using deep neural networks: a survey. Proc. IEEE 109(11), 1838\u20131863 (2021)","journal-title":"Proc. IEEE"},{"key":"24_CR3","doi-asserted-by":"crossref","unstructured":"Badamdorj, T., Rochan, M., Wang, Y., Cheng, L.: Joint visual and audio learning for video highlight detection. In: ICCV, pp. 
8127\u20138137 (2021)","DOI":"10.1109\/ICCV48922.2021.00802"},{"key":"24_CR4","unstructured":"Bain, M., Nagrani, A., Varol, G., Zisserman, A.: A CLIP-Hitchhiker\u2019s guide to long video retrieval. Tech. Rep. arXiv:2205.08508 (2022)"},{"key":"24_CR5","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? A new model and the kinetics dataset. In: CVPR, pp. 6299\u20136308 (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"24_CR6","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16$$\\,\\times \\,$$16 words: transformers for image recognition at scale. In: ICLR (2021)"},{"key":"24_CR7","unstructured":"Escorcia, V., Soldan, M., Sivic, J., Ghanem, B., Russell, B.: Temporal localization of moments in video collections with natural language. Tech. Rep. arXiv:1907.12763 (2019)"},{"key":"24_CR8","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Fan, H., Malik, J., He, K.: SlowFast networks for video recognition. In: ICCV, pp. 6202\u20136211 (2019)","DOI":"10.1109\/ICCV.2019.00630"},{"key":"24_CR9","doi-asserted-by":"crossref","unstructured":"Gao, J., Sun, C., Yang, Z., Nevatia, R.: TALL: temporal activity localization via language query. In: ICCV, pp. 5267\u20135275 (2017)","DOI":"10.1109\/ICCV.2017.563"},{"key":"24_CR10","unstructured":"Grauman, K., et\u00a0al.: Ego4D: around the world in 3,000 hours of egocentric video. In: CVPR, pp. 18995\u201319012 (2022)"},{"key":"24_CR11","doi-asserted-by":"crossref","unstructured":"Gygli, M., Grabner, H., Riemenschneider, H., Van\u00a0Gool, L.: Creating summaries from user videos. In: ECCV, pp. 505\u2013520 (2014)","DOI":"10.1007\/978-3-319-10584-0_33"},{"key":"24_CR12","doi-asserted-by":"crossref","unstructured":"Gygli, M., Song, Y., Cao, L.: Video2GIF: automatic generation of animated GIFs from video. In: CVPR, pp. 1001\u20131009 (2016)","DOI":"10.1109\/CVPR.2016.114"},{"key":"24_CR13","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"24_CR14","doi-asserted-by":"crossref","unstructured":"Hong, F.T., Huang, X., Li, W.H., Zheng, W.S.: MINI-Net: multiple instance ranking network for video highlight detection. In: ECCV, pp. 345\u2013360 (2020)","DOI":"10.1007\/978-3-030-58601-0_21"},{"key":"24_CR15","doi-asserted-by":"crossref","unstructured":"Huang, S., et al.: VoP: text-video co-operative prompt tuning for cross-modal retrieval. In: CVPR, pp. 6565\u20136574 (2023)","DOI":"10.1109\/CVPR52729.2023.00635"},{"key":"24_CR16","doi-asserted-by":"crossref","unstructured":"Jang, J., Park, J., Kim, J., Kwon, H., Sohn, K.: Knowing where to focus: event-aware transformer for video grounding. In: ICCV, pp. 13846\u201313856 (2023)","DOI":"10.1109\/ICCV51070.2023.01273"},{"key":"24_CR17","doi-asserted-by":"crossref","unstructured":"Jia, M., et al.: Visual prompt tuning. In: ECCV, pp. 709\u2013727 (2022)","DOI":"10.1007\/978-3-031-19827-4_41"},{"key":"24_CR18","doi-asserted-by":"crossref","unstructured":"Jiang, R., Liu, L., Chen, C.: CLIP-Count: towards text-guided zero-shot object counting. In: ACM MM (2023)","DOI":"10.1145\/3581783.3611789"},{"key":"24_CR19","doi-asserted-by":"crossref","unstructured":"Ju, C., Han, T., Zheng, K., Zhang, Y., Xie, W.: Prompting visual-language models for efficient video understanding. In: ECCV, pp. 
105\u2013124 (2022)","DOI":"10.1007\/978-3-031-19833-5_7"},{"key":"24_CR20","doi-asserted-by":"publisher","first-page":"2880","DOI":"10.1109\/TASLP.2020.3030497","volume":"28","author":"Q Kong","year":"2020","unstructured":"Kong, Q., Cao, Y., Iqbal, T., Wang, Y., Wang, W., Plumbley, M.D.: PANNs: large-scale pretrained audio neural networks for audio pattern recognition. IEEE\/ACM Trans. Audio, Speech Lang. Process. 28, 2880\u20132894 (2020)","journal-title":"IEEE\/ACM Trans. Audio, Speech Lang. Process."},{"key":"24_CR21","unstructured":"Larsson, G., Maire, M., Shakhnarovich, G.: FractalNet: ultra-deep neural networks without residuals. Tech. Rep. arXiv:1605.07648 (2016)"},{"key":"24_CR22","unstructured":"Lei, J., Berg, T.L., Bansal, M.: QVHighlights: detecting moments and highlights in videos via natural language queries. In: NeurIPS (2021)"},{"key":"24_CR23","doi-asserted-by":"crossref","unstructured":"Lei, J., Yu, L., Berg, T.L., Bansal, M.: TVR: a large-scale dataset for video-subtitle moment retrieval. In: ECCV, pp. 447\u2013463 (2020)","DOI":"10.1007\/978-3-030-58589-1_27"},{"key":"24_CR24","unstructured":"Li, P., et al.: MomentDiff: generative video moment retrieval from random to real. Tech. Rep. arXiv:2307.02869 (2023)"},{"key":"24_CR25","unstructured":"Li, S., et al.: Probing visual-audio representation for video highlight detection via hard-pairs guided contrastive learning. Tech. Rep. arXiv:2206.10157 (2022)"},{"key":"24_CR26","doi-asserted-by":"crossref","unstructured":"Lin, K.Q., et al.: UniVTG: towards unified video-language temporal grounding. In: ICCV, pp. 2794\u20132804 (2023)","DOI":"10.1109\/ICCV51070.2023.00262"},{"key":"24_CR27","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. In: ICCV, pp. 2980\u20132988 (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"24_CR28","doi-asserted-by":"crossref","unstructured":"Lin, Z., et al.: Frozen CLIP models are efficient video learners. In: ECCV, pp. 388\u2013404 (2022)","DOI":"10.1007\/978-3-031-19833-5_23"},{"key":"24_CR29","unstructured":"Liu, L., Yu, B.X., Chang, J., Tian, Q., Chen, C.W.: Prompt-matched semantic segmentation. Tech. Rep. arXiv:2208.10159 (2022)"},{"key":"24_CR30","doi-asserted-by":"crossref","unstructured":"Liu, W., Mei, T., Zhang, Y., Che, C., Luo, J.: Multi-task deep visual-semantic embedding for video thumbnail selection. In: CVPR, pp. 3707\u20133715 (2015)","DOI":"10.1109\/CVPR.2015.7298994"},{"key":"24_CR31","doi-asserted-by":"crossref","unstructured":"Liu, Y., Li, S., Wu, Y., Chen, C.W., Shan, Y., Qie, X.: UMT: unified multi-modal transformers for joint video moment retrieval and highlight detection. In: CVPR, pp. 3042\u20133051 (2022)","DOI":"10.1109\/CVPR52688.2022.00305"},{"key":"24_CR32","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1016\/j.neucom.2022.07.028","volume":"508","author":"H Luo","year":"2022","unstructured":"Luo, H., et al.: CLIP4Clip: an empirical study of CLIP for end to end video clip retrieval. Neurocomputing 508, 293\u2013304 (2022)","journal-title":"Neurocomputing"},{"key":"24_CR33","doi-asserted-by":"crossref","unstructured":"Mahasseni, B., Lam, M., Todorovic, S.: Unsupervised video summarization with adversarial LSTM networks. In: CVPR, pp. 202\u2013211 (2017)","DOI":"10.1109\/CVPR.2017.318"},{"key":"24_CR34","unstructured":"Moon, W., Hyun, S., Lee, S., Heo, J.P.: Correlation-guided query-dependency calibration in video representation learning for temporal grounding. Tech. Rep. 
arXiv:2311.08835 (2023)"},{"key":"24_CR35","doi-asserted-by":"crossref","unstructured":"Moon, W., Hyun, S., Park, S., Park, D., Heo, J.P.: Query-dependent video representation for moment retrieval and highlight detection. In: CVPR, pp. 23023\u201323033 (2023)","DOI":"10.1109\/CVPR52729.2023.02205"},{"key":"24_CR36","doi-asserted-by":"crossref","unstructured":"Nan, G., et al.: Interventional video grounding with dual contrastive learning. In: CVPR, pp. 2765\u20132775 (2021)","DOI":"10.1109\/CVPR46437.2021.00279"},{"key":"24_CR37","doi-asserted-by":"crossref","unstructured":"Ni, B., et al.: Expanding language-image pretrained models for general video recognition. In: ECCV, pp. 1\u201318 (2022)","DOI":"10.1007\/978-3-031-19772-7_1"},{"key":"24_CR38","unstructured":"Oord, A.v.d., Li, Y., Vinyals, O.: Representation learning with contrastive predictive coding. In: NeurIPS (2018)"},{"key":"24_CR39","unstructured":"Pan, J., Lin, Z., Zhu, X., Shao, J., Li, H.: ST-Adapter: parameter-efficient image-to-video transfer learning. In: NeurIPS, pp. 26462\u201326477 (2022)"},{"key":"24_CR40","doi-asserted-by":"crossref","unstructured":"Qing, Z., et al.: Disentangling spatial and temporal learning for efficient image-to-video transfer learning. In: ICCV, pp. 13934\u201313944 (2023)","DOI":"10.1109\/ICCV51070.2023.01281"},{"key":"24_CR41","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: ICML, pp. 8748\u20138763 (2021)"},{"key":"24_CR42","doi-asserted-by":"crossref","unstructured":"Rasheed, H., Khattak, M.U., Maaz, M., Khan, S., Khan, F.S.: Fine-tuned CLIP models are efficient video learners. In: CVPR, pp. 6545\u20136554 (2023)","DOI":"10.1109\/CVPR52729.2023.00633"},{"key":"24_CR43","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1162\/tacl_a_00207","volume":"1","author":"M Regneri","year":"2013","unstructured":"Regneri, M., Rohrbach, M., Wetzel, D., Thater, S., Schiele, B., Pinkal, M.: Grounding action descriptions in videos. Trans. Assoc. Comput. Linguist. 1, 25\u201336 (2013)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"24_CR44","doi-asserted-by":"crossref","unstructured":"Song, Y., Redi, M., Vallmitjana, J., Jaimes, A.: To click or not to click: automatic selection of beautiful thumbnails from videos. In: CIKM, pp. 659\u2013668 (2016)","DOI":"10.1145\/2983323.2983349"},{"key":"24_CR45","doi-asserted-by":"crossref","unstructured":"Song, Y., Vallmitjana, J., Stent, A., Jaimes, A.: TVSum: summarizing web videos using titles. In: CVPR, pp. 5179\u20135187 (2015)","DOI":"10.1109\/CVPR.2015.7299154"},{"key":"24_CR46","doi-asserted-by":"crossref","unstructured":"Sun, H., Zhou, M., Chen, W., Xie, W.: TR-DETR: task-reciprocal transformer for joint moment retrieval and highlight detection. In: AAAI (2024)","DOI":"10.1609\/aaai.v38i5.28304"},{"key":"24_CR47","doi-asserted-by":"crossref","unstructured":"Sun, M., Farhadi, A., Seitz, S.: Ranking domain-specific highlights by analyzing edited videos. In: ECCV, pp. 787\u2013802 (2014)","DOI":"10.1007\/978-3-319-10590-1_51"},{"key":"24_CR48","unstructured":"Sung, Y.L., Cho, J., Bansal, M.: LST: ladder side-tuning for parameter and memory efficient transfer learning. In: NeurIPS, pp. 12991\u201313005 (2022)"},{"key":"24_CR49","unstructured":"Touvron, H., et\u00a0al.: LLaMA: open and efficient foundation language models. Tech. Rep. arXiv:2302.13971 (2023)"},{"key":"24_CR50","unstructured":"Vaswani, A., et al.: Attention is all you need. In: NeurIPS, pp. 
5998\u20136008 (2017)"},{"key":"24_CR51","doi-asserted-by":"crossref","unstructured":"Wang, L., Liu, D., Puri, R., Metaxas, D.N.: Learning trailer moments in full-length movies. In: ECCV, pp. 300\u2013316 (2020)","DOI":"10.1007\/978-3-030-58523-5_18"},{"key":"24_CR52","doi-asserted-by":"crossref","unstructured":"Wang, Z., Wang, L., Wu, T., Li, T., Wu, G.: Negative sample matters: a renaissance of metric learning for temporal grounding. In: AAAI, pp. 2613\u20132623 (2022)","DOI":"10.1609\/aaai.v36i3.20163"},{"key":"24_CR53","doi-asserted-by":"crossref","unstructured":"Wei, F., Wang, B., Ge, T., Jiang, Y., Li, W., Duan, L.: Learning pixel-level distinctions for video highlight detection. In: CVPR, pp. 3073\u20133082 (2022)","DOI":"10.1109\/CVPR52688.2022.00308"},{"key":"24_CR54","doi-asserted-by":"crossref","unstructured":"Xiong, B., Kalantidis, Y., Ghadiyaram, D., Grauman, K.: Less is more: learning highlight detection from video duration. In: CVPR, pp. 1258\u20131267 (2019)","DOI":"10.1109\/CVPR.2019.00135"},{"key":"24_CR55","doi-asserted-by":"crossref","unstructured":"Xu, M., Wang, H., Ni, B., Zhu, R., Sun, Z., Wang, C.: Cross-category video highlight detection via set-based learning. In: ICCV, pp. 7970\u20137979 (2021)","DOI":"10.1109\/ICCV48922.2021.00787"},{"key":"24_CR56","doi-asserted-by":"crossref","unstructured":"Xu, Y., Sun, Y., Li, Y., Shi, Y., Zhu, X., Du, S.: MH-DETR: video moment and highlight detection with cross-modal transformer. Tech. Rep. arXiv:2305.00355 (2023)","DOI":"10.1109\/IJCNN60899.2024.10650814"},{"key":"24_CR57","doi-asserted-by":"crossref","unstructured":"Yan, S., et al.: UnLoc: a unified framework for video localization tasks. In: ICCV, pp. 13623\u201313633 (2023)","DOI":"10.1109\/ICCV51070.2023.01253"},{"key":"24_CR58","doi-asserted-by":"crossref","unstructured":"Yang, H., Wang, B., Lin, S., Wipf, D., Guo, M., Guo, B.: Unsupervised extraction of video highlights via robust recurrent auto-encoders. In: ICCV, pp. 4633\u20134641 (2015)","DOI":"10.1109\/ICCV.2015.526"},{"key":"24_CR59","doi-asserted-by":"crossref","unstructured":"Ye, Q., Shen, X., Gao, Y., Wang, Z., Bi, Q., Li, P., Yang, G.: Temporal cue guided video highlight detection with low-rank audio-visual fusion. In: ICCV, pp. 7950\u20137959 (2021)","DOI":"10.1109\/ICCV48922.2021.00785"},{"key":"24_CR60","unstructured":"Yuan, T., Zhang, X., Liu, K., Liu, B., Jin, J., Jiao, Z.: UCF-crime annotation: a benchmark for surveillance video-and-language understanding. Tech. Rep. arXiv:2309.13925 (2023)"},{"key":"24_CR61","doi-asserted-by":"crossref","unstructured":"Yuan, Y., Ma, L., Wang, J., Liu, W., Zhu, W.: Semantic conditioned dynamic modulation for temporal sentence grounding in videos. In: NeurIPS (2019)","DOI":"10.1109\/TPAMI.2020.3038993"},{"key":"24_CR62","doi-asserted-by":"crossref","unstructured":"Zhang, H., Sun, A., Jing, W., Zhou, J.T.: Span-based localizing network for natural language video localization. In: ACL (2020)","DOI":"10.18653\/v1\/2020.acl-main.585"},{"key":"24_CR63","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"766","DOI":"10.1007\/978-3-319-46478-7_47","volume-title":"Computer Vision \u2013 ECCV 2016","author":"K Zhang","year":"2016","unstructured":"Zhang, K., Chao, W.-L., Sha, F., Grauman, K.: Video summarization with long short-term memory. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9911, pp. 766\u2013782. Springer, Cham (2016). 
https:\/\/doi.org\/10.1007\/978-3-319-46478-7_47"},{"key":"24_CR64","doi-asserted-by":"crossref","unstructured":"Zhang, S., Peng, H., Fu, J., Luo, J.: Learning 2D temporal adjacent networks for moment localization with natural language. In: AAAI, pp. 12870\u201312877 (2020)","DOI":"10.1609\/aaai.v34i07.6984"},{"key":"24_CR65","doi-asserted-by":"crossref","unstructured":"Zhou, K., Yang, J., Loy, C.C., Liu, Z.: Conditional prompt learning for vision-language models. In: CVPR, pp. 16816\u201316825 (2022)","DOI":"10.1109\/CVPR52688.2022.01631"},{"issue":"9","key":"24_CR66","doi-asserted-by":"publisher","first-page":"2337","DOI":"10.1007\/s11263-022-01653-1","volume":"130","author":"K Zhou","year":"2022","unstructured":"Zhou, K., Yang, J., Loy, C.C., Liu, Z.: Learning to prompt for vision-language models. Int. J. Comput. Vis. 130(9), 2337\u20132348 (2022)","journal-title":"Int. J. Comput. Vis."}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72940-9_24","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,16]],"date-time":"2024-11-16T21:35:56Z","timestamp":1731792956000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72940-9_24"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,17]]},"ISBN":["9783031729393","9783031729409"],"references-count":66,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72940-9_24","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,17]]},"assertion":[{"value":"17 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}
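The record above is a standard Crossref REST API work message for DOI 10.1007/978-3-031-72940-9_24. As a minimal sketch of consuming it (assuming the public https://api.crossref.org endpoint and the third-party requests package; the mailto address is a placeholder, not part of the record), the same record can be fetched and summarized like this:

import requests

DOI = "10.1007/978-3-031-72940-9_24"

# Crossref's "polite pool" asks clients to identify themselves with a mailto
# in the User-Agent; the address below is a placeholder for illustration.
headers = {"User-Agent": "metadata-check/0.1 (mailto:you@example.org)"}

resp = requests.get(f"https://api.crossref.org/works/{DOI}", headers=headers, timeout=30)
resp.raise_for_status()
msg = resp.json()["message"]  # same envelope as the record above

print("Title:     ", msg["title"][0])
print("Authors:   ", ", ".join(f"{a.get('given', '')} {a['family']}".strip() for a in msg["author"]))
print("In:        ", "; ".join(msg.get("container-title", [])))
print("Published: ", "-".join(str(p) for p in msg["published"]["date-parts"][0]))
print("References:", msg["references-count"])
print("URL:       ", msg["URL"])

If the deposit is unchanged, this should print the chapter title, the seven authors, the two container titles (Lecture Notes in Computer Science; Computer Vision – ECCV 2024), the 2024-11-17 online publication date, and the 66 deposited references, matching the fields shown above.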