{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T16:57:49Z","timestamp":1777654669791,"version":"3.51.4"},"publisher-location":"Cham","reference-count":68,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031729515","type":"print"},{"value":"9783031729522","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72952-2_25","type":"book-chapter","created":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T05:02:02Z","timestamp":1727672522000},"page":"436-455","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Turbo: Informativity-Driven Acceleration Plug-In for\u00a0Vision-Language Large Models"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8472-7677","authenticated-orcid":false,"given":"Chen","family":"Ju","sequence":"first","affiliation":[]},{"given":"Haicheng","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Haozhe","family":"Cheng","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5299-7074","authenticated-orcid":false,"given":"Xu","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Zhonghua","family":"Zhai","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1520-4140","authenticated-orcid":false,"given":"Weilin","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Jinsong","family":"Lan","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8550-5064","authenticated-orcid":false,"given":"Shuai","family":"Xiao","sequence":"additional","affiliation":[]},{"given":"Bo","family":"Zheng","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,1]]},"reference":[{"key":"25_CR1","doi-asserted-by":"crossref","unstructured":"Bigham, J.P., et\u00a0al.: Vizwiz: nearly real-time answers to visual questions. In: Proceedings of the 23nd Annual ACM Symposium on User Interface Software and Technology, pp. 333\u2013342 (2010)","DOI":"10.1145\/1866029.1866080"},{"key":"25_CR2","unstructured":"Bolya, D., Fu, C.Y., Dai, X., Zhang, P., Feichtenhofer, C., Hoffman, J.: Token merging: your vit but faster. arXiv preprint arXiv:2210.09461 (2022)"},{"key":"25_CR3","doi-asserted-by":"crossref","unstructured":"Bolya, D., Hoffman, J.: Token merging for fast stable diffusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4599\u20134603 (2023)","DOI":"10.1109\/CVPRW59228.2023.00484"},{"key":"25_CR4","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown, T., et al.: Language models are few-shot learners. Adv. Neural. Inf. Process. Syst. 33, 1877\u20131901 (2020)","journal-title":"Adv. Neural. Inf. Process. 
Syst."},{"key":"25_CR5","unstructured":"Chen, J., et al.: Minigpt-v2: large language model as a unified interface for vision-language multi-task learning. arXiv preprint arXiv:2310.09478 (2023)"},{"key":"25_CR6","unstructured":"Chen, M., et al.: Wear-any-way: manipulable virtual try-on via sparse correspondence alignment. arXiv preprint arXiv:2403.12965 (2024)"},{"key":"25_CR7","doi-asserted-by":"crossref","unstructured":"Chen, X., et al.: Enhancing cross-domain click-through rate prediction via explicit feature augmentation. In: Companion Proceedings of the ACM on Web Conference 2024 (2024)","DOI":"10.1145\/3589335.3648341"},{"key":"25_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"104","DOI":"10.1007\/978-3-030-58577-8_7","volume-title":"Computer Vision \u2013 ECCV 2020","author":"Y-C Chen","year":"2020","unstructured":"Chen, Y.-C., et al.: UNITER: UNiversal image-TExt representation learning. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12375, pp. 104\u2013120. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58577-8_7"},{"key":"25_CR9","unstructured":"Cheng, H., et al.: Denoiser: rethinking the robustness for open-vocabulary action recognition. arXiv preprint arXiv:2404.14890 (2024)"},{"key":"25_CR10","unstructured":"Cheng, Z., et al.: Image to multi-modal retrieval for industrial scenarios. arXiv preprint arXiv:2305.03972 (2023)"},{"key":"25_CR11","unstructured":"Cheng, Z., Xiao, S., Zhai, Z., Zeng, X., Huang, W.: Mixer: image to multi-modal retrieval learning for industrial application. arXiv preprint arXiv:2305.03972 (2023)"},{"key":"25_CR12","doi-asserted-by":"crossref","unstructured":"Esser, P., Rombach, R., Ommer, B.: Taming transformers for high-resolution image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12873\u201312883 (2021)","DOI":"10.1109\/CVPR46437.2021.01268"},{"key":"25_CR13","doi-asserted-by":"crossref","unstructured":"Fang, Z., Wang, J., Hu, X., Wang, L., Yang, Y., Liu, Z.: Compressing visual-linguistic model via knowledge distillation. In: Proceedings of the International Conference on Computer Vision (2021)","DOI":"10.1109\/ICCV48922.2021.00146"},{"key":"25_CR14","doi-asserted-by":"publisher","unstructured":"Fayyaz, M., et al.: Adaptive token sampling for efficient vision transformers. In: European Conference on Computer Vision, pp. 396\u2013414. Springer, Heidelberg (2022). https:\/\/doi.org\/10.1007\/978-3-031-20083-0_24","DOI":"10.1007\/978-3-031-20083-0_24"},{"key":"25_CR15","unstructured":"Frantar, E., Ashkboos, S., Hoefler, T., Alistarh, D.: GPTQ: accurate post-training quantization for generative pre-trained transformers. arXiv preprint arXiv:2210.17323 (2022)"},{"key":"25_CR16","doi-asserted-by":"crossref","unstructured":"He, K., Chen, X., Xie, S., Li, Y., Doll\u00e1r, P., Girshick, R.: Masked autoencoders are scalable vision learners. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16000\u201316009 (2022)","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"25_CR17","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural Inf. Process. Syst. (2020)"},{"key":"25_CR18","unstructured":"Huang, P.Y., et\u00a0al.: Mavil: masked audio-video learners. Adv. Neural Inf. Process. Syst. 
(2024)"},{"key":"25_CR19","doi-asserted-by":"crossref","unstructured":"Hudson, D.A., Manning, C.D.: GQA: a new dataset for real-world visual reasoning and compositional question answering. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6700\u20136709 (2019)","DOI":"10.1109\/CVPR.2019.00686"},{"key":"25_CR20","unstructured":"Jia, C., et al.: Scaling up visual and vision-language representation learning with noisy text supervision. In: Proceedings of the International Conference on Machine Learning (2021)"},{"key":"25_CR21","doi-asserted-by":"crossref","unstructured":"Jiang, C., et al.: Trips: efficient vision-and-language pre-training with text-relevant image patch selection. In: Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, pp. 4084\u20134096 (2022)","DOI":"10.18653\/v1\/2022.emnlp-main.273"},{"key":"25_CR22","doi-asserted-by":"publisher","unstructured":"Ju, C., Han, T., Zheng, K., Zhang, Y., Xie, W.: Prompting visual-language models for efficient video understanding. In: Proceedings of the European Conference on Computer Vision. Springer, Heidelberg (2022). https:\/\/doi.org\/10.1007\/978-3-031-19833-5_7","DOI":"10.1007\/978-3-031-19833-5_7"},{"key":"25_CR23","unstructured":"Ju, C., et al.: Multi-modal prompting for low-shot temporal action localization. arXiv preprint arXiv:2303.11732 (2023)"},{"key":"25_CR24","unstructured":"Ju, C., et al.: Constraint and union for partially-supervised temporal sentence grounding. arXiv preprint arXiv:2302.09850 (2023)"},{"key":"25_CR25","doi-asserted-by":"crossref","unstructured":"Ju, C., Zhao, P., Chen, S., Zhang, Y., Wang, Y., Tian, Q.: Divide and conquer for single-frame temporal action localization. In: Proceedings of the International Conference on Computer Vision (2021)","DOI":"10.1109\/ICCV48922.2021.01320"},{"key":"25_CR26","doi-asserted-by":"crossref","unstructured":"Ju, C., et al.: Adaptive mutual supervision for weakly-supervised temporal action localization. IEEE Trans. Multimedia (2022)","DOI":"10.1109\/TMM.2022.3213478"},{"key":"25_CR27","unstructured":"Ju, C., Zhao, P., Zhang, Y., Wang, Y., Tian, Q.: Point-level temporal action localization: bridging fully-supervised proposals to weakly-supervised losses. arXiv preprint arXiv:2012.08236 (2020)"},{"key":"25_CR28","doi-asserted-by":"crossref","unstructured":"Ju, C., et al.: Distilling vision-language pre-training to collaborate with weakly-supervised temporal action localization. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2023)","DOI":"10.1109\/CVPR52729.2023.01417"},{"key":"25_CR29","unstructured":"Kim, W., Son, B., Kim, I.: Vilt: vision-and-language transformer without convolution or region supervision. In: Proceedings of the International Conference on Machine Learning. PMLR (2021)"},{"key":"25_CR30","unstructured":"Li, J., Li, D., Savarese, S., Hoi, S.: Blip-2: bootstrapping language-image pre-training with frozen image encoders and large language models. arXiv preprint arXiv:2301.12597 (2023)"},{"key":"25_CR31","unstructured":"Li, J., Li, D., Xiong, C., Hoi, S.: Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation. In: International Conference on Machine Learning, pp. 12888\u201312900. PMLR (2022)"},{"key":"25_CR32","unstructured":"Liang, Y., Ge, C., Tong, Z., Song, Y., Wang, J., Xie, P.: Not all patches are what you need: expediting vision transformers via token reorganizations. 
arXiv preprint arXiv:2202.07800 (2022)"},{"key":"25_CR33","unstructured":"Liu, D., Kan, M., Shan, S., Xilin, C.: A simple romance between multi-exit vision transformer and token reduction. In: The Twelfth International Conference on Learning Representations (2023)"},{"key":"25_CR34","doi-asserted-by":"crossref","unstructured":"Liu, J., Ju, C., Ma, C., Wang, Y., Wang, Y., Zhang, Y.: Audio-aware query-enhanced transformer for audio-visual segmentation. arXiv preprint arXiv:2307.13236 (2023)","DOI":"10.1109\/WACV57701.2024.00551"},{"key":"25_CR35","doi-asserted-by":"crossref","unstructured":"Liu, J., Ju, C., Xie, W., Zhang, Y.: Exploiting transformation invariance and equivariance for self-supervised sound localisation. In: Proceedings of ACM International Conference on Multimedia (2022)","DOI":"10.1145\/3503161.3548317"},{"key":"25_CR36","doi-asserted-by":"crossref","unstructured":"Liu, J., Liu, Y., Zhang, F., Ju, C., Zhang, Y., Wang, Y.: Audio-visual segmentation via unlabeled frame exploitation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2024)","DOI":"10.1109\/CVPR52733.2024.02487"},{"key":"25_CR37","doi-asserted-by":"crossref","unstructured":"Liu, J., Wang, Y., Ju, C., Ma, C., Zhang, Y., Xie, W.: Annotation-free audio-visual segmentation. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 5604\u20135614 (2024)","DOI":"10.1109\/WACV57701.2024.00551"},{"key":"25_CR38","unstructured":"Liu, Z., et\u00a0al.: Deja vu: contextual sparsity for efficient LLMS at inference time. In: Proceedings of the International Conference on Machine Learning. PMLR (2023)"},{"key":"25_CR39","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1016\/j.neucom.2022.07.028","volume":"508","author":"H Luo","year":"2022","unstructured":"Luo, H., et al.: Clip4clip: an empirical study of clip for end to end video clip retrieval and captioning. Neurocomputing 508, 293\u2013304 (2022)","journal-title":"Neurocomputing"},{"key":"25_CR40","unstructured":"Ma, C., et al.: Diffusionseg: adapting diffusion towards unsupervised object discovery. arXiv preprint arXiv:2303.09813 (2023)"},{"key":"25_CR41","unstructured":"Ma, C., Yang, Y., Ju, C., Zhang, F., Zhang, Y., Wang, Y.: Attrseg: open-vocabulary semantic segmentation via attribute decomposition-aggregation. In: Thirty-Seventh Conference on Neural Information Processing Systems (2023)"},{"key":"25_CR42","unstructured":"Ma, C., Yang, Y., Ju, C., Zhang, F., Zhang, Y., Wang, Y.: Open-vocabulary semantic segmentation via attribute decomposition-aggregation. arXiv preprint arXiv:2309.00096 (2023)"},{"key":"25_CR43","doi-asserted-by":"crossref","unstructured":"Marino, K., Rastegari, M., Farhadi, A., Mottaghi, R.: OK-VQA: a visual question answering benchmark requiring external knowledge. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3195\u20133204 (2019)","DOI":"10.1109\/CVPR.2019.00331"},{"key":"25_CR44","unstructured":"Mokady, R., Hertz, A., Bermano, A.H.: Clipcap: clip prefix for image captioning. arXiv preprint arXiv:2111.09734 (2021)"},{"key":"25_CR45","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: Proceedings of the International Conference on Machine Learning (2021)"},{"key":"25_CR46","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip latents. 
arXiv preprint arXiv:2204.06125 (2022)"},{"key":"25_CR47","unstructured":"Rao, Y., Zhao, W., Liu, B., Lu, J., Zhou, J., Hsieh, C.J.: Dynamicvit: efficient vision transformers with dynamic token sparsification. Adv. Neural Inf. Process. Syst. (2021)"},{"key":"25_CR48","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"issue":"3","key":"25_CR49","doi-asserted-by":"publisher","first-page":"379","DOI":"10.1002\/j.1538-7305.1948.tb01338.x","volume":"27","author":"CE Shannon","year":"1948","unstructured":"Shannon, C.E.: A mathematical theory of communication. Bell Syst. Tech. J. 27(3), 379\u2013423 (1948)","journal-title":"Bell Syst. Tech. J."},{"key":"25_CR50","doi-asserted-by":"crossref","unstructured":"Shen, S., et al.: Q-bert: hessian based ultra low precision quantization of bert. In: Proceedings of the AAAI Conference on Artificial Intelligence (2020)","DOI":"10.1609\/aaai.v34i05.6409"},{"key":"25_CR51","unstructured":"Shi, D., Tao, C., Jin, Y., Yang, Z., Yuan, C., Wang, J.: Upop: unified and progressive pruning for compressing vision-language transformers. arXiv preprint arXiv:2301.13741 (2023)"},{"key":"25_CR52","doi-asserted-by":"crossref","unstructured":"Shi, Z., Zhou, X., Qiu, X., Zhu, X.: Improving image captioning with better use of captions. arXiv preprint arXiv:2006.11807 (2020)","DOI":"10.18653\/v1\/2020.acl-main.664"},{"key":"25_CR53","doi-asserted-by":"crossref","unstructured":"Singh, M., et al.: Revisiting weakly supervised pre-training of visual perception models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 804\u2013814 (2022)","DOI":"10.1109\/CVPR52688.2022.00088"},{"key":"25_CR54","doi-asserted-by":"crossref","unstructured":"Song, H., Dong, L., Zhang, W.N., Liu, T., Wei, F.: Clip models are few-shot learners: empirical studies on VQA and visual entailment. arXiv preprint arXiv:2203.07190 (2022)","DOI":"10.18653\/v1\/2022.acl-long.421"},{"key":"25_CR55","unstructured":"Steiner, A., Kolesnikov, A., Zhai, X., Wightman, R., Uszkoreit, J., Beyer, L.: How to train your vit? data, augmentation, and regularization in vision transformers. arXiv preprint arXiv:2106.10270 (2021)"},{"key":"25_CR56","unstructured":"Tong, Z., Song, Y., Wang, J., Wang, L.: Videomae: masked autoencoders are data-efficient learners for self-supervised video pre-training. Adv. Neural Inf. Process. Syst. (2022)"},{"key":"25_CR57","unstructured":"Van Den\u00a0Oord, A., Vinyals, O., et\u00a0al.: Neural discrete representation learning. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"25_CR58","doi-asserted-by":"crossref","unstructured":"Wang, T., Zhou, W., Zeng, Y., Zhang, X.: Efficientvlm: fast and accurate vision-language models via knowledge distillation and modal-adaptive pruning. arXiv preprint arXiv:2210.07795 (2022)","DOI":"10.18653\/v1\/2023.findings-acl.873"},{"key":"25_CR59","unstructured":"Wei, J., et\u00a0al.: Emergent abilities of large language models. arXiv preprint arXiv:2206.07682 (2022)"},{"key":"25_CR60","doi-asserted-by":"crossref","unstructured":"Wei, S., Ye, T., Zhang, S., Tang, Y., Liang, J.: Joint token pruning and squeezing towards more aggressive compression of vision transformers. 
In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2092\u20132101 (2023)","DOI":"10.1109\/CVPR52729.2023.00208"},{"key":"25_CR61","unstructured":"Wu, X., Zeng, F., Wang, X., Wang, Y., Chen, X.: PPT: token pruning and pooling for efficient vision transformers. arXiv preprint arXiv:2310.01812 (2023)"},{"key":"25_CR62","unstructured":"Xiao, G., Lin, J., Seznec, M., Wu, H., Demouth, J., Han, S.: Smoothquant: accurate and efficient post-training quantization for large language models. In: International Conference on Machine Learning, pp. 38087\u201338099. PMLR (2023)"},{"key":"25_CR63","doi-asserted-by":"crossref","unstructured":"Xu, Y., et al.: Evo-vit: slow-fast token evolution for dynamic vision transformer. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 2964\u20132972 (2022)","DOI":"10.1609\/aaai.v36i3.20202"},{"key":"25_CR64","doi-asserted-by":"crossref","unstructured":"Yang, Y., Ma, C., Ju, C., Zhang, Y., Wang, Y.: Multi-modal prototypes for open-set semantic segmentation. arXiv preprint arXiv:2307.02003 (2023)","DOI":"10.1007\/s11263-024-02165-w"},{"key":"25_CR65","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"133","DOI":"10.1007\/978-3-030-87722-4_13","volume-title":"Domain Adaptation and Representation Transfer, and Affordable Healthcare and AI for Resource Diverse Global Health","author":"Z Ye","year":"2021","unstructured":"Ye, Z., Ju, C., Ma, C., Zhang, X.: Unsupervised domain adaption via similarity-based prototypes for cross-modality segmentation. In: Albarqouni, S., et al. (eds.) DART\/FAIR -2021. LNCS, vol. 12968, pp. 133\u2013143. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-87722-4_13"},{"key":"25_CR66","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"539","DOI":"10.1007\/978-3-030-58598-3_32","volume-title":"Computer Vision \u2013 ECCV 2020","author":"P Zhao","year":"2020","unstructured":"Zhao, P., Xie, L., Ju, C., Zhang, Y., Wang, Y., Tian, Q.: Bottom-up temporal action localization with mutual regularization. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12353, pp. 539\u2013555. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58598-3_32"},{"key":"25_CR67","doi-asserted-by":"publisher","first-page":"2337","DOI":"10.1007\/s11263-022-01653-1","volume":"130","author":"K Zhou","year":"2022","unstructured":"Zhou, K., Yang, J., Loy, C.C., Liu, Z.: Learning to prompt for vision-language models. Int. J. Comput. Vision 130, 2337\u20132348 (2022)","journal-title":"Int. J. Comput. Vision"},{"key":"25_CR68","unstructured":"Zhu, D., Chen, J., Shen, X., Li, X., Elhoseiny, M.: Minigpt-4: enhancing vision-language understanding with advanced large language models. 
arXiv preprint arXiv:2304.10592 (2023)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72952-2_25","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T05:18:34Z","timestamp":1727673514000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72952-2_25"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,1]]},"ISBN":["9783031729515","9783031729522"],"references-count":68,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72952-2_25","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,1]]},"assertion":[{"value":"1 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}
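The record above is the Crossref REST API response for DOI 10.1007/978-3-031-72952-2_25. As a minimal sketch of how such a record can be fetched and read, the Python snippet below queries the public api.crossref.org endpoint for this DOI; the use of the `requests` package and network access are assumptions, and the printed fields simply mirror keys visible in the record above.

# Fetch the Crossref work record for the chapter and print a few fields.
# Assumes the `requests` package is installed and the network is reachable.
import requests

DOI = "10.1007/978-3-031-72952-2_25"  # DOI taken from the record above

resp = requests.get(f"https://api.crossref.org/works/{DOI}", timeout=30)
resp.raise_for_status()
work = resp.json()["message"]  # the payload sits under the "message" key

print(work["title"][0])                                  # chapter title
print(", ".join(a["family"] for a in work["author"]))    # author surnames
print("; ".join(work["container-title"]))                # LNCS / ECCV 2024 volume
print(work["references-count"], "references")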