{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,16]],"date-time":"2026-05-16T01:59:54Z","timestamp":1778896794219,"version":"3.51.4"},"publisher-location":"Cham","reference-count":59,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031732089","type":"print"},{"value":"9783031732096","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73209-6_20","type":"book-chapter","created":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T15:02:57Z","timestamp":1730386977000},"page":"342-359","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["A Unified Image Compression Method for\u00a0Human Perception and\u00a0Multiple Vision Tasks"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-9111-4084","authenticated-orcid":false,"given":"Sha","family":"Guo","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7307-0443","authenticated-orcid":false,"given":"Lin","family":"Sui","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3168-1852","authenticated-orcid":false,"given":"Chenlin","family":"Zhang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0563-1760","authenticated-orcid":false,"given":"Zhuo","family":"Chen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1692-0069","authenticated-orcid":false,"given":"Wenhan","family":"Yang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4491-2023","authenticated-orcid":false,"given":"Lingyu","family":"Duan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,1]]},"reference":[{"key":"20_CR1","doi-asserted-by":"crossref","unstructured":"Bai, Y., et al.: Towards end-to-end image compression and analysis with transformers. In: AAAI, pp. 104\u2013112 (2022)","DOI":"10.1609\/aaai.v36i1.19884"},{"key":"20_CR2","unstructured":"Ball\u00e9, J., Laparra, V., Simoncelli, E.P.: End-to-end optimized image compression. In: ICLR (2017)"},{"key":"20_CR3","unstructured":"Ball\u00e9, J., Minnen, D., Singh, S., Hwang, S.J., Johnston, N.: Variational image compression with a scale hyperprior. In: ICLR (2018)"},{"key":"20_CR4","unstructured":"Bjontegaard, G.: Calculation of average PSNR differences between RD-curves. ITU SG16 Doc. VCEG-M33 (2001)"},{"key":"20_CR5","unstructured":"Blau, Y., Michaeli, T.: Rethinking lossy compression: the rate-distortion-perception tradeoff. In: ICML, pp. 675\u2013685. PMLR (2019)"},{"issue":"10","key":"20_CR6","first-page":"3736","volume":"31","author":"B Bross","year":"2021","unstructured":"Bross, B., et al.: Overview of the versatile video coding (VVC) standard and its applications. IEEE TCSVT 31(10), 3736\u20133764 (2021)","journal-title":"IEEE TCSVT"},{"key":"20_CR7","doi-asserted-by":"crossref","unstructured":"Chamain, L.D., Racap\u00e9, F., B\u00e9gaint, J., Pushparaja, A., Feltman, S.: End-to-end optimized image compression for machines, a study. In: DCC, pp. 163\u2013172. IEEE (2021)","DOI":"10.1109\/DCC50243.2021.00024"},{"key":"20_CR8","first-page":"3179","volume":"30","author":"T Chen","year":"2021","unstructured":"Chen, T., Liu, H., Ma, Z., Shen, Q., Cao, X., Wang, Y.: End-to-end learnt image compression via non-local attention optimization and improved context modeling. IEEE TIP 30, 3179\u20133191 (2021)","journal-title":"IEEE TIP"},{"key":"20_CR9","first-page":"2230","volume":"29","author":"Z Chen","year":"2019","unstructured":"Chen, Z., Fan, K., Wang, S., Duan, L., Lin, W., Kot, A.C.: Toward intelligent sensing: intermediate deep feature compression. IEEE TIP 29, 2230\u20132243 (2019)","journal-title":"IEEE TIP"},{"key":"20_CR10","doi-asserted-by":"crossref","unstructured":"Cheng, B., Misra, I., Schwing, A.G., Kirillov, A., Girdhar, R.: Masked-attention mask transformer for universal image segmentation. In: CVPR, pp. 1290\u20131299 (2022)","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"20_CR11","doi-asserted-by":"crossref","unstructured":"Cheng, Z., Sun, H., Takeuchi, M., Katto, J.: Learned image compression with discretized gaussian mixture likelihoods and attention modules. In: CVPR, pp. 7939\u20137948 (2020)","DOI":"10.1109\/CVPR42600.2020.00796"},{"key":"20_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"309","DOI":"10.1007\/978-3-030-58565-5_19","volume-title":"Computer Vision \u2013 ECCV 2020","author":"J Choi","year":"2020","unstructured":"Choi, J., Han, B.: Task-aware quantization network for JPEG image compression. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12365, pp. 309\u2013324. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58565-5_19"},{"key":"20_CR13","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat GANs on image synthesis. In: NeurIPS, vol. 34, pp. 8780\u20138794 (2021)"},{"key":"20_CR14","first-page":"8680","volume":"29","author":"L Duan","year":"2020","unstructured":"Duan, L., Liu, J., Yang, W., Huang, T., Gao, W.: Video coding for machines: a paradigm of collaborative compression and intelligent analytics. IEEE TIP 29, 8680\u20138695 (2020)","journal-title":"IEEE TIP"},{"key":"20_CR15","doi-asserted-by":"crossref","unstructured":"Duan, Z., Lu, M., Ma, Z., Zhu, F.: Lossy image compression with quantized hierarchical VAEs. In: WACV, pp. 198\u2013207 (2023)","DOI":"10.1109\/WACV56688.2023.00028"},{"key":"20_CR16","doi-asserted-by":"crossref","unstructured":"Feng, Y., Ji, S., Liu, Y.S., Du, S., Dai, Q., Gao, Y.: Hypergraph-based multi-modal representation for open-set 3d object retrieval. IEEE TPAMI (2023)","DOI":"10.1109\/TPAMI.2023.3332768"},{"key":"20_CR17","doi-asserted-by":"crossref","unstructured":"Ge, X., et al.: Task-aware encoder control for deep video compression. In: CVPR, pp. 26036\u201326045 (2024)","DOI":"10.1109\/CVPR52733.2024.02460"},{"key":"20_CR18","doi-asserted-by":"crossref","unstructured":"Guo, S., Chen, Z., Zhao, Y., Zhang, N., Li, X., Duan, L.: Toward scalable image feature compression: a content-adaptive and diffusion-based approach. In: ACM MM, pp. 1431\u20131442 (2023)","DOI":"10.1145\/3581783.3611851"},{"key":"20_CR19","doi-asserted-by":"crossref","unstructured":"He, D., Yang, Z., Peng, W., Ma, R., Qin, H., Wang, Y.: ELIC: efficient learned image compression with unevenly grouped space-channel contextual adaptive coding. In: CVPR, pp. 5718\u20135727 (2022)","DOI":"10.1109\/CVPR52688.2022.00563"},{"key":"20_CR20","doi-asserted-by":"crossref","unstructured":"He, D., Zheng, Y., Sun, B., Wang, Y., Qin, H.: Checkerboard context model for efficient learned image compression. In: CVPR, pp. 14771\u201314780 (2021)","DOI":"10.1109\/CVPR46437.2021.01453"},{"key":"20_CR21","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask R-CNN. In: ICCV, pp. 2961\u20132969 (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"20_CR22","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"20_CR23","doi-asserted-by":"crossref","unstructured":"Hu, Y., Yang, S., Yang, W., Duan, L.Y., Liu, J.: Towards coding for human and machine vision: a scalable image coding approach. In: ICME, pp.\u00a01\u20136. IEEE (2020)","DOI":"10.1109\/ICME46284.2020.9102750"},{"key":"20_CR24","doi-asserted-by":"crossref","unstructured":"Huang, Z., Jia, C., Wang, S., Ma, S.: Visual analysis motivated rate-distortion model for image coding. In: ICME, pp.\u00a01\u20136. IEEE (2021)","DOI":"10.1109\/ICME51207.2021.9428417"},{"key":"20_CR25","doi-asserted-by":"crossref","unstructured":"Kim, Y., et al.: End-to-end learnable multi-scale feature compression for VCM. IEEE TCSVT (2023)","DOI":"10.1109\/TCSVT.2023.3302858"},{"key":"20_CR26","doi-asserted-by":"crossref","unstructured":"Kirillov, A., He, K., Girshick, R., Rother, C., Doll\u00e1r, P.: Panoptic segmentation. In: CVPR, pp. 9404\u20139413 (2019)","DOI":"10.1109\/CVPR.2019.00963"},{"key":"20_CR27","doi-asserted-by":"crossref","unstructured":"Le, N., Zhang, H., Cricri, F., Ghaznavi-Youvalari, R., Rahtu, E.: Image coding for machines: an end-to-end learned approach. In: ICASSP, pp. 1590\u20131594. IEEE (2021)","DOI":"10.1109\/ICASSP39728.2021.9414465"},{"key":"20_CR28","doi-asserted-by":"crossref","unstructured":"Le, N., Zhang, H., Cricri, F., Ghaznavi-Youvalari, R., Tavakoli, H.R., Rahtu, E.: Learned image coding for machines: a content-adaptive approach. In: ICME, pp.\u00a01\u20136. IEEE (2021)","DOI":"10.1109\/ICME51207.2021.9428224"},{"key":"20_CR29","unstructured":"Li, J., Li, D., Xiong, C., Hoi, S.: BLIP: bootstrapping language-image pre-training for unified vision-language understanding and generation. In: ICML, pp. 12888\u201312900. PMLR (2022)"},{"key":"20_CR30","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"632","DOI":"10.1007\/978-3-031-19800-7_37","volume-title":"Computer Vision \u2013 ECCV 2022","author":"M Li","year":"2022","unstructured":"Li, M., Gao, S., Feng, Y., Shi, Y., Wang, J.: Content-oriented learned image compression. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13679, pp. 632\u2013647. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19800-7_37"},{"key":"20_CR31","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B., Belongie, S.: Feature pyramid networks for object detection. In: CVPR, pp. 2117\u20132125 (2017)","DOI":"10.1109\/CVPR.2017.106"},{"key":"20_CR32","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"20_CR33","unstructured":"Lin, X., et al.: DiffBIR: towards blind image restoration with generative diffusion prior. arXiv preprint arXiv:2308.15070 (2023)"},{"issue":"9","key":"20_CR34","doi-asserted-by":"publisher","first-page":"2605","DOI":"10.1007\/s11263-021-01491-7","volume":"129","author":"K Liu","year":"2021","unstructured":"Liu, K., Liu, D., Li, L., Yan, N., Li, H.: Semantics-to-signal scalable image compression with learned revertible representations. IJCV 129(9), 2605\u20132621 (2021)","journal-title":"IJCV"},{"key":"20_CR35","doi-asserted-by":"crossref","unstructured":"Liu, L., Hu, Z., Chen, Z., Xu, D.: ICMH-net: neural image compression towards both machine vision and human vision. In: ACM MM, pp. 8047\u20138056 (2023)","DOI":"10.1145\/3581783.3612041"},{"key":"20_CR36","doi-asserted-by":"crossref","unstructured":"Lu, M., Guo, P., Shi, H., Cao, C., Ma, Z.: Transformer-based image compression. In: DCC, p. 469. IEEE (2022)","DOI":"10.1109\/DCC52660.2022.00080"},{"key":"20_CR37","doi-asserted-by":"publisher","first-page":"106022","DOI":"10.1016\/j.resconrec.2021.106022","volume":"178","author":"W Lu","year":"2022","unstructured":"Lu, W., Chen, J., Xue, F.: Using computer vision to recognize composition of construction waste mixtures: a semantic segmentation approach. Resour. Conserv. Recycl. 178, 106022 (2022)","journal-title":"Resour. Conserv. Recycl."},{"key":"20_CR38","unstructured":"Van\u00a0der Maaten, L., Hinton, G.: Visualizing data using t-SNE. J. Mach. Learn. Res. 9(11) (2008)"},{"key":"20_CR39","unstructured":"Mentzer, F., Toderici, G.D., Tschannen, M., Agustsson, E.: High-fidelity generative image compression. In: NeurIPS, vol. 33, pp. 11913\u201311924 (2020)"},{"key":"20_CR40","unstructured":"Minnen, D., Ball\u00e9, J., Toderici, G.D.: Joint autoregressive and hierarchical priors for learned image compression. In: NeurIPS, vol. 31 (2018)"},{"key":"20_CR41","doi-asserted-by":"crossref","unstructured":"Minnen, D., Singh, S.: Channel-wise autoregressive entropy models for learned image compression. In: ICIP, pp. 3339\u20133343. IEEE (2020)","DOI":"10.1109\/ICIP40778.2020.9190935"},{"key":"20_CR42","unstructured":"M\u00fcller-Franzes, G., et\u00a0al.: Diffusion probabilistic models beat GANs on medical images. arXiv preprint arXiv:2212.07501 (2022)"},{"key":"20_CR43","volume-title":"JPEG: Still Image Data Compression Standard","author":"WB Pennebaker","year":"1992","unstructured":"Pennebaker, W.B., Mitchell, J.L.: JPEG: Still Image Data Compression Standard. Springer, Heidelberg (1992)"},{"key":"20_CR44","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: NeurIPS, vol. 28 (2015)"},{"key":"20_CR45","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: CVPR, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"20_CR46","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1007\/978-3-319-24574-4_28","volume-title":"Medical Image Computing and Computer-Assisted Intervention \u2013 MICCAI 2015","author":"O Ronneberger","year":"2015","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-net: convolutional networks for biomedical image segmentation. In: Navab, N., Hornegger, J., Wells, W.M., Frangi, A.F. (eds.) MICCAI 2015. LNCS, vol. 9351, pp. 234\u2013241. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-24574-4_28"},{"key":"20_CR47","unstructured":"Ruder, S.: An overview of gradient descent optimization algorithms. arXiv preprint arXiv:1609.04747 (2016)"},{"key":"20_CR48","series-title":"Lecture Notes in Electrical Engineering","doi-asserted-by":"publisher","first-page":"271","DOI":"10.1007\/978-981-10-0072-0_35","volume-title":"Advanced Graphic Communications, Packaging Technology and Materials","author":"Z Si","year":"2016","unstructured":"Si, Z., Shen, K.: Research on the WebP image format. In: Xu, M., Yang, L., Ouyang, Y., Ouyang, Y. (eds.) Advanced Graphic Communications, Packaging Technology and Materials. LNEE, vol. 369, pp. 271\u2013277. Springer, Singapore (2016). https:\/\/doi.org\/10.1007\/978-981-10-0072-0_35"},{"key":"20_CR49","doi-asserted-by":"crossref","unstructured":"Strudel, R., Garcia, R., Laptev, I., Schmid, C.: Segmenter: transformer for semantic segmentation. In: ICCV, pp. 7262\u20137272 (2021)","DOI":"10.1109\/ICCV48922.2021.00717"},{"key":"20_CR50","doi-asserted-by":"crossref","unstructured":"Suzuki, S., Takagi, M., Hayase, K., Onishi, T., Shimizu, A.: Image pre-transformation for recognition-aware image compression. In: ICIP, pp. 2686\u20132690. IEEE (2019)","DOI":"10.1109\/ICIP.2019.8803275"},{"key":"20_CR51","unstructured":"Xiang, J., Tian, K., Zhang, J.: MIMT: masked image modeling transformer for video compression. In: ICLR (2022)"},{"key":"20_CR52","doi-asserted-by":"crossref","unstructured":"Yang, M., Yu, K., Zhang, C., Li, Z., Yang, K.: DenseASPP for semantic segmentation in street scenes. In: CVPR, pp. 3684\u20133692 (2018)","DOI":"10.1109\/CVPR.2018.00388"},{"key":"20_CR53","doi-asserted-by":"crossref","unstructured":"Yang, W., Huang, H., Hu, Y., Duan, L.Y., Liu, J.: Video coding for machines: compact visual representation compression for intelligent collaborative analytics. IEEE TPAMI (2024)","DOI":"10.1109\/TPAMI.2024.3367293"},{"key":"20_CR54","doi-asserted-by":"crossref","unstructured":"Yoon, C., et al.: MEDO: minimizing effective distortions only for machine-oriented visual feature compression. In: VCIP, pp.\u00a01\u20135. IEEE (2023)","DOI":"10.1109\/VCIP59821.2023.10402661"},{"key":"20_CR55","doi-asserted-by":"crossref","unstructured":"Zamir, A.R., Sax, A., Shen, W., Guibas, L.J., Malik, J., Savarese, S.: Taskonomy: Disentangling task transfer learning. In: CVPR, pp. 3712\u20133722 (2018)","DOI":"10.1109\/CVPR.2018.00391"},{"key":"20_CR56","doi-asserted-by":"crossref","unstructured":"Zeng, H., Peng, S., Li, D.: DeepLabv3+ semantic segmentation model based on feature cross attention mechanism. In: JPCS, p. 012106. IOP Publishing (2020)","DOI":"10.1088\/1742-6596\/1678\/1\/012106"},{"key":"20_CR57","doi-asserted-by":"crossref","unstructured":"Zhu, X., Song, J., Gao, L., Zheng, F., Shen, H.T.: Unified multivariate gaussian mixture for efficient neural image compression. In: CVPR, pp. 17612\u201317621 (2022)","DOI":"10.1109\/CVPR52688.2022.01709"},{"key":"20_CR58","unstructured":"Zhu, Y., Yang, Y., Cohen, T.: Transformer-based transform coding. In: ICLR (2021)"},{"key":"20_CR59","doi-asserted-by":"crossref","unstructured":"Zou, R., Song, C., Zhang, Z.: The devil is in the details: window-based attention for image compression. In: CVPR, pp. 17492\u201317501 (2022)","DOI":"10.1109\/CVPR52688.2022.01697"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73209-6_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,8]],"date-time":"2025-04-08T12:28:47Z","timestamp":1744115327000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73209-6_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,1]]},"ISBN":["9783031732089","9783031732096"],"references-count":59,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73209-6_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,1]]},"assertion":[{"value":"1 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}