{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,2]],"date-time":"2025-12-02T15:03:04Z","timestamp":1764687784653,"version":"3.40.3"},"publisher-location":"Cham","reference-count":38,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030007751"},{"type":"electronic","value":"9783030007768"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-00776-8_1","type":"book-chapter","created":{"date-parts":[[2018,9,18]],"date-time":"2018-09-18T08:14:58Z","timestamp":1537258498000},"page":"3-14","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["CodedVision: Towards Joint Image Understanding and Compression via End-to-End Learning"],"prefix":"10.1007","author":[{"given":"Qiu","family":"Shen","sequence":"first","affiliation":[]},{"given":"Juanjuan","family":"Cai","sequence":"additional","affiliation":[]},{"given":"Linfeng","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Haojie","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Tong","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Long","family":"Ye","sequence":"additional","affiliation":[]},{"given":"Zhan","family":"Ma","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,9,19]]},"reference":[{"issue":"11","key":"1_CR1","doi-asserted-by":"publisher","first-page":"4311","DOI":"10.1109\/TSP.2006.881199","volume":"54","author":"M Aharon","year":"2006","unstructured":"Aharon, M., Elad, M., Bruckstein, A.: $$\\rm k$$-svd: an algorithm for designing overcomplete dictionaries for sparse representation. IEEE Trans. Sig. Process. 54(11), 4311\u20134322 (2006). https:\/\/doi.org\/10.1109\/TSP.2006.881199","journal-title":"IEEE Trans. Sig. Process."},{"issue":"4","key":"1_CR2","doi-asserted-by":"publisher","first-page":"355","DOI":"10.1162\/pres.1997.6.4.355","volume":"6","author":"RT Azuma","year":"1997","unstructured":"Azuma, R.T.: A survey of augmented reality. Presence Teleop. Virt. Environ. 6(4), 355\u2013385 (1997)","journal-title":"Presence Teleop. Virt. Environ."},{"issue":"2","key":"1_CR3","doi-asserted-by":"publisher","first-page":"1043","DOI":"10.1007\/s11042-014-2345-z","volume":"75","author":"RV Babu","year":"2016","unstructured":"Babu, R.V., Tom, M., Wadekar, P.: A survey on compressed domain video analysis techniques. Multimedia Tools Appl. 75(2), 1043\u20131078 (2016)","journal-title":"Multimedia Tools Appl."},{"key":"1_CR4","unstructured":"Ball\u00e9, J., Laparra, V., Simoncelli, E.P.: End-to-end optimized image compression. CoRR abs\/1611.01704 (2016). http:\/\/arxiv.org\/abs\/1611.01704"},{"key":"1_CR5","unstructured":"Ball\u00e9, J., Laparra, V., Simoncelli, E.P.: End-to-end optimized image compression. arXiv preprint (2016). arXiv:1611.01704"},{"key":"1_CR6","unstructured":"Bjontegaard, G.: Calculation of average PSNR differences between R-D curves. In: Document VCEG-M33, ITU-T VCEG 13th Meeting (2001)"},{"key":"1_CR7","doi-asserted-by":"crossref","unstructured":"Chen, T., Liu, H., Shen, Q., Yue, T., Cao, X., Ma, Z.: Deepcoder: a deep neural network based video compression. In: Visual Communications and Image Processing (VCIP), 2017, pp. 1\u20134. IEEE (2017)","DOI":"10.1109\/VCIP.2017.8305033"},{"key":"1_CR8","doi-asserted-by":"crossref","unstructured":"Dong, C., Deng, Y., Change Loy, C., Tang, X.: Compression artifacts reduction by a deep convolutional network. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 576\u2013584 (2015)","DOI":"10.1109\/ICCV.2015.73"},{"key":"1_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"184","DOI":"10.1007\/978-3-319-10593-2_13","volume-title":"Computer Vision \u2013 ECCV 2014","author":"C Dong","year":"2014","unstructured":"Dong, C., Loy, C.C., He, K., Tang, X.: Learning a deep convolutional network for image super-resolution. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8692, pp. 184\u2013199. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10593-2_13"},{"issue":"2","key":"1_CR10","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C.K., Winn, J., Zisserman, A.: The pascal visual object classes (voc) challenge. Int. J. Comput. Vis. 88(2), 303\u2013338 (2010)","journal-title":"Int. J. Comput. Vis."},{"issue":"1","key":"1_CR11","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1109\/TCSVT.2013.2273613","volume":"24","author":"Y Fang","year":"2014","unstructured":"Fang, Y., Lin, W., Chen, Z., Tsai, C.M., Lin, C.W.: A video saliency detection model in compressed domain. IEEE Trans. Circuits Syst. Video Technol. 24(1), 27\u201338 (2014)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"1_CR12","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"issue":"12","key":"1_CR13","doi-asserted-by":"publisher","first-page":"5814","DOI":"10.1109\/TIP.2016.2614132","volume":"25","author":"R Hong","year":"2016","unstructured":"Hong, R., Hu, Z., Wang, R., Wang, M., Tao, D.: Multi-view object retrieval via multi-scale topic models. IEEE Trans. Image Process. 25(12), 5814\u20135827 (2016)","journal-title":"IEEE Trans. Image Process."},{"issue":"3","key":"1_CR14","doi-asserted-by":"publisher","first-page":"1124","DOI":"10.1109\/TIP.2016.2514499","volume":"25","author":"R Hong","year":"2016","unstructured":"Hong, R., Zhang, L., Tao, D.: Unified photo enhancement by discovering aesthetic communities from flickr. IEEE Trans. Image Process. 25(3), 1124\u20131135 (2016)","journal-title":"IEEE Trans. Image Process."},{"issue":"8","key":"1_CR15","doi-asserted-by":"publisher","first-page":"1555","DOI":"10.1109\/TMM.2016.2567071","volume":"18","author":"R Hong","year":"2016","unstructured":"Hong, R., Zhang, L., Zhang, C., Zimmermann, R.: Flickr circles: aesthetic tendency discovery by multi-view regularized topic modeling. IEEE Trans. Multimedia 18(8), 1555\u20131567 (2016)","journal-title":"IEEE Trans. Multimedia"},{"key":"1_CR16","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint (2014). arXiv:1412.6980"},{"key":"1_CR17","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. In: Advances in Neural Information Processing Systems, pp. 1097\u20131105 (2012)"},{"issue":"7553","key":"1_CR18","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun, Y., Bengio, Y., Hinton, G.: Deep learning. Nature 521(7553), 436 (2015)","journal-title":"Nature"},{"issue":"1","key":"1_CR19","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1109\/JPROC.2004.839613","volume":"93","author":"DT Lee","year":"2005","unstructured":"Lee, D.T.: Jpeg 2000: retrospective and new developments. Proc. IEEE 93(1), 32\u201341 (2005). https:\/\/doi.org\/10.1109\/JPROC.2004.839613","journal-title":"Proc. IEEE"},{"key":"1_CR20","doi-asserted-by":"publisher","unstructured":"Lowe, D.G.: Object recognition from local scale-invariant features. In: Proceedings of the Seventh IEEE International Conference on Computer Vision, vol. 2, pp. 1150\u20131157 (1999). https:\/\/doi.org\/10.1109\/ICCV.1999.790410","DOI":"10.1109\/ICCV.1999.790410"},{"issue":"4","key":"1_CR21","first-page":"831","volume":"2","author":"ZM Lu","year":"2006","unstructured":"Lu, Z.M., Li, S.Z., Burkhardt, H.: A content-based image retrieval scheme in jpeg compressed domain. Int. J. Innovative Comput. Inf. Control 2(4), 831\u2013839 (2006)","journal-title":"Int. J. Innovative Comput. Inf. Control"},{"key":"1_CR22","volume-title":"Vision: A Computational Investigation into the Human Representation and Processing of Visual Information","author":"D Marr","year":"1982","unstructured":"Marr, D.: Vision: A Computational Investigation into the Human Representation and Processing of Visual Information. Henry Holt and Co., Inc., New York (1982)"},{"key":"1_CR23","doi-asserted-by":"crossref","unstructured":"Nah, S., Kim, T.H., Lee, K.M.: Deep multi-scale convolutional neural network for dynamic scene deblurring. In: CVPR, vol. 1, p. 3 (2017)","DOI":"10.1109\/CVPR.2017.35"},{"issue":"6","key":"1_CR24","doi-asserted-by":"publisher","first-page":"23","DOI":"10.1109\/79.733495","volume":"15","author":"A Ortega","year":"1998","unstructured":"Ortega, A., Ramchandran, K.: Rate-distortion methods for image and video compression. IEEE Sig. Process. Mag. 15(6), 23\u201350 (1998)","journal-title":"IEEE Sig. Process. Mag."},{"key":"1_CR25","doi-asserted-by":"publisher","unstructured":"Pati, Y.C., Rezaiifar, R., Krishnaprasad, P.S.: Orthogonal matching pursuit: recursive function approximation with applications to wavelet decomposition. In: Proceedings of 27th Asilomar Conference on Signals, Systems and Computers, vol. 1, pp. 40\u201344 (1993). https:\/\/doi.org\/10.1109\/ACSSC.1993.342465","DOI":"10.1109\/ACSSC.1993.342465"},{"issue":"1","key":"1_CR26","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1109\/TCSVT.2009.2020253","volume":"20","author":"F Porikli","year":"2010","unstructured":"Porikli, F., Bashir, F., Sun, H.: Compressed domain video object segmentation. IEEE Trans. Circ. Syst. Video Technol. 20(1), 2\u201314 (2010)","journal-title":"IEEE Trans. Circ. Syst. Video Technol."},{"key":"1_CR27","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., Farhadi, A.: You only look once: Unified, real-time object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 779\u2013788 (2016)","DOI":"10.1109\/CVPR.2016.91"},{"key":"1_CR28","doi-asserted-by":"crossref","unstructured":"Shi, W., Caballero, J., Husz\u00e1r, F., Totz, J., Aitken, A.P., Bishop, R., Rueckert, D., Wang, Z.: Real-time single image and video super-resolution using an efficient sub-pixel convolutional neural network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1874\u20131883 (2016)","DOI":"10.1109\/CVPR.2016.207"},{"issue":"4","key":"1_CR29","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1109\/64.85917","volume":"6","author":"RL Simpson","year":"1991","unstructured":"Simpson, R.L.: Computer vision: an overview. IEEE Expert 6(4), 11\u201315 (1991). https:\/\/doi.org\/10.1109\/64.85917","journal-title":"IEEE Expert"},{"issue":"12","key":"1_CR30","doi-asserted-by":"publisher","first-page":"1649","DOI":"10.1109\/TCSVT.2012.2221191","volume":"22","author":"GJ Sullivan","year":"2012","unstructured":"Sullivan, G.J., Ohm, J.R., Han, W.J., Wiegand, T.: Overview of the high efficiency video coding (HEVC) standard. IEEE Trans. Circuits Syst. Video Technol. 22(12), 1649\u20131668 (2012). https:\/\/doi.org\/10.1109\/TCSVT.2012.2221191","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"1_CR31","unstructured":"Toderici, G., O\u2019Malley, S.M., Hwang, S.J., Vincent, D., Minnen, D., Baluja, S., Covell, M., Sukthankar, R.: Variable rate image compression with recurrent neural networks. CoRR abs\/1511.06085 (2015). http:\/\/arxiv.org\/abs\/1511.06085"},{"key":"1_CR32","doi-asserted-by":"crossref","unstructured":"Toderici, G., Vincent, D., Johnston, N., Hwang, S.J., Minnen, D., Shor, J., Covell, M.: Full resolution image compression with recurrent neural networks. CoRR abs\/1608.05148 (2016). http:\/\/arxiv.org\/abs\/1608.05148","DOI":"10.1109\/CVPR.2017.577"},{"issue":"7","key":"1_CR33","doi-asserted-by":"publisher","first-page":"560","DOI":"10.1109\/TCSVT.2003.815165","volume":"13","author":"T Wiegand","year":"2003","unstructured":"Wiegand, T., Sullivan, G.J., Bjontegaard, G., Luthra, A.: Overview of the h.264\/avc video coding standard. IEEE Trans. Circuits Syst. Video Technol. 13(7), 560\u2013576 (2003)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"1_CR34","doi-asserted-by":"crossref","unstructured":"Xue, Y., Wang, Y.: Video coding using a self-adaptive redundant dictionary consisting of spatial and temporal prediction candidates. In: Proceedings of the IEEE International Conference on Multimedia and Expo (ICME) (2014)","DOI":"10.1109\/ICME.2014.6890314"},{"issue":"5","key":"1_CR35","doi-asserted-by":"publisher","first-page":"1061","DOI":"10.1109\/JSTSP.2011.2135332","volume":"5","author":"J Zepeda","year":"2011","unstructured":"Zepeda, J., Guillemot, C., Kijak, E.: Image compression using sparse representations and the iteration-tuned and aligned dictionary. IEEE. J. Sel. Top. Sign. Process. 5(5), 1061\u20131073 (2011)","journal-title":"IEEE. J. Sel. Top. Sign. Process."},{"issue":"4","key":"1_CR36","doi-asserted-by":"publisher","first-page":"903","DOI":"10.1109\/TMM.2017.2759500","volume":"20","author":"C Zhang","year":"2018","unstructured":"Zhang, C., Cheng, J., Tian, Q.: Multiview label sharing for visual representations and classifications. IEEE Trans. Multimedia 20(4), 903\u2013913 (2018)","journal-title":"IEEE Trans. Multimedia"},{"key":"1_CR37","doi-asserted-by":"crossref","unstructured":"Zhang, C., Liu, J., Tian, Q., Xu, C., Lu, H., Ma, S.: Image classification by non-negative sparse coding, low-rank and sparse decomposition. In: 2011 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1673\u20131680. IEEE (2011)","DOI":"10.1109\/CVPR.2011.5995484"},{"key":"1_CR38","unstructured":"Zhao, L., He, Z., Cao, W., Zhao, D.: Real-time moving object segmentation and classification from HEVC compressed surveillance video. IEEE Transactions on Circuits and Systems for Video Technology (2016)"}],"container-title":["Lecture Notes in Computer Science","Advances in Multimedia Information Processing \u2013 PCM 2018"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-00776-8_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T12:19:08Z","timestamp":1710332348000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-00776-8_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030007751","9783030007768"],"references-count":38,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-00776-8_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"19 September 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PCM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Pacific Rim Conference on Multimedia","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hefei","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 September 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"pcm2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/pcm2018.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}