{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:24:39Z","timestamp":1750220679818,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,3,7]],"date-time":"2021-03-07T00:00:00Z","timestamp":1615075200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Beijing Nova Program from Beijing Municipal Science and Technology Commission","award":["Z201100006820123"],"award-info":[{"award-number":["Z201100006820123"]}]},{"name":"Natural Science Foundation of China","award":["U1536203, 61972169"],"award-info":[{"award-number":["U1536203, 61972169"]}]},{"name":"Major Scientific and Technological Project of Hubei Province","award":["2018AAA068, 2019AAA051"],"award-info":[{"award-number":["2018AAA068, 2019AAA051"]}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2016QY01W0200"],"award-info":[{"award-number":["2016QY01W0200"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,3,7]]},"DOI":"10.1145\/3444685.3446304","type":"proceedings-article","created":{"date-parts":[[2021,5,4]],"date-time":"2021-05-04T04:48:41Z","timestamp":1620103721000},"page":"1-6","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Cross-modal learning for saliency prediction in mobile environment"],"prefix":"10.1145","author":[{"given":"Dakai","family":"Ren","sequence":"first","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiangming","family":"Wen","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaoya","family":"Liu","sequence":"additional","affiliation":[{"name":"Xinyang Vocational and Technical College, Xinyang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shuai","family":"Huang","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiazhong","family":"Chen","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2021,5,3]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2017.2777665"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"A. Borji and L. Itti. 2012. Exploiting local and global patch rarities for saliency detection. In CVPR. 478--485. A. Borji and L. Itti. 2012. Exploiting local and global patch rarities for saliency detection. In CVPR. 478--485.","DOI":"10.1109\/CVPR.2012.6247711"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"N. Bruce C. Catton and S. Janjic. 2016. A deeper look at saliency: Feature contrast semantics and beyond. In CVPR. 516--524. N. Bruce C. Catton and S. Janjic. 2016. A deeper look at saliency: Feature contrast semantics and beyond. In CVPR. 516--524.","DOI":"10.1109\/CVPR.2016.62"},{"key":"e_1_3_2_1_4_1","unstructured":"N. Bruce and K. Tsotsos. 2005. Saliency based on information maximization. In NIPS. 155--162. N. Bruce and K. Tsotsos. 2005. Saliency based on information maximization. In NIPS. 155--162."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"J. Chen Y. Li Y. Fan W. Wu X. Wang H. Cao and Y. Chen. 2016. Investigation of mobile surroundings for visual attention based on image perception model. In VCIP. 1--4. J. Chen Y. Li Y. Fan W. Wu X. Wang H. Cao and Y. Chen. 2016. Investigation of mobile surroundings for visual attention based on image perception model. In VCIP. 1--4.","DOI":"10.1109\/VCIP.2016.7805488"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"M. Cornia L. Baraldi G. Serra and R. Cucchiara. 2016. A deep multilevel network for saliency prediction. In ICPR. 3488--3493. M. Cornia L. Baraldi G. Serra and R. Cucchiara. 2016. A deep multilevel network for saliency prediction. In ICPR. 3488--3493.","DOI":"10.1109\/ICPR.2016.7900174"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2851672"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1167\/13.4.11"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2011.2169775"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2014.2336549"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2011.11.007"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2009.2030969"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"S. Gupta J. Hoffman and J. Malik. 2016. Cross modal distillation for supervision transfer. In CVPR. 2827--2836. S. Gupta J. Hoffman and J. Malik. 2016. Cross modal distillation for supervision transfer. In CVPR. 2827--2836.","DOI":"10.1109\/CVPR.2016.309"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2015.2404432"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"J. Harel C. Koch and P. Perona. 2007. Graph-based visual saliency. In NIPS. 545--552. J. Harel C. Koch and P. Perona. 2007. Graph-based visual saliency. In NIPS. 545--552.","DOI":"10.7551\/mitpress\/7503.003.0073"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2011.146"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"X. Huang Y. Peng and M. Yuan. 2017. Cross-modal common representation learning by hybrid transfer network. In IJCAI. 1893--1900. X. Huang Y. Peng and M. Yuan. 2017. Cross-modal common representation learning by hybrid transfer network. In IJCAI. 1893--1900.","DOI":"10.24963\/ijcai.2017\/263"},{"key":"e_1_3_2_1_18_1","volume-title":"SALICON: Reducing the semantic gap in saliency prediction by adapting deep neural networks. In ICCV. 262--270.","author":"Huang X.","year":"2015","unstructured":"X. Huang , C. Shen , X. Boix , and Q. Zhao . 2015 . SALICON: Reducing the semantic gap in saliency prediction by adapting deep neural networks. In ICCV. 262--270. X. Huang, C. Shen, X. Boix, and Q. Zhao. 2015. SALICON: Reducing the semantic gap in saliency prediction by adapting deep neural networks. In ICCV. 262--270."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/34.730558"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"S. Jetley N. Murray and E. Vig. 2016. End-to-End Saliency Mapping via Probability Distribution Prediction. In CVPR. 5753--5760. S. Jetley N. Murray and E. Vig. 2016. End-to-End Saliency Mapping via Probability Distribution Prediction. In CVPR. 5753--5760.","DOI":"10.1109\/CVPR.2016.620"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"T. Judd K. Ehinger F. Durand and A. Torralba. 2009. Learning to predict where humans look. In ICCV. 2106--2113. T. Judd K. Ehinger F. Durand and A. Torralba. 2009. Learning to predict where humans look. In ICCV. 2106--2113.","DOI":"10.1109\/ICCV.2009.5459462"},{"key":"e_1_3_2_1_22_1","unstructured":"S. Kato C. Boon A. Fujibayashi S. Hangai and T. Hamamoto. 2005. Perceptual quality of motion of video sequences on mobile terminals. In IASTED. 442--447. S. Kato C. Boon A. Fujibayashi S. Hangai and T. Hamamoto. 2005. Perceptual quality of motion of video sequences on mobile terminals. In IASTED. 442--447."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCE.2010.5606241"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2017.2710620"},{"volume-title":"ICLR Workshop. 1--12","author":"K\u00fcmmerer M.","key":"e_1_3_2_1_25_1","unstructured":"M. K\u00fcmmerer , L. Theis , and M. Bethge . 2015. Deep gaze I: Boosting saliency prediction with feature maps trained on ImageNets . In ICLR Workshop. 1--12 . M. K\u00fcmmerer, L. Theis, and M. Bethge. 2015. Deep gaze I: Boosting saliency prediction with feature maps trained on ImageNets. In ICLR Workshop. 1--12."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2567391"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2012.147"},{"key":"e_1_3_2_1_28_1","unstructured":"N. Liu J. Han D. Zhang S. Wen and T. Liu. 2015. Predicting eye fixations using convolutional neural networks. In CVPR. 362--370. N. Liu J. Han D. Zhang S. Wen and T. Liu. 2015. Predicting eye fixations using convolutional neural networks. In CVPR. 362--370."},{"key":"e_1_3_2_1_29_1","volume-title":"K. McGuinness, and N. O'Connor.","author":"Pan J.","year":"2016","unstructured":"J. Pan , E. Sayrol , X. Giro i Nieto , K. McGuinness, and N. O'Connor. 2016 . Shallow and deep convolutional networks for Saliency prediction. In CVPR. 598--606. J. Pan, E. Sayrol, X. Giro i Nieto, K. McGuinness, and N. O'Connor. 2016. Shallow and deep convolutional networks for Saliency prediction. In CVPR. 598--606."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"N. Riche M. Duvinage M. Mancas B. Gosselin and T. Dutoit. 2014. Saliency and human fixations: State-of-the-art and study of comparison metrics. In ICCV. 1153--1160. N. Riche M. Duvinage M. Mancas B. Gosselin and T. Dutoit. 2014. Saliency and human fixations: State-of-the-art and study of comparison metrics. In ICCV. 1153--1160.","DOI":"10.1109\/ICCV.2013.147"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.image.2013.03.009"},{"key":"e_1_3_2_1_32_1","first-page":"1","article-title":"The big picture on small screens delivering acceptable video quality in mobile TV","volume":"5","author":"Sasse M. A.","year":"2009","unstructured":"M. A. Sasse and H. Knoche . 2009 . The big picture on small screens delivering acceptable video quality in mobile TV . Journal of Vision 5 , 3 (2009), 1 -- 27 . M. A. Sasse and H. Knoche. 2009. The big picture on small screens delivering acceptable video quality in mobile TV. Journal of Vision 5, 3 (2009), 1--27.","journal-title":"Journal of Vision"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"B. Schauerte and R. Stiefelhagen. 2012. Quaternion-based spectral saliency detection for eye fixation prediction. In ECCV. 116--129. B. Schauerte and R. Stiefelhagen. 2012. Quaternion-based spectral saliency detection for eye fixation prediction. In ECCV. 116--129.","DOI":"10.1007\/978-3-642-33709-3_9"},{"key":"e_1_3_2_1_34_1","unstructured":"K. Simonyan and A. Zisserman. 2015. Very deep convolutional networks for large-scale image recognition. In ICLR. 1--14. K. Simonyan and A. Zisserman. 2015. Very deep convolutional networks for large-scale image recognition. In ICLR. 1--14."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2017.03.018"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"crossref","unstructured":"E. Vig M. Dorr and D. Cox. 2014. Large-scale optimization of hierarchical features for saliency prediction in natural images. In CVPR. 2798--2805. E. Vig M. Dorr and D. Cox. 2014. Large-scale optimization of hierarchical features for saliency prediction in natural images. In CVPR. 2798--2805.","DOI":"10.1109\/CVPR.2014.358"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2017.2787612"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"crossref","unstructured":"W. Wang J. Shen F. Guo M. Cheng and A. Borji. 2018. Revisiting video saliency: A large-scale benchmark and a new model. In CVPR. 4894--4903. W. Wang J. Shen F. Guo M. Cheng and A. Borji. 2018. Revisiting video saliency: A large-scale benchmark and a new model. In CVPR. 4894--4903.","DOI":"10.1109\/CVPR.2018.00514"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"crossref","unstructured":"D. Xu W. Ouyang E. Ricci X. Wang and Nicu Sebe. 2017. Learning cross-modal deep representations for robust pedestrian detection. In CVPR. 4236--4244. D. Xu W. Ouyang E. Ricci X. Wang and Nicu Sebe. 2017. Learning cross-modal deep representations for robust pedestrian detection. In CVPR. 4236--4244.","DOI":"10.1109\/CVPR.2017.451"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1167\/14.1.28"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"crossref","unstructured":"J. Xue and C. Chen. 2012. Mobile JND: environment adapted perceptual model and mobile video quality enhancement. In ACM Multimedia Systems. 173--183. J. Xue and C. Chen. 2012. Mobile JND: environment adapted perceptual model and mobile video quality enhancement. In ACM Multimedia Systems. 173--183.","DOI":"10.1145\/2155555.2155584"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"crossref","unstructured":"J. Zhang and S. Sclaroff. 2013. Saliency detection: A Boolean map approach. In ICCV. 153--160. J. Zhang and S. Sclaroff. 2013. Saliency detection: A Boolean map approach. In ICCV. 153--160.","DOI":"10.1109\/ICCV.2013.26"},{"volume-title":"Proceedings CVPR. IEEE, 153--160","author":"Zhang J.","key":"e_1_3_2_1_43_1","unstructured":"J. Zhang and S. Sclaroff . 2013. Saliency detection: A Boolean map approach . In Proceedings CVPR. IEEE, 153--160 . J. Zhang and S. Sclaroff. 2013. Saliency detection: A Boolean map approach. In Proceedings CVPR. IEEE, 153--160."}],"event":{"name":"MMAsia '20: ACM Multimedia Asia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Virtual Event Singapore","acronym":"MMAsia '20"},"container-title":["Proceedings of the 2nd ACM International Conference on Multimedia in Asia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3444685.3446304","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3444685.3446304","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:03:19Z","timestamp":1750197799000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3444685.3446304"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,3,7]]},"references-count":43,"alternative-id":["10.1145\/3444685.3446304","10.1145\/3444685"],"URL":"https:\/\/doi.org\/10.1145\/3444685.3446304","relation":{},"subject":[],"published":{"date-parts":[[2021,3,7]]},"assertion":[{"value":"2021-05-03","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}