{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2022,3,30]],"date-time":"2022-03-30T05:52:28Z","timestamp":1648619548207},"reference-count":29,"publisher":"Institute of Electronics, Information and Communications Engineers (IEICE)","issue":"10","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEICE Trans. Inf. &amp; Syst."],"published-print":{"date-parts":[[2018,10,1]]},"DOI":"10.1587\/transinf.2018edp7029","type":"journal-article","created":{"date-parts":[[2018,9,30]],"date-time":"2018-09-30T22:09:47Z","timestamp":1538345387000},"page":"2509-2517","source":"Crossref","is-referenced-by-count":0,"title":["Finding Important People in a Video Using Deep Neural Networks with Conditional Random Fields"],"prefix":"10.1587","volume":"E101.D","author":[{"given":"Mayu","family":"OTANI","sequence":"first","affiliation":[{"name":"Nara Institute of Science and Technology"}],"role":[{"role":"author","vocab":"crossref"}]},{"given":"Atsushi","family":"NISHIDA","sequence":"additional","affiliation":[{"name":"Dai Nippon Printing Co., Ltd."}],"role":[{"role":"author","vocab":"crossref"}]},{"given":"Yuta","family":"NAKASHIMA","sequence":"additional","affiliation":[{"name":"Osaka University"}],"role":[{"role":"author","vocab":"crossref"}]},{"given":"Tomokazu","family":"SATO","sequence":"additional","affiliation":[{"name":"Shiga University"}],"role":[{"role":"author","vocab":"crossref"}]},{"given":"Naokazu","family":"YOKOYA","sequence":"additional","affiliation":[{"name":"Nara Institute of Science and Technology"}],"role":[{"role":"author","vocab":"crossref"}]}],"member":"532","reference":[{"key":"1","doi-asserted-by":"crossref","unstructured":"[1] F. Liu and M. Gleicher, \u201cVideo retargeting: Automating pan and scan,\u201d ACM International Conference on Multimedia (MM), pp.241-250, 2006. 10.1145\/1180639.1180702","DOI":"10.1145\/1180639.1180702"},{"key":"2","doi-asserted-by":"publisher","unstructured":"[2] L. Itti, \u201cAutomatic foveation for video compression using a neurobiological model of visual attention,\u201d IEEE Trans. Image Process., vol.13, no.10, pp.1304-1318, 2004. 10.1109\/tip.2004.834657","DOI":"10.1109\/TIP.2004.834657"},{"key":"3","doi-asserted-by":"publisher","unstructured":"[3] L. Itti, C. Koch, and E. Niebur, \u201cA model of saliency-based visual attention for rapid scene analysis,\u201d IEEE Trans. Pattern Anal. Mach. Intell., vol.20, no.11, pp.1254-1259, 1998. 10.1109\/34.730558","DOI":"10.1109\/34.730558"},{"key":"4","doi-asserted-by":"publisher","unstructured":"[4] Y.-F. Ma, X.-S. Hua, L. Lu, and H.-J. Zhang, \u201cA generic framework of user attention model and its application in video summarization,\u201d IEEE Trans. Multimedia, vol.7, no.5, pp.907-919, 2005. 10.1109\/tmm.2005.854410","DOI":"10.1109\/TMM.2005.854410"},{"key":"5","doi-asserted-by":"publisher","unstructured":"[5] Y. Nakashima, N. Babaguchi, and J. Fan, \u201cIntended human object detection for automatically protecting privacy in mobile video surveillance,\u201d Multimedia Systems, vol.18, no.2, pp.157-173, 2012. 10.1007\/s00530-011-0244-y","DOI":"10.1007\/s00530-011-0244-y"},{"key":"6","doi-asserted-by":"crossref","unstructured":"[6] V. Ramanathan, B. Yao, and L. Fei-Fei, \u201cSocial role discovery in human events,\u201d IEEE Conf. Computer Vision and Pattern Recognition (CVPR), pp.2475-2482, 2013. 10.1109\/cvpr.2013.320","DOI":"10.1109\/CVPR.2013.320"},{"key":"7","doi-asserted-by":"crossref","unstructured":"[7] M. Andriluka, S. Roth, and B. Schiele, \u201cPictorial structures revisited: People detection and articulated pose estimation,\u201d IEEE Conf. Computer Vision and Pattern Recognition (CVPR), pp.1014-1021, 2009. 10.1109\/cvprw.2009.5206754","DOI":"10.1109\/CVPRW.2009.5206754"},{"key":"8","doi-asserted-by":"crossref","unstructured":"[8] Y.-C. Su, D. Jayaraman, and K. Grauman, \u201cPano2Vid: Automatic cinematography for watching 360<sup>\u00b0<\/sup> videos,\u201d Asian Conference on Computer Vision (ACCV), pp.154-171, 2016. 10.1007\/978-3-319-54190-7_10","DOI":"10.1007\/978-3-319-54190-7_10"},{"key":"9","doi-asserted-by":"crossref","unstructured":"[9] L. Itti and P. Baldi, \u201cBayesian surprise attracts human attention,\u201d Vision. Res., vol.49, no.10, pp.1295-1306, 2009. 10.1016\/j.visres.2008.09.007","DOI":"10.1016\/j.visres.2008.09.007"},{"key":"10","doi-asserted-by":"crossref","unstructured":"[10] S. Frintrop, G. Backer, and E. Rome, \u201cGoal-directed search with a top-down modulated computational attention system,\u201d in DAGM Conference on Pattern Recognition, pp.117-124, 2005. 10.1007\/11550518_15","DOI":"10.1007\/11550518_15"},{"key":"11","doi-asserted-by":"publisher","unstructured":"[11] O. Russakovsky, J. Deng, H. Su, J. Krause, S. Satheesh, S. Ma, Z. Huang, A. Karpathy, A. Khosla, M. Bernstein, A.C. Berg, and L. Fei-Fei, \u201cImageNet large scale visual recognition challenge,\u201d International Journal of Computer Vision, vol.115, no.3, pp.211-252, 2015. 10.1007\/s11263-015-0816-y","DOI":"10.1007\/s11263-015-0816-y"},{"key":"12","doi-asserted-by":"crossref","unstructured":"[12] T.-Y. Lin, M. Maire, S. Belongie, J. Hays, P. Perona, D. Ramanan, P. Doll\u00e1r, and C.L. Zitnick, \u201cMicrosoft COCO: Common objects in context,\u201d European Conference on Computer Vision (ECCV), pp.740-755, 2014. 10.1007\/978-3-319-10602-1_48","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"13","unstructured":"[13] K. Simonyan and A. Zisserman, \u201cVery deep convolutional networks for large-scale image recoginition,\u201d International Conference on on Learning Representations (ICLR), pp.1-14, 2015."},{"key":"14","doi-asserted-by":"crossref","unstructured":"[14] C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed, D. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich, \u201cGoing deeper with convolutions,\u201d IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp.1-9, 2015. 10.1109\/cvpr.2015.7298594","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"15","doi-asserted-by":"crossref","unstructured":"[15] K. He, X. Zhang, S. Ren, and J. Sun, \u201cDeep residual learning for image recognition,\u201d IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp.770-778, 2016. 10.1109\/cvpr.2016.90","DOI":"10.1109\/CVPR.2016.90"},{"key":"16","doi-asserted-by":"crossref","unstructured":"[16] X. Liang, X. Shen, J. Feng, L. Lin, and S. Yan, \u201cSemantic object parsing with graph LSTM,\u201d European Conference on Computer Vision (ECCV), pp.125-143, 2016. 10.1007\/978-3-319-46448-0_8","DOI":"10.1007\/978-3-319-46448-0_8"},{"key":"17","doi-asserted-by":"crossref","unstructured":"[17] S. Zheng, S. Jayasumana, B. Romera-Paredes, V. Vineet, Z. Su, D. Du, C. Huang, and P.H.S. Torr, \u201cConditional random fields as recurrent neural networks,\u201d IEEE International Conference on Computer Vision (ICCV), pp.1529-1537, 2015. 10.1109\/iccv.2015.179","DOI":"10.1109\/ICCV.2015.179"},{"key":"18","doi-asserted-by":"crossref","unstructured":"[18] A. Arnab, S. Jayasumana, S. Zheng, and P.H.S. Torr, \u201cHigher order conditional random fields in deep neural networks,\u201d European Conference on Computer Vision (ECCV), pp.524-540, 2016. 10.1007\/978-3-319-46475-6_33","DOI":"10.1007\/978-3-319-46475-6_33"},{"key":"19","doi-asserted-by":"crossref","unstructured":"[19] S. Chandra and I. Kokkinos, \u201cFast, exact and multi-scale inference for semantic image segmentation with deep gaussian CRFs,\u201d European Conference on Computer Vision (ECCV), pp.402-418, 2016. 10.1007\/978-3-319-46478-7_25","DOI":"10.1007\/978-3-319-46478-7_25"},{"key":"20","doi-asserted-by":"publisher","unstructured":"[20] Y. Nakashima, N. Babaguchi, and J. Fan, \u201cPrivacy protection for social video via background estimation and CRF-based videographer&apos;s intention modeling,\u201d IEICE Transactions on on Information and Systems, vol.E99-D, no.4, pp.1221-1233, 2016. 10.1587\/transinf.2015edp7378","DOI":"10.1587\/transinf.2015EDP7378"},{"key":"21","doi-asserted-by":"crossref","unstructured":"[21] X. Ma and E. Hovy, \u201cEnd-to-end sequence labeling via bi-directional LSTM-CNNs-CRF,\u201d Annual Meeting of the Association for Computational Linguistics (ACL), pp.1064-1074, 2016. 10.18653\/v1\/p16-1101","DOI":"10.18653\/v1\/P16-1101"},{"key":"22","doi-asserted-by":"crossref","unstructured":"[22] X. Chu, W. Ouyang, H. Li, and X. Wang, \u201cCRF-CNN: Modeling structured information in human pose estimation,\u201d Conference on Neural Information Processing Systems (NIPS), pp.316-324, 2016.","DOI":"10.1109\/CVPR.2016.510"},{"key":"23","doi-asserted-by":"crossref","unstructured":"[23] F. Schroff, D. Kalenichenko, and J. Philbin, \u201cFaceNet: A unified embedding for face recognition and clustering,\u201d IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp.815-823, 2015. 10.1109\/cvpr.2015.7298682","DOI":"10.1109\/CVPR.2015.7298682"},{"key":"24","doi-asserted-by":"publisher","unstructured":"[24] G.E. Hinton, \u201cTraining products of experts by minimizing contrastive divergence,\u201d Neural Computation, vol.14, no.8, pp.1771-1800, 2002. 10.1162\/089976602760128018","DOI":"10.1162\/089976602760128018"},{"key":"25","doi-asserted-by":"crossref","unstructured":"[25] A. Kirillov, D. Schlesinger, S. Zheng, B. Savchynskyy, P.H.S. Torr, and C. Rother, \u201cJoint training of generic CNN-CRF models with stochastic optimization,\u201d Asian Conference on Computer Vision (ACCV), pp.221-236, 2016. 10.1007\/978-3-319-54184-6_14","DOI":"10.1007\/978-3-319-54184-6_14"},{"key":"26","unstructured":"[26] D. Kingma and J. Ba, \u201cAdam: A method for stochastic optimization,\u201d International Conference on Learning Representations (ICLR), 13 pages, 2015."},{"key":"27","unstructured":"[27] G.E. Hinton, N. Srivastava, A. Krizhevsky, I. Sutskever, and R.R. Salakhutdinov, \u201cImproving neural networks by preventing co-adaptation of feature detectors,\u201d arXiv preprint arXiv:1207.0580, 18 pages, 2012."},{"key":"28","unstructured":"[28] S. Tokui, K. Oono, S. Hido, and J. Clayton, \u201cChainer: A next-generation open source framework for deep learning,\u201d Conference on Neural Information Processing Systems (NIPS), 6 pages, 2015."},{"key":"29","doi-asserted-by":"crossref","unstructured":"[29] J.F. Henriques, R. Caseiro, P. Martins, and J. Batista, \u201cExploiting the circulant structure of tracking-by-detection with kernels,\u201d European Conference on Computer Vision (ECCV), pp.702-715, 2012. 10.1007\/978-3-642-33765-9_50","DOI":"10.1007\/978-3-642-33765-9_50"}],"container-title":["IEICE Transactions on Information and Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transinf\/E101.D\/10\/E101.D_2018EDP7029\/_pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,10,25]],"date-time":"2019-10-25T03:54:11Z","timestamp":1571975651000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transinf\/E101.D\/10\/E101.D_2018EDP7029\/_article"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,10,1]]},"references-count":29,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2018]]}},"URL":"https:\/\/doi.org\/10.1587\/transinf.2018edp7029","relation":{},"ISSN":["0916-8532","1745-1361"],"issn-type":[{"value":"0916-8532","type":"print"},{"value":"1745-1361","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,10,1]]}}}