{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T03:26:51Z","timestamp":1762918011879,"version":"3.37.3"},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"19","license":[{"start":{"date-parts":[[2019,7,8]],"date-time":"2019-07-08T00:00:00Z","timestamp":1562544000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2019,7,8]],"date-time":"2019-07-08T00:00:00Z","timestamp":1562544000000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2019,10]]},"DOI":"10.1007\/s11042-019-07888-4","type":"journal-article","created":{"date-parts":[[2019,7,8]],"date-time":"2019-07-08T08:04:21Z","timestamp":1562573061000},"page":"28189-28208","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["YADA: you always dream again for better object detection"],"prefix":"10.1007","volume":"78","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4237-2737","authenticated-orcid":false,"given":"Khanh-Duy","family":"Nguyen","sequence":"first","affiliation":[]},{"given":"Khang","family":"Nguyen","sequence":"additional","affiliation":[]},{"given":"Duy-Dinh","family":"Le","sequence":"additional","affiliation":[]},{"given":"Duc Anh","family":"Duong","sequence":"additional","affiliation":[]},{"given":"Tam V.","family":"Nguyen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,7,8]]},"reference":[{"key":"7888_CR1","doi-asserted-by":"crossref","unstructured":"Cheng G, Zhou P, Han J (2016) Rifd-cnn: rotation-invariant and fisher discriminative convolutional neural networks for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2884\u20132893","DOI":"10.1109\/CVPR.2016.315"},{"issue":"1","key":"7888_CR2","doi-asserted-by":"publisher","first-page":"265","DOI":"10.1109\/TIP.2018.2867198","volume":"28","author":"G Cheng","year":"2019","unstructured":"Cheng G, Han J, Zhou P, Xu D (2019) Learning rotation-invariant and fisher discriminative convolutional neural networks for object detection. IEEE Trans Image Process 28(1):265\u2013278","journal-title":"IEEE Trans Image Process"},{"key":"7888_CR3","doi-asserted-by":"crossref","unstructured":"Chu M, Wu S, Gu Y, Xu Y (2017) Rich features and precise localization with region proposal network for object detection. In: Chinese Conference on biometric recognition. Springer, pp 605\u2013614","DOI":"10.1007\/978-3-319-69923-3_65"},{"key":"7888_CR4","unstructured":"Dai J, Li Y, He K, Sun J (2016) R-fcn: object detection via region-based fully convolutional networks. In: Advances in neural information processing systems, pp 379\u2013387"},{"key":"7888_CR5","doi-asserted-by":"crossref","unstructured":"Dalal N, Triggs B (2005) Histograms of oriented gradients for human detection. In: 2005 IEEE Computer society conference on computer vision and pattern recognition (CVPR 2005), pp 886\u2013893","DOI":"10.1109\/CVPR.2005.177"},{"key":"7888_CR6","doi-asserted-by":"crossref","unstructured":"Dwibedi D, Misra I, Hebert M (2017) Cut, paste and learn: surprisingly easy synthesis for instance detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1301\u20131310","DOI":"10.1109\/ICCV.2017.146"},{"issue":"2","key":"7888_CR7","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham M, Gool LJV, Williams CKI, Winn JM, Zisserman A (2010) The pascal visual object classes (VOC) challenge. Int J Comput Vis 88(2):303\u2013338","journal-title":"Int J Comput Vis"},{"issue":"1","key":"7888_CR8","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1007\/s11263-014-0733-5","volume":"111","author":"M Everingham","year":"2015","unstructured":"Everingham M, Eslami SA, Van Gool L, Williams CK, Winn J, Zisserman A (2015) The pascal visual object classes challenge: a retrospective. Int J Comput Vis 111(1):98\u2013136","journal-title":"Int J Comput Vis"},{"key":"7888_CR9","doi-asserted-by":"crossref","unstructured":"Felzenszwalb PF, McAllester DA, Ramanan D (2008) A discriminatively trained, multiscale, deformable part model. In: 2008 IEEE Computer society conference on computer vision and pattern recognition","DOI":"10.1109\/CVPR.2008.4587597"},{"key":"7888_CR10","unstructured":"Gaidon A, Wang Q, Cabon Y, Vig E (2016) Virtual worlds as proxy for multi-object tracking analysis. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4340\u20134349"},{"key":"7888_CR11","doi-asserted-by":"crossref","unstructured":"Geiger A, Lenz P, Urtasun R (2012) Are we ready for autonomous driving? the kitti vision benchmark suite. In: 2012 IEEE conference on computer vision and pattern recognition (CVPR), pp 3354\u20133361","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"7888_CR12","doi-asserted-by":"crossref","unstructured":"Girshick R (2015) Fast R-CNN. In: 2015 IEEE international conference on computer vision, pp 1440\u20131448","DOI":"10.1109\/ICCV.2015.169"},{"key":"7888_CR13","doi-asserted-by":"crossref","unstructured":"Girshick R, Donahue J, Darrell T, Malik J (2014) Rich feature hierarchies for accurate object detection and semantic segmentation. In: 2014 IEEE Conference on computer vision and pattern recognition, pp 580\u2013587","DOI":"10.1109\/CVPR.2014.81"},{"key":"7888_CR14","doi-asserted-by":"crossref","unstructured":"Gupta S, Girshick R, Arbel\u00e1ez P, Malik J (2014) Learning rich features from rgb-d images for object detection and segmentation. In: European conference on computer vision. Springer, pp 345\u2013360","DOI":"10.1007\/978-3-319-10584-0_23"},{"issue":"1","key":"7888_CR15","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1109\/MSP.2017.2749125","volume":"35","author":"J Han","year":"2018","unstructured":"Han J, Zhang D, Cheng G, Liu N, Xu D (2018) Advanced deep-learning techniques for salient and category-specific object detection: a survey. IEEE Signal Process Mag 35(1):84\u2013100","journal-title":"IEEE Signal Process Mag"},{"key":"7888_CR16","doi-asserted-by":"crossref","unstructured":"Handa A, P\u0103tr\u0103ucean V, Stent S, Cipolla R (2016) Scenenet: an annotated model generator for indoor scene understanding. In: 2016 IEEE International conference on robotics and automation (ICRA). IEEE, pp 5737\u20135743","DOI":"10.1109\/ICRA.2016.7487797"},{"key":"7888_CR17","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"7888_CR18","doi-asserted-by":"crossref","unstructured":"He K, Gkioxari G, Doll\u00e1r P, Girshick R (2017) Mask r-cnn. In: 2017 IEEE International conference on computer vision (ICCV). IEEE, pp 2980\u20132988","DOI":"10.1109\/ICCV.2017.322"},{"key":"7888_CR19","doi-asserted-by":"crossref","unstructured":"Huang G, Liu Z, Weinberger KQ, van der Maaten L (2017) Densely connected convolutional networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, vol 1, p 3","DOI":"10.1109\/CVPR.2017.243"},{"key":"7888_CR20","unstructured":"Iandola FN, Han S, Moskewicz MW, Ashraf K, Dally WJ, Keutzer K (2016) Squeezenet: Alexnet-level accuracy with 50x fewer parameters and <\u20090.5 mb model size. arXiv: 1602.07360"},{"key":"7888_CR21","unstructured":"Johnson-Roberson M, Barto C, Mehta R, Sridhar SN, Rosaen K, Vasudevan R (2017) Driving in the matrix: can virtual worlds replace human-generated annotations for real world tasks? In: 2017 IEEE International conference on robotics and automation (ICRA). IEEE, pp 746\u2013753"},{"issue":"2","key":"7888_CR22","doi-asserted-by":"publisher","first-page":"246","DOI":"10.1006\/ccog.1994.1014","volume":"3","author":"TL Kahan","year":"1994","unstructured":"Kahan TL, LaBerge S (1994) Lucid dreaming as metacognition: implications for cognitive science. Consciousness Cogn 3(2):246\u2013264","journal-title":"Consciousness Cogn"},{"key":"7888_CR23","doi-asserted-by":"publisher","first-page":"206","DOI":"10.1016\/j.jvcir.2019.02.020","volume":"60","author":"N Khanh-Duy","year":"2019","unstructured":"Khanh-Duy N, Khang N, Duy-Dinh L, Duc A D, Tam V N (2019) You always look again: Learning to detect the unseen objects. J. Vis. Commun. Image Represent. 60:206\u2013216","journal-title":"J. Vis. Commun. Image Represent."},{"key":"7888_CR24","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) Imagenet classification with deep convolutional neural networks. In: Advances in neural information processing systems, pp 1097\u20131105"},{"key":"7888_CR25","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) Imagenet classification with deep convolutional neural networks.In: Advances in neural information processing systems, pp 1106\u20131114"},{"key":"7888_CR26","unstructured":"Lin T-Y, Goyal P, Girshick R, He K, Doll\u00e1r P (2017) Focal loss for dense object detection. arXiv: 1708.02002"},{"key":"7888_CR27","doi-asserted-by":"crossref","unstructured":"Liu W, Anguelov D, Erhan D, Szegedy C, Reed S, Fu CY, Berg AC (2016) Ssd: single shot multibox detector. In: 14th European conference on computer vision, ECCV 2016. Springer","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"7888_CR28","doi-asserted-by":"crossref","unstructured":"Mahajan D, Girshick R, Ramanathan V, He K, Paluri M, Li Y, Bharambe A, van der Maaten L (2018) Exploring the limits of weakly supervised pretraining. arXiv: 1805.00932","DOI":"10.1007\/978-3-030-01216-8_12"},{"key":"7888_CR29","doi-asserted-by":"crossref","unstructured":"Peng X, Sun B, Ali K, Saenko K (2015) Learning deep object detectors from 3d models. In: Proceedings of the IEEE international conference on computer vision, pp 1278\u20131286","DOI":"10.1109\/ICCV.2015.151"},{"key":"7888_CR30","doi-asserted-by":"crossref","unstructured":"Redmon J, Farhadi A (2017) Yolo9000: better, faster, stronger. arXiv preprint","DOI":"10.1109\/CVPR.2017.690"},{"key":"7888_CR31","unstructured":"Redmon J, Farhadi A (2018) Yolov3: an incremental improvement. arXiv: 1804.02767"},{"key":"7888_CR32","doi-asserted-by":"crossref","unstructured":"Redmon J, Divvala S, Girshick R, Farhadi A (2016) You only look once: unified, real-time object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 779\u2013788","DOI":"10.1109\/CVPR.2016.91"},{"key":"7888_CR33","unstructured":"Ren S, He K, Girshick R, Sun J (2015) Faster R-CNN: towards real-time object detection with region proposal networks. In: Proceedings of advances in neural information processing systems, pp 91\u201399"},{"key":"7888_CR34","unstructured":"Rolnick D, Tegmark M (2017) The power of deeper networks for expressing natural functions. arXiv: 1705.05502"},{"key":"7888_CR35","doi-asserted-by":"crossref","unstructured":"Ros G, Sellart L, Materzynska J, Vazquez D, Lopez AM (2016) The synthia dataset: a large collection of synthetic images for semantic segmentation of urban scenes. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3234\u20133243","DOI":"10.1109\/CVPR.2016.352"},{"key":"7888_CR36","doi-asserted-by":"crossref","unstructured":"Shrivastava A, Gupta A, Girshick R (2016) Training region-based object detectors with online hard example mining. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 761\u2013769","DOI":"10.1109\/CVPR.2016.89"},{"key":"7888_CR37","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. arXiv: 1409.1556"},{"key":"7888_CR38","unstructured":"Singh B, Najibi M, Davis LS (2018) Sniper: efficient multi-scale training. In: Advances in neural information processing systems, pp 9310\u20139320"},{"key":"7888_CR39","doi-asserted-by":"crossref","unstructured":"Szegedy C, Liu W, Jia Y, Sermanet P, Reed S, Anguelov D, Erhan D, Vanhoucke V, Rabinovich A (2015) Going deeper with convolutions. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1\u20139","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"7888_CR40","doi-asserted-by":"crossref","unstructured":"Szegedy C, Vanhoucke V, Ioffe S, Shlens J, Wojna Z (2016) Rethinking the inception architecture for computer vision. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2818\u20132826","DOI":"10.1109\/CVPR.2016.308"},{"key":"7888_CR41","doi-asserted-by":"crossref","unstructured":"Szegedy C, Ioffe S, Vanhoucke V, Alemi AA (2017) Inception-v4, inception-resnet and the impact of residual connections on learning. In: AAAI, vol 4, p 12","DOI":"10.1609\/aaai.v31i1.11231"},{"key":"7888_CR42","unstructured":"Tam V N, Luoqi L, Khang N (2016) Exploiting generic multi-level convolutional neural networks for scene understanding. In: ICARCV, pp 1\u20136"},{"issue":"6","key":"7888_CR43","doi-asserted-by":"publisher","first-page":"3130","DOI":"10.1109\/TIP.2019.2894284","volume":"28","author":"VN Tam","year":"2019","unstructured":"Tam V N, Khanh N, Thanh-Toan D (2019) Semantic Prior Analysis for Salient Object Detection. IEEE Trans. Image Processing 28(6):3130\u20133141","journal-title":"IEEE Trans. Image Processing"},{"issue":"1","key":"7888_CR44","doi-asserted-by":"publisher","first-page":"86","DOI":"10.1007\/s11263-017-1042-6","volume":"126","author":"VN Tam","year":"2018","unstructured":"Tam V N, Qi Z, Shuicheng Y (2018) Attentive Systems: A Survey. Int. J. Comput. Vis. 126(1):86\u2013110","journal-title":"Int. J. Comput. Vis."},{"key":"7888_CR45","doi-asserted-by":"crossref","unstructured":"Tremblay J, Prakash A, Acuna D, Brophy M, Jampani V, Anil C, To T, Cameracci E, Boochoon S, Birchfield S (2018) Training deep networks with synthetic data: bridging the reality gap by domain randomization. arXiv: 1804.06516","DOI":"10.1109\/CVPRW.2018.00143"},{"key":"7888_CR46","doi-asserted-by":"crossref","unstructured":"Van de Sande KE, Uijlings JR, Gevers T, Smeulders AW (2011) Segmentation as selective search for object recognition. In: 2011 IEEE International conference on computer vision (ICCV). IEEE, pp 1879\u20131886","DOI":"10.1109\/ICCV.2011.6126456"},{"key":"7888_CR47","doi-asserted-by":"crossref","unstructured":"Varol G, Romero J, Martin X, Mahmood N, Black MJ, Laptev I, Schmid C (2017) Learning from synthetic humans. In: 2017 IEEE Conference on computer vision and pattern recognition (CVPR 2017). IEEE, pp 4627\u20134635","DOI":"10.1109\/CVPR.2017.492"},{"issue":"2","key":"7888_CR48","doi-asserted-by":"publisher","first-page":"137","DOI":"10.1023\/B:VISI.0000013087.49260.fb","volume":"57","author":"PA Viola","year":"2004","unstructured":"Viola PA, Jones MJ (2004) Robust real-time face detection. Int J Comput Vis 57(2):137\u2013154","journal-title":"Int J Comput Vis"},{"issue":"10","key":"7888_CR49","doi-asserted-by":"publisher","first-page":"2071","DOI":"10.1109\/TPAMI.2015.2389830","volume":"37","author":"X Wang","year":"2015","unstructured":"Wang X, Yang M, Zhu S, Lin Y (2015) Regionlets for generic object detection. IEEE Trans Pattern Anal Mach Intell 37(10):2071\u20132084","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"7888_CR50","unstructured":"Zhang D, Han J, Yang L, Xu D (2018) Spftn: a joint learning framework for localizing and segmenting objects in weakly labeled videos. IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"4","key":"7888_CR51","doi-asserted-by":"publisher","first-page":"363","DOI":"10.1007\/s11263-018-1112-4","volume":"127","author":"D Zhang","year":"2019","unstructured":"Zhang D, Han J, Zhao L, Meng D (2019) Leveraging prior-knowledge for weakly supervised object detection under a collaborative self-paced curriculum learning framework. Int J Comput Vis 127(4):363\u2013380","journal-title":"Int J Comput Vis"},{"key":"7888_CR52","unstructured":"Zhou Z-H, Feng J (2017) Deep forest: towards an alternative to deep neural networks: arXiv: 1702.08835"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-019-07888-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11042-019-07888-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-019-07888-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,23]],"date-time":"2022-09-23T04:10:58Z","timestamp":1663906258000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11042-019-07888-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,7,8]]},"references-count":52,"journal-issue":{"issue":"19","published-print":{"date-parts":[[2019,10]]}},"alternative-id":["7888"],"URL":"https:\/\/doi.org\/10.1007\/s11042-019-07888-4","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"type":"print","value":"1380-7501"},{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2019,7,8]]},"assertion":[{"value":"12 September 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 April 2019","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 June 2019","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 July 2019","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}