{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,23]],"date-time":"2025-12-23T10:35:02Z","timestamp":1766486102657},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2020,4,24]],"date-time":"2020-04-24T00:00:00Z","timestamp":1587686400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,4,24]],"date-time":"2020-04-24T00:00:00Z","timestamp":1587686400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SN COMPUT. SCI."],"published-print":{"date-parts":[[2020,5]]},"DOI":"10.1007\/s42979-020-00132-z","type":"journal-article","created":{"date-parts":[[2020,4,24]],"date-time":"2020-04-24T16:03:39Z","timestamp":1587744219000},"update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["Benchmarking Deep Learning Models for Classification of Book Covers"],"prefix":"10.1007","volume":"1","author":[{"given":"Adriano","family":"Lucieri","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Huzaifa","family":"Sabir","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shoaib Ahmed","family":"Siddiqui","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Syed Tahseen Raza","family":"Rizvi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Brian Kenji","family":"Iwana","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Seiichi","family":"Uchida","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Andreas","family":"Dengel","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sheraz","family":"Ahmed","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,4,24]]},"reference":[{"key":"132_CR1","doi-asserted-by":"publisher","unstructured":"Afzal MZ, Capobianco S, Malik MI, Marinai S, Breuel TM, Dengel A, Liwicki M. Deepdocclassifier: document classification with deep convolutional neural network. In: 2015 13th international conference on document analysis and recognition (ICDAR); 2015. p. 1111\u20135. https:\/\/doi.org\/10.1109\/ICDAR.2015.7333933.","DOI":"10.1109\/ICDAR.2015.7333933"},{"key":"132_CR2","doi-asserted-by":"crossref","unstructured":"Anderson P, He X, Buehler C, Teney D, Johnson M, Gould S, Zhang L. Bottom-up and top-down attention for image captioning and visual question answering. In: The IEEE conference on computer vision and pattern recognition (CVPR); 2018. vol.\u00a03, p.\u00a06.","DOI":"10.1109\/CVPR.2018.00636"},{"issue":"7","key":"132_CR3","first-page":"2015","volume":"10","author":"S Bach","year":"2015","unstructured":"Bach S, Binder A, Montavon G, Klauschen F, M\u00fcller KR, Samek W. On pixel-wise explanations for non-linear classifier decisions by layer-wise relevance propagation. PLoS ONE. 2015;10(7):2015.","journal-title":"PLoS ONE"},{"key":"132_CR4","doi-asserted-by":"publisher","unstructured":"Buczkowski P, Sobkowicz A, Kozlowski M. Deep learning approaches towards book covers classification. In: International conference on pattern recognition applications and methods (ICPRAM); 2018. p. 309\u201316. https:\/\/doi.org\/10.5220\/0006556103090316.","DOI":"10.5220\/0006556103090316"},{"key":"132_CR5","doi-asserted-by":"crossref","unstructured":"Chen LC, Papandreou G, Schroff F, Adam H. Rethinking atrous convolution for semantic image segmentation; 2017. arXiv:1706.05587.","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"132_CR6","doi-asserted-by":"crossref","unstructured":"Chiu CC, Sainath TN, Wu Y, Prabhavalkar R, Nguyen P, Chen Z, Kannan A, Weiss RJ, Rao K, Gonina E, et\u00a0al. State-of-the-art speech recognition with sequence-to-sequence models. In: 2018 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE; 2018. p. 4774\u20138.","DOI":"10.1109\/ICASSP.2018.8462105"},{"key":"132_CR7","doi-asserted-by":"crossref","unstructured":"Cubuk ED, Zoph B, Man\u00e9 D, Vasudevan V, Le QV. Autoaugment: learning augmentation policies from data. CoRR; 2018. arXiv:abs\/1805.09501.","DOI":"10.1109\/CVPR.2019.00020"},{"key":"132_CR8","unstructured":"Goodfellow I, Pouget-Abadie J, Mirza M, Xu B, Warde-Farley D, Ozair S, Courville A, Bengio Y. Generative adversarial nets. In: Advances in neural information processing systems (NIPS); 2014. p. 2672\u201380."},{"key":"132_CR9","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J. Deep residual learning for image recognition. In: The IEEE conference on computer vision and pattern recognition (CVPR); 2016. p. 770\u20138.","DOI":"10.1109\/CVPR.2016.90"},{"key":"132_CR10","doi-asserted-by":"crossref","unstructured":"Hou X, Zhang L. Saliency detection: a spectral residual approach. In: The IEEE conference on computer vision and pattern recognition (CVPR). IEEE; 2007. p. 1\u20138.","DOI":"10.1109\/CVPR.2007.383267"},{"key":"132_CR11","doi-asserted-by":"crossref","unstructured":"Hu J, Shen L, Sun G. Squeeze-and-excitation networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition; 2018. p. 7132\u201341.","DOI":"10.1109\/CVPR.2018.00745"},{"key":"132_CR12","doi-asserted-by":"crossref","unstructured":"Huang G, Liu Z, Van Der\u00a0Maaten L, Weinberger KQ. Densely connected convolutional networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition; 2017. p. 4700\u20138.","DOI":"10.1109\/CVPR.2017.243"},{"key":"132_CR13","unstructured":"Iandola FN, Han S, Moskewicz MW, Ashraf K, Dally WJ, Keutzer K. Squeezenet: alexnet-level accuracy with 50$$\\times$$ fewer parameters and< 0.5 mb model size; 2016. arXiv:1602.07360."},{"key":"132_CR14","unstructured":"Iwana BK, Rizvi STR, Ahmed S, Dengel A, Uchida S. Judging a book by its cover; 2016. arXiv:1610.09204."},{"key":"132_CR15","unstructured":"Jaderberg M, Simonyan K, Zisserman A, et\u00a0al. Spatial transformer networks. In: Advances in neural information processing systems (NIPS); 2015. p. 2017\u201325."},{"key":"132_CR16","doi-asserted-by":"crossref","unstructured":"Jolly S, Iwana BK, Kuroki R, Uchida S. How do convolutional neural networks learn design? In: 2018 24th international conference on pattern recognition (ICPR). IEEE; 2018. p. 1085\u201390.","DOI":"10.1109\/ICPR.2018.8545624"},{"key":"132_CR17","doi-asserted-by":"crossref","unstructured":"Joulin A, Grave E, Bojanowski P, Mikolov T. Bag of tricks for efficient text classification. In: Proceedings of the 15th conference of the European chapter of the association for computational linguistics: volume 2, short papers. Association for Computational Linguistics; 2017. p. 427\u201331.","DOI":"10.18653\/v1\/E17-2068"},{"key":"132_CR18","doi-asserted-by":"crossref","unstructured":"Karayev S, Trentacoste M, Han H, Agarwala A, Darrell T, Hertzmann A, Winnemoeller H. Recognizing image style; 2013. arXiv:1311.3715.","DOI":"10.5244\/C.28.122"},{"key":"132_CR19","unstructured":"Karras T, Aila T, Laine S, Lehtinen J. Progressive growing of gans for improved quality, stability, and variation; 2017. arXiv:1710.10196."},{"key":"132_CR20","unstructured":"Kjartansson S, Ashavsky A. Can you judge a book by its cover? Stanford CS231N; 2017. http:\/\/cs231n.stanford.edu\/reports\/2017\/pdfs\/814.pdf."},{"key":"132_CR21","unstructured":"Krizhevsky A, Sutskever I, Hinton GE. Imagenet classification with deep convolutional neural networks. In: Advances in neural information processing systems (NIPS); 2012. p. 1097\u2013105."},{"issue":"11","key":"132_CR22","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y LeCun","year":"1998","unstructured":"LeCun Y, Bottou L, Bengio Y, Haffner P. Gradient-based learning applied to document recognition. Proc IEEE. 1998;86(11):2278\u2013324.","journal-title":"Proc IEEE"},{"key":"132_CR23","unstructured":"LeCun Y, Cortes C. MNIST handwritten digit database; 2010. http:\/\/yann.lecun.com\/exdb\/mnist\/."},{"issue":"4","key":"132_CR24","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1109\/MMUL.2011.1","volume":"18","author":"J Libeks","year":"2011","unstructured":"Libeks J, Turnbull D. You can judge an artist by an album cover: using images for music annotation. IEEE Multi Med. 2011;18(4):30\u20137.","journal-title":"IEEE Multi Med"},{"key":"132_CR25","doi-asserted-by":"crossref","unstructured":"Lin TY, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick CL. Microsoft coco: common objects in context. In: European conference on computer vision. Springer; 2014. p. 740\u201355.","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"132_CR26","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Graves A, Antonoglou I, Wierstra D, Riedmiller M. Playing atari with deep reinforcement learning; 2013. arXiv:1312.5602."},{"key":"132_CR27","unstructured":"Nar K, Ocal O, Sastry SS, Ramchandran K. Cross-entropy loss leads to poor margins. OpenReview; 2019. https:\/\/openreview.net\/forum?id=ByfbnsA9Km."},{"key":"132_CR28","unstructured":"Netzer Y, Wang T, Coates A, Bissacco A, Wu B, Ng AY. Reading digits in natural images with unsupervised feature learning. In: NIPS workshop on deep learning and unsupervised feature learning; 2011. vol. 2011, p.\u00a05."},{"key":"132_CR29","doi-asserted-by":"crossref","unstructured":"Nilsback ME, Zisserman A. Automated flower classification over a large number of classes. In: Sixth Indian conference on computer vision, graphics & image processing, 2008. ICVGIP\u201908. IEEE; 2008. p. 722\u20139.","DOI":"10.1109\/ICVGIP.2008.47"},{"issue":"1","key":"132_CR30","first-page":"4","volume":"1","author":"S Oramas","year":"2018","unstructured":"Oramas S, Barbieri F, Nieto O, Serra X. Multimodal deep learning for music genre classification. Trans Int Soc Music Inf Retr. 2018;1(1):4\u201321.","journal-title":"Trans Int Soc Music Inf Retr"},{"key":"132_CR31","unstructured":"Oramas S, Nieto O, Barbieri F, Serra X. Multi-label music genre classification from audio, text, and images using deep features; 2017. arXiv:1707.04916."},{"key":"132_CR32","unstructured":"Rodr\u00edguez P, Cucurull G, Gonz\u00e0lez J, Gonfaus JM, Roca X. A painless attention mechanism for convolutional neural networks; 2018. https:\/\/openreview.net\/forum?id=rJe7FW-Cb."},{"issue":"3","key":"132_CR33","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky O, Deng J, Su H, Krause J, Satheesh S, Ma S, Huang Z, Karpathy A, Khosla A, Bernstein M, Berg AC, Fei-Fei L. ImageNet large scale visual recognition challenge. Int J Comput Vis (IJCV). 2015;115(3):211\u201352. https:\/\/doi.org\/10.1007\/s11263-015-0816-y.","journal-title":"Int J Comput Vis (IJCV)"},{"key":"132_CR34","unstructured":"Simonyan K, Zisserman A. Very deep convolutional networks for large-scale image recognition; 2014. arXiv:1409.1556."},{"key":"132_CR35","doi-asserted-by":"crossref","unstructured":"Sobkowicz A, Koz\u0142owski M, Buczkowski P. Reading book by the cover-book genre detection using short descriptions. In: International conference on man\u2013machine interactions. Springer; 2017. p. 439\u201348.","DOI":"10.1007\/978-3-319-67792-7_43"},{"key":"132_CR36","doi-asserted-by":"crossref","unstructured":"Szegedy C, Ioffe S, Vanhoucke V, Alemi AA. Inception-v4, inception-resnet and the impact of residual connections on learning. In: AAAI conference on artificial intelligence; 2017. vol.\u00a04, p.\u00a012.","DOI":"10.1609\/aaai.v31i1.11231"},{"key":"132_CR37","doi-asserted-by":"crossref","unstructured":"Szegedy C, Liu W, Jia Y, Sermanet P, Reed S, Anguelov D, Erhan D, Vanhoucke V, Rabinovich A. Going deeper with convolutions. In: The IEEE conference on computer vision and pattern recognition (CVPR) 2015.","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"132_CR38","unstructured":"Wah C, Branson S, Welinder P, Perona P, Belongie S. The caltech-UCSD birds-200-2011 dataset 2011."},{"key":"132_CR39","unstructured":"Wang Y, Skerry-Ryan R, Stanton D, Wu Y, Weiss RJ, Jaitly N, Yang Z, Xiao Y, Chen Z, Bengio S, et\u00a0al. Tacotron: a fully end-to-end text-to-speech synthesis model; 2017. arXiv:1703.10135."},{"issue":"2","key":"132_CR40","doi-asserted-by":"publisher","first-page":"297","DOI":"10.1007\/s10115-017-1042-4","volume":"53","author":"JG Yao","year":"2017","unstructured":"Yao JG, Wan X, Xiao J. Recent advances in document summarization. Knowl Inf Syst. 2017;53(2):297\u2013336. https:\/\/doi.org\/10.1007\/s10115-017-1042-4.","journal-title":"Knowl Inf Syst"},{"key":"132_CR41","unstructured":"Yu F, Seff A, Zhang Y, Song S, Funkhouser T, Xiao J. Lsun: construction of a large-scale image dataset using deep learning with humans in the loop; 2015. arXiv:1506.03365."},{"key":"132_CR42","doi-asserted-by":"crossref","unstructured":"Zoph B, Vasudevan V, Shlens J, Le QV. Learning transferable architectures for scalable image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition; 2018. p. 8697\u2013710.","DOI":"10.1109\/CVPR.2018.00907"},{"key":"132_CR43","unstructured":"Zujovic J, Gandy L, Friedman S, Pardo B, Pappas TN. Classifying paintings by artistic genre: an analysis of features & classifiers. In: IEEE international workshop on multimedia signal processing, 2009. MMSP\u201909. IEEE; 2009. p. 1\u20135."}],"container-title":["SN Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-020-00132-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42979-020-00132-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-020-00132-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,22]],"date-time":"2022-10-22T06:37:41Z","timestamp":1666420661000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42979-020-00132-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,4,24]]},"references-count":43,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2020,5]]}},"alternative-id":["132"],"URL":"https:\/\/doi.org\/10.1007\/s42979-020-00132-z","relation":{},"ISSN":["2662-995X","2661-8907"],"issn-type":[{"value":"2662-995X","type":"print"},{"value":"2661-8907","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,4,24]]},"assertion":[{"value":"20 January 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 March 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 April 2020","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Compliance with ethical standards"}},{"value":"On behalf of all authors, the corresponding author states that there is no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of Interest"}}],"article-number":"139"}}