{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,20]],"date-time":"2025-10-20T10:25:22Z","timestamp":1760955922980},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2019,2,28]],"date-time":"2019-02-28T00:00:00Z","timestamp":1551312000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Big Data"],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1186\/s40537-019-0186-3","type":"journal-article","created":{"date-parts":[[2019,2,28]],"date-time":"2019-02-28T06:28:58Z","timestamp":1551335338000},"update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":40,"title":["Selecting a representative decision tree from an ensemble of decision-tree models for fast big data classification"],"prefix":"10.1186","volume":"6","author":[{"given":"Abraham Itzhak","family":"Weinberg","sequence":"first","affiliation":[]},{"given":"Mark","family":"Last","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,2,28]]},"reference":[{"key":"186_CR1","unstructured":"AlSabti K, Ranka S, Singh V. Clouds: classification for large or out-of-core datasets. In: Conference on knowledge discovery and data mining. 1998"},{"key":"186_CR2","doi-asserted-by":"crossref","unstructured":"Amado N, Gama J, Silva F. Parallel implementation of decision tree learning algorithms. In: Progress in artificial intelligence. Berlin: Springer; 2001. p.\u00a06\u201313.","DOI":"10.1007\/3-540-45329-6_4"},{"key":"186_CR3","unstructured":"Amado N, Gama J, Silva F. Exploiting parallelism in decision tree induction. In: Proceedings from the ECML\/PKDD workshop on parallel and distributed computing for machine learning. 2003. p. 13\u201322."},{"key":"186_CR4","doi-asserted-by":"crossref","unstructured":"Andrzejak A, Langner F, Zabala S. Interpretable models from distributed data via merging of decision trees. In: 2013 IEEE symposium on computational intelligence and data mining (CIDM). New York: IEEE; 2013. p. 1\u20139.","DOI":"10.1109\/CIDM.2013.6597210"},{"key":"186_CR5","doi-asserted-by":"crossref","unstructured":"Basilico JD, Munson MA, Kolda TG, Dixon KR, Kegelmeyer WP. Comet: a recipe for learning and using large ensembles on massive data. In: 2011 IEEE 11th international conference on data mining (ICDM). New York: IEEE; 2011. p. 41\u201350.","DOI":"10.1109\/ICDM.2011.39"},{"key":"186_CR6","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9781139042918","volume-title":"Scaling up machine learning: parallel and distributed approaches","author":"R Bekkerman","year":"2011","unstructured":"Bekkerman R, Bilenko M, Langford J. Scaling up machine learning: parallel and distributed approaches. Cambridge: Cambridge University Press; 2011."},{"key":"186_CR7","first-page":"849","volume":"11","author":"Y Ben-Haim","year":"2010","unstructured":"Ben-Haim Y, Tom-Tov E. A streaming parallel decision tree algorithm. J Mach Learn Res. 2010;11:849\u201372.","journal-title":"J Mach Learn Res"},{"key":"186_CR8","unstructured":"Bousquet O, Bottou L. The tradeoffs of large scale learning. In: Advances in neural information processing systems. 2008. p. 161\u20138."},{"issue":"1\u20132","key":"186_CR9","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1023\/A:1007563306331","volume":"36","author":"L Breiman","year":"1999","unstructured":"Breiman L. Pasting small votes for classification in large databases and on-line. Mach Learn. 1999;36(1\u20132):85\u2013103.","journal-title":"Mach Learn"},{"issue":"1","key":"186_CR10","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/A:1010933404324","volume":"45","author":"L Breiman","year":"2001","unstructured":"Breiman L. Random forests. Mach Learn. 2001;45(1):5\u201332.","journal-title":"Mach Learn"},{"issue":"1","key":"186_CR11","doi-asserted-by":"publisher","first-page":"49","DOI":"10.14257\/ijdta.2014.7.1.05","volume":"7","author":"W Dai","year":"2014","unstructured":"Dai W, Ji W. A mapreduce implementation of c4. 5 decision tree algorithm. Int J Database Theory Appl. 2014;7(1):49\u201360.","journal-title":"Int J Database Theory Appl"},{"key":"186_CR12","unstructured":"DeWitt DJ, Naughton JF, Schneider D, et\u00a0al. Parallel sorting on a shared-nothing architecture using probabilistic splitting. In: Proceedings of the first international conference on parallel and distributed information systems, 1991. New York: IEEE; 1991. p.\u00a0280\u201391."},{"issue":"3","key":"186_CR13","doi-asserted-by":"publisher","first-page":"187","DOI":"10.3233\/IDA-1998-2303","volume":"2","author":"P Domingos","year":"1998","unstructured":"Domingos P. Knowledge discovery via multiple models. Intell Data Anal. 1998;2(3):187\u2013202.","journal-title":"Intell Data Anal"},{"key":"186_CR14","doi-asserted-by":"crossref","unstructured":"Domingos P, Hulten G. Mining high-speed data streams. In: Proceedings of the sixth ACM SIGKDD international conference on knowledge discovery and data mining. New York City: ACM; 2000. p. 71\u201380.","DOI":"10.1145\/347090.347107"},{"key":"186_CR15","doi-asserted-by":"publisher","first-page":"916","DOI":"10.1214\/07-AOAS148","volume":"2","author":"JH Friedman","year":"2008","unstructured":"Friedman JH, Popescu BE. Predictive learning via rule ensembles. Ann Appl Stat. 2008;2:916\u201354.","journal-title":"Ann Appl Stat"},{"key":"186_CR16","unstructured":"Gehrke J, Ganti V, Ramakrishnan R, Loh W-Y. Boat optimistic decision tree construction. In: ACM SIGMOD record, vol. 28. New York City: ACM, 1999. p. 169\u201380."},{"issue":"3","key":"186_CR17","doi-asserted-by":"publisher","first-page":"285","DOI":"10.1006\/jpdc.2000.1691","volume":"61","author":"S Goil","year":"2001","unstructured":"Goil S, Choudhary A. Parsimony: an infrastructure for parallel multidimensional analysis and data mining. J Parallel Distrib Comput. 2001;61(3):285\u2013321.","journal-title":"J Parallel Distrib Comput"},{"key":"186_CR18","doi-asserted-by":"publisher","first-page":"993","DOI":"10.1109\/34.58871","volume":"10","author":"LK Hansen","year":"1990","unstructured":"Hansen LK, Salamon P. Neural network ensembles. IEEE Trans Pattern Anal Mach Intell. 1990;10:993\u20131001.","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"186_CR19","unstructured":"Hodges J Jr, Lehmann EL. Comparison of the normal scores and Wilcoxon tests. In: Proc. fourth Berkeley symp. math. statist. prob, vol. 1. 1961. p. 307\u201317."},{"key":"186_CR20","first-page":"119","volume-title":"Communication and memory efficient parallel decision tree construction","author":"R Jin","year":"2003","unstructured":"Jin R, Agrawal G. Communication and memory efficient parallel decision tree construction. Philadelphia: SDM, SIAM; 2003. p. 119\u201329."},{"key":"186_CR21","unstructured":"Joshi MV, Karypis G, Kumar V. Scalparc: a new scalable and efficient parallel classification algorithm for mining large datasets. In: Parallel processing symposium, 1998. IPPS\/SPDP 1998. Proceedings of the first merged international... and symposium on parallel and distributed processing 1998. New York: IEEE; 1998. p.\u00a0573\u20139."},{"key":"186_CR22","doi-asserted-by":"crossref","unstructured":"Kargupta H, Park B-H. A Fourier spectrum-based approach to represent decision trees for mining data streams in mobile environments. In: IEEE transactions on knowledge and data engineering, vol. 16, no. 2. 2004. p. 216\u201329.","DOI":"10.1109\/TKDE.2004.1269599"},{"key":"186_CR23","first-page":"231","volume":"7","author":"A Krogh","year":"1995","unstructured":"Krogh A, Vedelsby J, et al. Neural network ensembles, cross validation, and active learning. Adv Neural Inf Process Syst. 1995;7:231\u20138.","journal-title":"Adv Neural Inf Process Syst"},{"key":"186_CR24","doi-asserted-by":"crossref","unstructured":"Louppe G, Geurts, P. Ensembles on random patches. In: Machine learning and knowledge discovery in databases. Berlin: Springer; 2012. p.\u00a0346\u201361.","DOI":"10.1007\/978-3-642-33460-3_28"},{"issue":"1","key":"186_CR25","doi-asserted-by":"publisher","first-page":"3898","DOI":"10.1038\/s41598-017-04281-9","volume":"7","author":"A Magana-Mora","year":"2017","unstructured":"Magana-Mora A, Bajic VB. Omniga: optimized omnivariate decision trees for generalizable classification models. Sci Rep. 2017;7(1):3898.","journal-title":"Sci Rep"},{"key":"186_CR26","doi-asserted-by":"crossref","unstructured":"Mehta M, Agrawal R, Rissanen J. Sliq: a fast scalable classifier for data mining. In: Advances in database technology EDBT\u201996. Berlin: Springer; 1996. p. 18\u201332.","DOI":"10.1007\/BFb0014141"},{"issue":"3","key":"186_CR27","doi-asserted-by":"publisher","first-page":"577","DOI":"10.1016\/S0167-9473(03)00063-X","volume":"45","author":"R Miglio","year":"2004","unstructured":"Miglio R, Soffritti G. The comparison between classification trees through proximity measures. Comput Stat Data Anal. 2004;45(3):577\u201393.","journal-title":"Comput Stat Data Anal"},{"key":"186_CR28","doi-asserted-by":"crossref","unstructured":"Narlikar GJ. A parallel, multithreaded decision tree builder. DTIC Document: Technical report; 1998.","DOI":"10.21236\/ADA363531"},{"key":"186_CR29","first-page":"810","volume-title":"A general framework for estimating similarity of datasets and decision trees: exploring semantic similarity of decision trees","author":"I Ntoutsi","year":"2008","unstructured":"Ntoutsi I, Kalousis A, Theodoridis Y. A general framework for estimating similarity of datasets and decision trees: exploring semantic similarity of decision trees. Philadelphia: SDM, SIAM; 2008. p. 810\u201321."},{"issue":"2","key":"186_CR30","doi-asserted-by":"publisher","first-page":"1426","DOI":"10.14778\/1687553.1687569","volume":"2","author":"B Panda","year":"2009","unstructured":"Panda B, Herbach JS, Basu S, Bayardo RJ. Planet: massively parallel learning of tree ensembles with mapreduce. Proc VLDB Endow. 2009;2(2):1426\u201337.","journal-title":"Proc VLDB Endow"},{"issue":"4","key":"186_CR31","doi-asserted-by":"publisher","first-page":"1253","DOI":"10.1073\/pnas.1219097111","volume":"111","author":"F Parisi","year":"2014","unstructured":"Parisi F, Strino F, Nadler B, Kluger Y. Ranking and combining multiple predictors without labeled data. Proc Natl Acad Sci. 2014;111(4):1253\u20138.","journal-title":"Proc Natl Acad Sci"},{"issue":"4","key":"186_CR32","doi-asserted-by":"publisher","first-page":"334","DOI":"10.14778\/2095686.2095692","volume":"5","author":"M Pawlik","year":"2011","unstructured":"Pawlik M, Augsten N. Rted: a robust algorithm for the tree edit distance. Proc VLDB Endow. 2011;5(4):334\u201345.","journal-title":"Proc VLDB Endow"},{"key":"186_CR33","unstructured":"Shafer J, Agrawal R, Mehta M. Sprint: a scalable parallel classifier for data mining. In: Proc. 1996 int. conf. very large databases, Citeseer, 1996. p. 544\u201355."},{"issue":"6","key":"186_CR34","doi-asserted-by":"publisher","first-page":"727","DOI":"10.1002\/(SICI)1097-0258(19990330)18:6<727::AID-SIM61>3.0.CO;2-2","volume":"18","author":"WD Shannon","year":"1999","unstructured":"Shannon WD, Banks D. Combining classification trees using MLE. Stat Med. 1999;18(6):727\u201340.","journal-title":"Stat Med"},{"key":"186_CR35","unstructured":"Sreenivas MK, AlSabti K, Ranka S. Parallel out-of-core decision tree classifiers. In: Kargupta H, Chan P, editors. Advances in distributed and parallel knowledge discovery. Menlo Park: AAAI; 2000. p.\u00a0317\u201336."},{"key":"186_CR36","doi-asserted-by":"publisher","DOI":"10.1007\/0-306-47011-X_2","volume-title":"Parallel formulations of decision-tree classification algorithms","author":"A Srivastava","year":"2002","unstructured":"Srivastava A, Han E-H, Kumar V, Singh V. Parallel formulations of decision-tree classification algorithms. Berlin: Springer; 2002."},{"key":"186_CR37","doi-asserted-by":"publisher","first-page":"331","DOI":"10.1016\/j.neucom.2014.04.078","volume":"150","author":"I Triguero","year":"2015","unstructured":"Triguero I, Peralta D, Bacardit J, Garc\u00ed S, Herrera F. MRPR: a mapreduce solution for prototype reduction in big data classification. Neurocomputing. 2015;150:331\u201345.","journal-title":"Neurocomputing"},{"issue":"4","key":"186_CR38","doi-asserted-by":"publisher","first-page":"737","DOI":"10.1515\/amcs-2017-0051","volume":"27","author":"AI Weinberg","year":"2017","unstructured":"Weinberg AI, Last M. Interpretable decision-tree induction in a big data parallel framework. Int J Appl Math Comput Sci. 2017;27(4):737\u201348.","journal-title":"Int J Appl Math Comput Sci"},{"key":"186_CR39","doi-asserted-by":"crossref","unstructured":"Ye T, Zhou H, Zou WY, Gao B. Zhang R. Rapidscorer: fast tree ensemble evaluation by maximizing compactness in data level parallelization. In: Proceedings of the 24th ACM SIGKDD international conference on knowledge discovery & data mining. New York City: ACM; 2018. p.\u00a0941\u201350.","DOI":"10.1145\/3219819.3219857"},{"issue":"6","key":"186_CR40","doi-asserted-by":"publisher","first-page":"1245","DOI":"10.1137\/0218082","volume":"18","author":"K Zhang","year":"1989","unstructured":"Zhang K, Shasha D. Simple fast algorithms for the editing distance between trees and related problems. SIAM J Comput. 1989;18(6):1245\u201362.","journal-title":"SIAM J Comput"},{"issue":"8","key":"186_CR41","first-page":"1775","volume":"7","author":"X Zhang","year":"2012","unstructured":"Zhang X, Jiang S. A splitting criteria based on similarity in decision tree learning. J Softw. 2012;7(8):1775\u201382.","journal-title":"J Softw"}],"container-title":["Journal of Big Data"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s40537-019-0186-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1186\/s40537-019-0186-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s40537-019-0186-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,2,28]],"date-time":"2020-02-28T00:07:34Z","timestamp":1582848454000},"score":1,"resource":{"primary":{"URL":"https:\/\/journalofbigdata.springeropen.com\/articles\/10.1186\/s40537-019-0186-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,2,28]]},"references-count":41,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2019,12]]}},"alternative-id":["186"],"URL":"https:\/\/doi.org\/10.1186\/s40537-019-0186-3","relation":{},"ISSN":["2196-1115"],"issn-type":[{"value":"2196-1115","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,2,28]]},"assertion":[{"value":"17 November 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 February 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 February 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"23"}}