{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T17:40:01Z","timestamp":1774719601840,"version":"3.50.1"},"reference-count":44,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016,10]]},"DOI":"10.1109\/ictc.2016.7763375","type":"proceedings-article","created":{"date-parts":[[2016,12,5]],"date-time":"2016-12-05T21:54:58Z","timestamp":1480974898000},"page":"1088-1093","source":"Crossref","is-referenced-by-count":3,"title":["Study on the multi-modal data preprocessing for knowledge-converged super brain"],"prefix":"10.1109","author":[{"given":"Se Won","family":"Oh","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hyeon Soo","family":"Kim","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ho Sung","family":"Lee","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sun Jin","family":"Kim","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hongkyu","family":"Park","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Woongshik","family":"You","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1016\/j.comnet.2014.11.008"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1016\/j.jnca.2014.01.014"},{"key":"ref33","year":"0","journal-title":"The AMPLab at UC Berkeley"},{"key":"ref32","first-page":"2","article-title":"MLbase: A distributed machine-learning system","author":"kraska","year":"2013","journal-title":"6th Biennial Conference on Innovative Data Systems Research(CIDR'13)"},{"key":"ref31","first-page":"1","article-title":"MLlib: Machine learning in Apache Spark","volume":"17","author":"meng","year":"2016","journal-title":"J Mach Learn Res"},{"key":"ref30","first-page":"10","article-title":"Discretized streams: An efficient and fault-tolerant model for stream processing on large clusters","author":"zaharia","year":"2012","journal-title":"Proceedings of the 4th USENIX Conference on Hot Topics in Cloud Ccomputing"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.14778\/2824032.2824076"},{"key":"ref36","year":"0","journal-title":"Apache Flink"},{"key":"ref35","year":"0","journal-title":"'SQLStream Blaze'"},{"key":"ref34","year":"0","journal-title":"The Apache Software Foundation"},{"key":"ref10","year":"0","journal-title":"The UCI KDD Archive"},{"key":"ref40","first-page":"2","article-title":"Resilient distributed datasets: A fault-tolerant abstraction for in-memory cluster computing motivation","author":"zaharia","year":"2012","journal-title":"Proceedings of the 9th USENIX Conference on Networked Systems Design and Implementation"},{"key":"ref11","author":"pyle","year":"1999","journal-title":"Data Preparation for Data Mining"},{"key":"ref12","first-page":"1","article-title":"Data pre-processing and intelligent data analysis","volume":"18","author":"fazel famili","year":"1997","journal-title":"Int J Intell Data Anal"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.14257\/ijdta.2014.7.4.09"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2011.08.001"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-12-381479-1.00003-4"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10247-4"},{"key":"ref17","year":"0","journal-title":"scikit-learn developers"},{"key":"ref18","year":"0","journal-title":"Extract -transform -load Wikipedia"},{"key":"ref19","year":"0","journal-title":"Adeptia &#x2018;Compare Data Integration Vendors&#x2019;"},{"key":"ref28","year":"0","journal-title":"'Big Data Algorithms for Data Preprocessing Computational Intelligence and Imbalanced Classes'"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/SURV.2013.103013.00206"},{"key":"ref27","year":"0","journal-title":"'Powered By Spark'"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.4218\/etrij.15.2415.0045"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"2233","DOI":"10.1109\/TII.2014.2300753","article-title":"Internet of things in industries: A survey","volume":"10","author":"da xu","year":"2014","journal-title":"IEEE Trans Ind Informatics"},{"key":"ref29","author":"xin","year":"0","journal-title":"&#x2018;Apache Spark the fastest open source engine for sorting a petabyte&#x2019;"},{"key":"ref5","first-page":"127","article-title":"Research on data mining models for the internet of things","author":"bin","year":"2010","journal-title":"2010 International Conference on Image Analysis and Signal Processing"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2014.01.015"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.comnet.2015.12.023"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.comcom.2014.09.008"},{"key":"ref9","year":"0","journal-title":"'Introduction KSB Convergence Research Department'"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2015.2444095"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2015.12.006"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/MIS.2013.142"},{"key":"ref21","first-page":"21","article-title":"Sensing as a service and Big Data","author":"zaslavsky","year":"2012","journal-title":"Proc Int Conf Adv Cloud Comput"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.14257\/ijdta.2013.6.5.03"},{"key":"ref24","first-page":"10","article-title":"Spark: Cluster computing with working sets","author":"zaharia","year":"2010","journal-title":"Proc of the 2nd USENIX Conf on Hot Topics in Cloud Computing HotCloud'10"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/DISCEX.2000.821515"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4614-6309-2"},{"key":"ref44","year":"0","journal-title":"The R Project for Statistical Computing"},{"key":"ref26","year":"0","journal-title":"Apache SPARK"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CISDA.2009.5356528"},{"key":"ref25","author":"penchikala","year":"0","journal-title":"Big Data Processing with Apache Spark - Part 1 Introduction"}],"event":{"name":"2016 International Conference on Information and Communication Technology Convergence (ICTC)","location":"Jeju","start":{"date-parts":[[2016,10,19]]},"end":{"date-parts":[[2016,10,21]]}},"container-title":["2016 International Conference on Information and Communication Technology Convergence (ICTC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7750938\/7763035\/07763375.pdf?arnumber=7763375","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,25]],"date-time":"2017-06-25T05:17:38Z","timestamp":1498367858000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7763375\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,10]]},"references-count":44,"URL":"https:\/\/doi.org\/10.1109\/ictc.2016.7763375","relation":{},"subject":[],"published":{"date-parts":[[2016,10]]}}}