{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,25]],"date-time":"2025-11-25T06:52:30Z","timestamp":1764053550492,"version":"3.41.0"},"reference-count":35,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2018,9,1]],"date-time":"2018-09-01T00:00:00Z","timestamp":1535760000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Distrib Parallel Databases"],"published-print":{"date-parts":[[2019,6]]},"DOI":"10.1007\/s10619-018-7248-y","type":"journal-article","created":{"date-parts":[[2018,9,1]],"date-time":"2018-09-01T03:09:05Z","timestamp":1535771345000},"page":"273-295","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["On-demand big data integration"],"prefix":"10.1007","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0335-0458","authenticated-orcid":false,"given":"Pradeeban","family":"Kathiravelu","sequence":"first","affiliation":[]},{"given":"Ashish","family":"Sharma","sequence":"additional","affiliation":[]},{"given":"Helena","family":"Galhardas","sequence":"additional","affiliation":[]},{"given":"Peter","family":"Van Roy","sequence":"additional","affiliation":[]},{"given":"Lu\u00eds","family":"Veiga","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,9,1]]},"reference":[{"key":"7248_CR1","volume-title":"SEED Standard for the Exchange of Earthquake Data Reference Manual Format Version 2.4","author":"T Ahern","year":"2007","unstructured":"Ahern, T., Casey, R., Barnes, D., Benson, R., Knight, T.: SEED Standard for the Exchange of Earthquake Data Reference Manual Format Version 2.4. Incorporated Research Institutions for Seismology (IRIS), Seattle (2007)"},{"issue":"2\u20134","key":"7248_CR2","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1002\/cpe.939","volume":"17","author":"M Antonioletti","year":"2005","unstructured":"Antonioletti, M., Atkinson, M., Baxter, R., Borley, A., Chue Hong, N.P., Collins, B., Hardman, N., Hume, A.C., Knox, A., Jackson, M.: The design and implementation of Grid database services in OGSA-DAI. Concurr. Comput. Pract. Exp. 17(2\u20134), 357\u2013376 (2005)","journal-title":"Concurr. Comput. Pract. Exp."},{"key":"7248_CR3","doi-asserted-by":"crossref","unstructured":"Ardestani, S.B., H\u00e5kansson, C.J., Laure, E., Livenson, I., Stran\u00e1k, P., Dima, E., Blommesteijn, D., van\u00a0de Sanden, M.: B2SHARE: an open e-Science data sharing platform. In: 2015 IEEE 11th International Conference on e-Science (e-Science), pp. 448\u2013453. IEEE (2015)","DOI":"10.1109\/eScience.2015.44"},{"key":"7248_CR4","doi-asserted-by":"publisher","first-page":"24","DOI":"10.1016\/j.procs.2013.10.005","volume":"23","author":"C Borckholder","year":"2013","unstructured":"Borckholder, C., Heinzel, A., Kaniovskyi, Y., Benkner, S., Lukas, A., Mayer, B.: A generic, service-based data integration framework applied to linking drugs and clinical trials. Procedia Comput. Sci. 23, 24\u201335 (2013)","journal-title":"Procedia Comput. Sci."},{"key":"7248_CR5","unstructured":"caMicroscope: caMicroscope (2018). http:\/\/camicroscope.org"},{"issue":"4","key":"7248_CR6","doi-asserted-by":"publisher","first-page":"212","DOI":"10.5152\/TJAR.2016.34711","volume":"44","author":"C\u00d6 \u00c7aparlar","year":"2016","unstructured":"\u00c7aparlar, C.\u00d6., D\u00f6nmez, A.: What is scientific research and how can it be done? Turk. J. Anaesthesiol. Reanim. 44(4), 212 (2016)","journal-title":"Turk. J. Anaesthesiol. Reanim."},{"issue":"1","key":"7248_CR7","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1145\/248603.248616","volume":"26","author":"S Chaudhuri","year":"1997","unstructured":"Chaudhuri, S., Dayal, U.: An overview of data warehousing and OLAP technology. ACM SIGMOD Rec. 26(1), 65\u201374 (1997)","journal-title":"ACM SIGMOD Rec."},{"issue":"6","key":"7248_CR8","doi-asserted-by":"publisher","first-page":"1045","DOI":"10.1007\/s10278-013-9622-7","volume":"26","author":"K Clark","year":"2013","unstructured":"Clark, K., Vendt, B., Smith, K., Freymann, J., Kirby, J., Koppel, P., Moore, S., Phillips, S., Maffitt, D., Pringle, M.: The Cancer Imaging Archive (TCIA): maintaining and operating a public information repository. J. Digit. Imaging 26(6), 1045\u20131057 (2013)","journal-title":"J. Digit. Imaging"},{"key":"7248_CR9","doi-asserted-by":"crossref","unstructured":"Dong, X.L., Srivastava, D.: Big data integration. In: 2013 IEEE 29th International Conference on Data Engineering (ICDE), pp. 1245\u20131248. IEEE (2013)","DOI":"10.1109\/ICDE.2013.6544914"},{"key":"7248_CR10","unstructured":"Gradecki, J.D., Cole, J.: Mastering Apache Velocity. Wiley (2003)"},{"issue":"2","key":"7248_CR11","doi-asserted-by":"publisher","first-page":"100","DOI":"10.1089\/big.2013.0011","volume":"1","author":"M Hausenblas","year":"2013","unstructured":"Hausenblas, M., Nadeau, J.: Apache Drill: interactive ad-hoc analysis at scale. Big Data 1(2), 100\u2013104 (2013)","journal-title":"Big Data"},{"key":"7248_CR12","doi-asserted-by":"crossref","unstructured":"Heinzlreiter, P., Perkins, J.R., Tirado, O.T., Karlsson, T.J.M., Ranea, J.A., Mitterecker, A., Blanca, M., Trelles, O.: A cloud-based GWAS analysis pipeline for clinical researchers. In: CLOSER, pp. 387\u2013394 (2014)","DOI":"10.5220\/0004802103870394"},{"issue":"5723","key":"7248_CR13","doi-asserted-by":"publisher","first-page":"817","DOI":"10.1126\/science.1110410","volume":"308","author":"T Hey","year":"2005","unstructured":"Hey, T., Trefethen, A.E.: Cyberinfrastructure for e-Science. Science 308(5723), 817\u2013821 (2005)","journal-title":"Science"},{"key":"7248_CR14","unstructured":"HL7: FHIR (2018). https:\/\/www.hl7.org\/fhir\/"},{"key":"7248_CR15","unstructured":"Huang, Z.: Data integration for urban transport planning. Citeseer (2003)"},{"key":"7248_CR16","doi-asserted-by":"crossref","unstructured":"Kadadi, A., Agrawal, R., Nyamful, C., Atiq, R.: Challenges of data integration and interoperability in big data. In: 2014 IEEE International Conference on Big Data (Big Data), pp. 38\u201340. IEEE (2014)","DOI":"10.1109\/BigData.2014.7004486"},{"issue":"12","key":"7248_CR17","doi-asserted-by":"publisher","first-page":"1286","DOI":"10.14778\/2536274.2536297","volume":"6","author":"Y Karg\u00edn","year":"2013","unstructured":"Karg\u00edn, Y., Ivanova, M., Zhang, Y., Manegold, S., Kersten, M.: Lazy ETL in action: ETL technology dates scientific data. Proc. VLDB Endow. 6(12), 1286\u20131289 (2013)","journal-title":"Proc. VLDB Endow."},{"key":"7248_CR18","doi-asserted-by":"crossref","unstructured":"Kathiravelu, P., Chen, Y., Sharma, A., Galhardas, H., Van\u00a0Roy, P., Veiga, L.: On-demand service-based big data integration: optimized for research collaboration. In: VLDB Workshop on Data Management and Analytics for Medicine and Healthcare, pp. 9\u201328. Springer (2017)","DOI":"10.1007\/978-3-319-67186-4_2"},{"key":"7248_CR19","doi-asserted-by":"crossref","unstructured":"Krishnan, S., Haas, D., Franklin, M.J., Wu, E.: Towards reliable interactive data cleaning: a user survey and recommendations. In: Proceedings of the Workshop on Human-in-the-Loop Data Analytics, p. 9. ACM (2016)","DOI":"10.1145\/2939502.2939511"},{"key":"7248_CR20","doi-asserted-by":"crossref","unstructured":"Langegger, A., W\u00f6\u00df, W., Bl\u00f6chl, M.: A semantic web middleware for virtual data integration on the web. In: European Semantic Web Conference, pp. 493\u2013507. Springer (2008)","DOI":"10.1007\/978-3-540-68234-9_37"},{"issue":"1","key":"7248_CR21","doi-asserted-by":"publisher","first-page":"279","DOI":"10.2218\/ijdc.v8i1.260","volume":"8","author":"D Lecarpentier","year":"2013","unstructured":"Lecarpentier, D., Wittenburg, P., Elbers, W., Michelini, A., Kanso, R., Coveney, P., Baxter, R.: EUDAT: a new cross-disciplinary data infrastructure for science. Int. J. Digit. Curation 8(1), 279\u2013287 (2013)","journal-title":"Int. J. Digit. Curation"},{"key":"7248_CR22","doi-asserted-by":"crossref","unstructured":"Lee, G., Doyle, S., Monaco, J., Madabhushi, A., Feldman, M.D., Master, S.R., Tomaszewski, J.E.: A knowledge representation framework for integration, classification of multi-scale imaging and non-imaging data: preliminary results in predicting prostate cancer recurrence by fusing mass spectrometry and histology. In: 2009 IEEE International Symposium on Biomedical Imaging: From Nano to Macro, pp. 77\u201380. IEEE (2009)","DOI":"10.1109\/ISBI.2009.5192987"},{"issue":"12","key":"7248_CR23","doi-asserted-by":"publisher","first-page":"2006","DOI":"10.14778\/3137765.3137833","volume":"10","author":"G Li","year":"2017","unstructured":"Li, G.: Human-in-the-loop data integration. Proc. VLDB Endow. 10(12), 2006\u20132017 (2017)","journal-title":"Proc. VLDB Endow."},{"key":"7248_CR24","doi-asserted-by":"crossref","unstructured":"Lyu, D.M., Tian, Y., Wang, Y., Tong, D.Y., Yin, W.W., Li, J.S.: Design and implementation of clinical data integration and management system based on Hadoop platform. In: 2015 7th International Conference on Information Technology in Medicine and Education (ITME), pp. 76\u201379. IEEE (2015)","DOI":"10.1109\/ITME.2015.86"},{"key":"7248_CR25","volume-title":"Infinispan Data Grid Platform","author":"F Marchioni","year":"2012","unstructured":"Marchioni, F., Surtani, M.: Infinispan Data Grid Platform. Packt Publishing Ltd., Birmingham (2012)"},{"key":"7248_CR26","unstructured":"Milchevski, E., Michel, S.: LigDB\u2014online query processing without (almost) any storage. In: EDBT, pp. 683\u2013688 (2015)"},{"issue":"4","key":"7248_CR27","doi-asserted-by":"publisher","first-page":"920","DOI":"10.1007\/s003300101100","volume":"12","author":"P Mildenberger","year":"2002","unstructured":"Mildenberger, P., Eichelberg, M., Martin, E.: Introduction to the DICOM standard. Eur. Radiol. 12(4), 920\u2013927 (2002)","journal-title":"Eur. Radiol."},{"issue":"6018","key":"7248_CR28","doi-asserted-by":"publisher","first-page":"703","DOI":"10.1126\/science.1197962","volume":"331","author":"OJ Reichman","year":"2011","unstructured":"Reichman, O.J., Jones, M.B., Schildhauer, M.P.: Challenges and opportunities of open data in ecology. Science 331(6018), 703\u2013705 (2011)","journal-title":"Science"},{"key":"7248_CR29","unstructured":"Scality: Scality RING (2018). http:\/\/storage.scality.com\/rs\/963-KAI-434\/images\/Scality%20Technical%20Whitepaper.pdf"},{"key":"7248_CR30","unstructured":"Spark: Spark Framework: An Expressive Web Framework for Kotlin and Java (2018). http:\/\/sparkjava.com\/"},{"issue":"2","key":"7248_CR31","doi-asserted-by":"publisher","first-page":"1626","DOI":"10.14778\/1687553.1687609","volume":"2","author":"A Thusoo","year":"2009","unstructured":"Thusoo, A., Sarma, J.S., Jain, N., Shao, Z., Chakka, P., Anthony, S., Liu, H., Wyckoff, P., Murthy, R.: Hive: a warehousing solution over a map-reduce framework. Proc. VLDB Endow. 2(2), 1626\u20131629 (2009)","journal-title":"Proc. VLDB Endow."},{"issue":"3","key":"7248_CR32","doi-asserted-by":"publisher","first-page":"1","DOI":"10.4018\/jdwm.2009070101","volume":"5","author":"P Vassiliadis","year":"2009","unstructured":"Vassiliadis, P.: A survey of Extract-transform-Load technology. Int. J. Data Warehous. Min. 5(3), 1\u201327 (2009)","journal-title":"Int. J. Data Warehous. Min."},{"key":"7248_CR33","volume-title":"Hadoop: The Definitive Guide","author":"T White","year":"2012","unstructured":"White, T.: Hadoop: The Definitive Guide. O\u2019Reilly Media Inc, Sebastopol (2012)"},{"key":"7248_CR34","unstructured":"Widmann, H., Thiemann, H.: EUDAT B2FIND: a cross-discipline metadata service and discovery portal. In: EGU General Assembly Conference Abstracts, vol. 18, p. 8562 (2016)"},{"key":"7248_CR35","doi-asserted-by":"crossref","unstructured":"Zhang, Q., Zhang, X., Zhang, Q., Shi, W., Zhong, H.: Firework: big data sharing and processing in collaborative edge environment. In: 2016 Fourth IEEE Workshop on Hot Topics in Web Systems and Technologies (HotWeb), pp. 20\u201325. IEEE (2016)","DOI":"10.1109\/HotWeb.2016.12"}],"container-title":["Distributed and Parallel Databases"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10619-018-7248-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10619-018-7248-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10619-018-7248-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,6]],"date-time":"2025-07-06T19:13:55Z","timestamp":1751829235000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10619-018-7248-y"}},"subtitle":["A hybrid ETL approach for reproducible scientific research"],"short-title":[],"issued":{"date-parts":[[2018,9,1]]},"references-count":35,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2019,6]]}},"alternative-id":["7248"],"URL":"https:\/\/doi.org\/10.1007\/s10619-018-7248-y","relation":{},"ISSN":["0926-8782","1573-7578"],"issn-type":[{"type":"print","value":"0926-8782"},{"type":"electronic","value":"1573-7578"}],"subject":[],"published":{"date-parts":[[2018,9,1]]},"assertion":[{"value":"1 September 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}