{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T09:44:36Z","timestamp":1774691076165,"version":"3.50.1"},"reference-count":36,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2019,1,5]],"date-time":"2019-01-05T00:00:00Z","timestamp":1546646400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["The VLDB Journal"],"published-print":{"date-parts":[[2019,8]]},"DOI":"10.1007\/s00778-018-0532-7","type":"journal-article","created":{"date-parts":[[2019,1,5]],"date-time":"2019-01-05T14:15:31Z","timestamp":1546697731000},"page":"497-521","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":58,"title":["Parametric schema inference for massive JSON datasets"],"prefix":"10.1007","volume":"28","author":[{"given":"Mohamed-Amine","family":"Baazizi","sequence":"first","affiliation":[]},{"given":"Dario","family":"Colazzo","sequence":"additional","affiliation":[]},{"given":"Giorgio","family":"Ghelli","sequence":"additional","affiliation":[]},{"given":"Carlo","family":"Sartiani","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,1,5]]},"reference":[{"key":"532_CR1","unstructured":"Apache Spark. \n                    http:\/\/spark.apache.org"},{"key":"532_CR2","doi-asserted-by":"crossref","unstructured":"Baazizi, M.A., Ben Lahmar, H., Colazzo, D., Ghelli, G., Sartiani, C.: Schema inference for massive JSON datasets. In: EDBT \u201917 (2017)","DOI":"10.1145\/3122831.3122837"},{"key":"532_CR3","doi-asserted-by":"crossref","unstructured":"Baazizi, M.A., Bidoit, N., Colazzo, D., Malla, N., Sahakyan, M.: Projection for XML update optimization. In: EDBT \u201911, pp. 307\u2013318 (2011)","DOI":"10.1145\/1951365.1951403"},{"key":"532_CR4","doi-asserted-by":"crossref","unstructured":"Baazizi, M.-A., Colazzo, D., Ghelli, G., Sartiani, C.: Counting types for massive JSON datasets. In: DBPL \u201917 (2017)","DOI":"10.1145\/3122831.3122837"},{"key":"532_CR5","unstructured":"Baazizi, M.-A., Colazzo, D., Ghelli, G., Sartiani, C.: Proofs for parametric schema inference for massive JSON datasets. Working paper or preprint (2018). \n                    https:\/\/hal.archives-ouvertes.fr\/hal-01960464\/"},{"key":"532_CR6","unstructured":"Benzaken, V., Castagna, G., Colazzo, D., Nguy\u00ean, K.: Type-based XML projection. In: VLDB \u201906, pp. 271\u2013282 (2006)"},{"key":"532_CR7","unstructured":"Bex, G.J., Neven, F., Schwentick, T., Tuyls, K.: Inference of concise DTDs from XML data. In: VLDB \u201806, pp. 115\u2013126 (2006)"},{"issue":"12","key":"532_CR8","first-page":"1272","volume":"4","author":"KS Beyer","year":"2011","unstructured":"Beyer, K.S., Ercegovac, V., Gemulla, R., Balmin, A., Eltabakh, M.Y., Kanne, C., \u00d6zcan, F., Shekita, E.J.: Jaql: a scripting language for large scale semistructured data analysis. PVLDB 4(12), 1272\u20131283 (2011)","journal-title":"PVLDB"},{"issue":"12","key":"532_CR9","first-page":"1778","volume":"10","author":"D Bonetta","year":"2017","unstructured":"Bonetta, D., Brantner, M.: Fad.js: fast JSON data access using JIT-based speculative optimizations. PVLDB 10(12), 1778\u20131789 (2017)","journal-title":"PVLDB"},{"key":"532_CR10","doi-asserted-by":"crossref","unstructured":"Bourhis, P., Reutter, J.L., Su\u00e1rez, F., Vrgoc, D.: JSON: data model, query languages and schema specification. In: PODS \u201917, pp. 123\u2013135 (2017)","DOI":"10.1145\/3034786.3056120"},{"key":"532_CR11","unstructured":"Bray, T.: The JavaScript object notation (JSON) data interchange format (2014). \n                    https:\/\/tools.ietf.org\/html\/rfc7159"},{"issue":"12","key":"532_CR12","first-page":"2012","volume":"8","author":"S Cebiric","year":"2015","unstructured":"Cebiric, S., Goasdou\u00e9, F., Manolescu, I.: Query-oriented summarization of RDF graphs. PVLDB 8(12), 2012\u20132015 (2015)","journal-title":"PVLDB"},{"key":"532_CR13","unstructured":"Ciucanu, R., Staworko, S.: Learning schemas for unordered XML. In: DBPL \u201813 (2013)"},{"key":"532_CR14","unstructured":"Colazzo, D., Ghelli, G., Sartiani, C.: Typing massive JSON datasets. In: XLDI \u201912, Affiliated with ICFP (2012)"},{"key":"532_CR15","doi-asserted-by":"crossref","unstructured":"DiScala, M., Abadi, D.J.: Automatic generation of normalized relational schemas from nested key-value data. In: \u00d6zcan, F., Koutrika, G., Madden, S. (eds.) SIGMOD \u201916, pp. 295\u2013310. ACM (2016)","DOI":"10.1145\/2882903.2882924"},{"issue":"4","key":"532_CR16","doi-asserted-by":"publisher","first-page":"1114","DOI":"10.1007\/s00224-014-9559-3","volume":"57","author":"DD Freydenberger","year":"2015","unstructured":"Freydenberger, D.D., K\u00f6tzing, T.: Fast learning of restricted regular expressions and DTDs. Theory Comput. Syst. 57(4), 1114\u20131158 (2015)","journal-title":"Theory Comput. Syst."},{"key":"532_CR17","doi-asserted-by":"crossref","unstructured":"Garofalakis, M.N., Gionis, A., Rastogi, R., Seshadri, S., Shim, K.: XTRACT: a system for extracting document type descriptors from XML documents. In: SIGMOD \u201900, pp. 165\u2013176 (2000)","DOI":"10.1145\/335191.335409"},{"key":"532_CR18","unstructured":"Goldman, R., Widom, J.: Dataguides: enabling query formulation and optimization in semistructured databases. In: VLDB\u201997, pp. 436\u2013445 (1997)"},{"key":"532_CR19","unstructured":"http:\/\/webia.lip6.fr\/~baazizi\/rs\/js\/vj18"},{"key":"532_CR20","unstructured":"JSON schema definition language. \n                    http:\/\/jsoniq.org\/docs\/JSound\/html-single\/"},{"key":"532_CR21","unstructured":"JSON schema language. \n                    http:\/\/json-schema.org"},{"key":"532_CR22","unstructured":"Labs, T.S.: Studio 3T, 2017. \n                    https:\/\/studio3t.com"},{"issue":"10","key":"532_CR23","first-page":"1118","volume":"10","author":"Y Li","year":"2017","unstructured":"Li, Y., Katsipoulakis, N.R., Chandramouli, B., Goldstein, J., Kossmann, D.: Mison: a fast JSON parser for data analytics. PVLDB 10(10), 1118\u20131129 (2017)","journal-title":"PVLDB"},{"key":"532_CR24","doi-asserted-by":"crossref","unstructured":"Liu, Z.H., Hammerschmidt, B., McMahon, D.: JSON data management: supporting schema-less development in RDBMS. In: SIGMOD \u201914, pp. 1247\u20131258 (2014)","DOI":"10.1145\/2588555.2595628"},{"key":"532_CR25","unstructured":"Lohrey, M., Maneth, S., Reh, C.P.: Compression of unordered XML trees. In: ICDT\u201907, pp. 18:1\u201318:17 (2017)"},{"key":"532_CR26","unstructured":"McHugh, J., Widom, J.: Query optimization for XML. In: VLDB \u201999, pp. 315\u2013326. Morgan Kaufmann Publishers Inc. (1999)"},{"issue":"4","key":"532_CR27","doi-asserted-by":"publisher","first-page":"660","DOI":"10.1145\/1111627.1111631","volume":"5","author":"M Murata","year":"2005","unstructured":"Murata, M., Lee, D., Mani, M., Kawaguchi, K.: Taxonomy of XML schema languages using formal language theory. ACM Trans. Internet Technol. 5(4), 660\u2013704 (2005)","journal-title":"ACM Trans. Internet Technol."},{"issue":"4","key":"532_CR28","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1145\/271074.271084","volume":"26","author":"S Nestorov","year":"1997","unstructured":"Nestorov, S., Abiteboul, S., Motwani, R.: Inferring structure in semistructured data. SIGMOD Rec. 26(4), 39\u201343 (1997)","journal-title":"SIGMOD Rec."},{"key":"532_CR29","doi-asserted-by":"crossref","unstructured":"Nestorov, S., Abiteboul, S., Motwani, R.: Extracting schema from semistructured data. In: SIGMOD \u201998, pp. 295\u2013306 (1998)","DOI":"10.1145\/276305.276331"},{"key":"532_CR30","doi-asserted-by":"crossref","unstructured":"Pezoa, F., Reutter, J.L., Suarez, F., Ugarte, M., Vrgo\u010d, D.: Foundations of JSON Schema. In: WWW \u201916, pp. 263\u2013273 (2016)","DOI":"10.1145\/2872427.2883029"},{"key":"532_CR31","unstructured":"Scherzinger, S., de\u00a0Almeida, E.C., Cerqueus, T., de\u00a0Almeida, L.B., Holanda, P.: Finding and fixing type mismatches in the evolution of object-nosql mappings. In: Proceedings of the Workshops of the EDBT\/ICDT 2016 (2016)"},{"key":"532_CR32","unstructured":"Schmidt, P.: mongodb-schema (2017). \n                    https:\/\/github.com\/mongodb-js\/mongodb-schema"},{"key":"532_CR33","unstructured":"scrapinghub. Skinfer (2015). \n                    https:\/\/github.com\/scrapinghub\/skinfer"},{"key":"532_CR34","unstructured":"Spark dataframe. \n                    https:\/\/spark.apache.org\/docs\/latest\/sql-programming-guide.html"},{"key":"532_CR35","unstructured":"The JSON Query Language. \n                    http:\/\/www.jsoniq.org"},{"issue":"9","key":"532_CR36","doi-asserted-by":"publisher","first-page":"922","DOI":"10.14778\/2777598.2777601","volume":"8","author":"L Wang","year":"2015","unstructured":"Wang, L., Zhang, S., Shi, J., Jiao, L., Hassanzadeh, O., Zou, J., Wangz, C.: Schema management for document stores. Proc. VLDB Endow. 8(9), 922\u2013933 (2015)","journal-title":"Proc. VLDB Endow."}],"container-title":["The VLDB Journal"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-018-0532-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00778-018-0532-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-018-0532-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,1,5]],"date-time":"2020-01-05T00:14:05Z","timestamp":1578183245000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00778-018-0532-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,1,5]]},"references-count":36,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2019,8]]}},"alternative-id":["532"],"URL":"https:\/\/doi.org\/10.1007\/s00778-018-0532-7","relation":{},"ISSN":["1066-8888","0949-877X"],"issn-type":[{"value":"1066-8888","type":"print"},{"value":"0949-877X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,1,5]]},"assertion":[{"value":"1 February 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 November 2018","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 November 2018","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 January 2019","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}