{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,15]],"date-time":"2026-03-15T02:30:17Z","timestamp":1773541817164,"version":"3.50.1"},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2016,11,24]],"date-time":"2016-11-24T00:00:00Z","timestamp":1479945600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Cheminform"],"published-print":{"date-parts":[[2016,12]]},"DOI":"10.1186\/s13321-016-0179-6","type":"journal-article","created":{"date-parts":[[2016,11,24]],"date-time":"2016-11-24T09:06:03Z","timestamp":1479978363000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":29,"title":["Towards agile large-scale predictive modelling in drug discovery with flow-based programming design principles"],"prefix":"10.1186","volume":"8","author":[{"given":"Samuel","family":"Lampa","sequence":"first","affiliation":[]},{"given":"Jonathan","family":"Alvarsson","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8083-2864","authenticated-orcid":false,"given":"Ola","family":"Spjuth","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,11,24]]},"reference":[{"issue":"4","key":"179_CR1","doi-asserted-by":"publisher","first-page":"395","DOI":"10.1517\/17425255.2012.664636","volume":"8","author":"LG Valerio Jr","year":"2012","unstructured":"Valerio LG Jr (2012) Application of advanced in silico methods for predictive modeling and information integration. Expert Opin Drug Metab Toxicol 8(4):395\u2013398","journal-title":"Expert Opin Drug Metab Toxicol"},{"issue":"4","key":"179_CR2","first-page":"569","volume":"11","author":"P Gedeck","year":"2008","unstructured":"Gedeck P, Lewis RA (2008) Exploiting QSAR models in lead optimization. Curr Opin Drug Discov Dev 11(4):569\u2013575","journal-title":"Curr Opin Drug Discov Dev"},{"issue":"4","key":"179_CR3","doi-asserted-by":"publisher","first-page":"911","DOI":"10.1021\/tx700391f","volume":"21","author":"S Spycher","year":"2008","unstructured":"Spycher S, Smejtek P, Netzeva TI, Escher BI (2008) Toward a class-independent quantitative structure-activity relationship model for uncouplers of oxidative phosphorylation. Chem Res Toxicol 21(4):911\u2013927","journal-title":"Chem Res Toxicol"},{"key":"179_CR4","doi-asserted-by":"crossref","unstructured":"Hansch C (1969) A quantitative approach to biochemical structure-activity relationships. Acc Chem Res 2:232\u2013239","DOI":"10.1021\/ar50020a002"},{"issue":"1","key":"179_CR5","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13062-015-0071-8","volume":"10","author":"O Spjuth","year":"2015","unstructured":"Spjuth O, Bongcam-Rudloff E, Hern\u00e1ndez GC, Forer L, Giovacchini M, Guimera RV, Kallio A, Korpelainen E, Ka\u0144du\u0142a MM, Krachunov M, Kreil DP, Kulev O, \u0141abaj PP, Lampa S, Pireddu L, Sch\u00f6nherr S, Siretskiy A, Vassilev D (2015) Experiences with workflows for automating data-intensive bioinformatics. Biol Direct 10(1):1\u201312","journal-title":"Biol Direct"},{"key":"179_CR6","doi-asserted-by":"crossref","unstructured":"Berthold MR, Cebron N, Dill F, Gabriel TR, K\u00f6tter T, Meinl T, Ohl P, Sieb C, Thiel K, Wiswedel B (2007) KNIME: the Konstanz Information Miner. In: Studies in classification, data analysis, and knowledge organization (GfKL 2007). Springer, Berlin","DOI":"10.1007\/978-3-540-78246-9_38"},{"issue":"18","key":"179_CR7","doi-asserted-by":"publisher","first-page":"1965","DOI":"10.2174\/156802612804910331","volume":"12","author":"MP Mazanetz","year":"2012","unstructured":"Mazanetz MP, Marmon RJ, Reisser CBT, Morao I (2012) Drug discovery applications for KNIME: an open source data mining platform. Curr Top Med Chem 12(18):1965\u20131979","journal-title":"Curr Top Med Chem"},{"key":"179_CR8","unstructured":"BIOVIA (2016) Pipeline pilot overview. \n                    http:\/\/accelrys.com\/products\/collaborative-science\/biovia-pipeline-pilot\/\n                    \n                  . Accessed 5 April"},{"key":"179_CR9","unstructured":"Chen J-W, Zhang J (2007) Comparing text-based and graphic user interfaces for novice and expert users. In: AMIA annual symposium proceedings, pp 125\u2013129"},{"key":"179_CR10","unstructured":"KNIME Product Matrix. \n                    https:\/\/www.knime.org\/products\/product-matrix\n                    \n                  . Accessed 20 Sep 2016"},{"key":"179_CR11","doi-asserted-by":"publisher","unstructured":"Leipzig J (2016) A review of bioinformatic pipeline frameworks. Brief Bioinform.\u00a0doi:\n                    10.1093\/bib\/bbw020\n                    \n                  . pii: bbw020","DOI":"10.1093\/bib\/bbw020"},{"key":"179_CR12","doi-asserted-by":"crossref","unstructured":"Breck E (2008) Zymake: a computational workflow system for machine learning and natural language processing. Software engineering, testing, and quality assurance for natural language processing, SETQA-NLP \u201908 association for computational linguistics, Stroudsburg, pp 5\u201313","DOI":"10.3115\/1622110.1622113"},{"issue":"8","key":"179_CR13","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/gb-2010-11-8-r86","volume":"11","author":"J Goecks","year":"2010","unstructured":"Goecks J, Nekrutenko A, Taylor J (2010) Galaxy: a comprehensive approach for supporting accessible, reproducible, and transparent computational research in the life sciences. Genome Biol 11(8):1\u201313","journal-title":"Genome Biol"},{"key":"179_CR14","volume-title":"Galaxy: a web-based genome analysis tool for experimentalists","author":"D Blankenberg","year":"2010","unstructured":"Blankenberg D, Kuster GV, Coraor N, Ananda G, Lazarus R, Mangan M, Nekrutenko A, Taylor J (2010) Galaxy: a web-based genome analysis tool for experimentalists. John Wiley & sons inc, Hoboken"},{"issue":"10","key":"179_CR15","doi-asserted-by":"publisher","first-page":"1451","DOI":"10.1101\/gr.4086505","volume":"15","author":"B Giardine","year":"2005","unstructured":"Giardine B, Riemer C, Hardison RC, Burhans R, Elnitski L, Shah P, Zhang Y, Blankenberg D, Albert I, Taylor J, Miller W, Kent WJ, Nekrutenko A (2005) Galaxy: a platform for interactive large-scale genome analysis. Genome Res 15(10):1451\u20131455","journal-title":"Genome Res"},{"issue":"1","key":"179_CR16","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/1751-0473-7-1","volume":"7","author":"AA Hunter","year":"2012","unstructured":"Hunter AA, Macgregor AB, Szabo TO, Wellington CA, Bellgard MI (2012) Yabi: an online research environment for grid, high performance and cloud computing. Source Code Biol Med 7(1):1\u201310","journal-title":"Source Code Biol Med"},{"issue":"13","key":"179_CR17","doi-asserted-by":"publisher","first-page":"1685","DOI":"10.1093\/bioinformatics\/btt199","volume":"29","author":"C Sloggett","year":"2013","unstructured":"Sloggett C, Goonasekera N, Afgan E (2013) BioBlend: automating pipeline analyses within Galaxy and CloudMan. Bioinformatics 29(13):1685\u20131686","journal-title":"Bioinformatics"},{"issue":"19","key":"179_CR18","doi-asserted-by":"publisher","first-page":"2520","DOI":"10.1093\/bioinformatics\/bts480","volume":"28","author":"J K\u00f6ster","year":"2012","unstructured":"K\u00f6ster J, Rahmann S (2012) Snakemake\u2014a scalable bioinformatics workflow engine. Bioinformatics 28(19):2520\u20132522","journal-title":"Bioinformatics"},{"key":"179_CR19","doi-asserted-by":"publisher","DOI":"10.6084\/m9.figshare.1254958.v2","author":"PD Tommaso","year":"2014","unstructured":"Tommaso PD, Chatzou M, Baraja PP, Notredame C (2014) A novel tool for highly scalable computational pipelines. Figshare. doi:\n                    10.6084\/m9.figshare.1254958.v2","journal-title":"Figshare"},{"issue":"11","key":"179_CR20","doi-asserted-by":"publisher","first-page":"1525","DOI":"10.1093\/bioinformatics\/bts167","volume":"28","author":"SP Sadedin","year":"2012","unstructured":"Sadedin SP, Pope B, Oshlack A (2012) Bpipe: a tool for running and managing bioinformatics pipelines. Bioinformatics 28(11):1525\u20131526","journal-title":"Bioinformatics"},{"issue":"1","key":"179_CR21","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1016\/S0164-1212(00)00089-3","volume":"56","author":"D Spinellis","year":"2001","unstructured":"Spinellis D (2001) Notable design patterns for domain-specific languages. J Syst Softw 56(1):91\u201399","journal-title":"J Syst Softw"},{"issue":"2","key":"179_CR22","doi-asserted-by":"publisher","first-page":"247","DOI":"10.2298\/CSIS1002247K","volume":"7","author":"T Kosar","year":"2010","unstructured":"Kosar T, Oliveira N, Mernik M, Pereira VJM, \u010crepin\u0161ek M, Da CD, Henriques RP (2010) Comparing general-purpose and domain-specific languages: an empirical study. Comput Sci Inf Syst 7(2):247\u2013264","journal-title":"Comput Sci Inf Syst"},{"issue":"2","key":"179_CR23","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1002\/(SICI)1096-908X(199803\/04)10:2<75::AID-SMR168>3.0.CO;2-5","volume":"10","author":"AV Deursen","year":"1998","unstructured":"Deursen AV, Klint P (1998) Little languages: little maintenance? J Softw Maint Res Pract 10(2):75\u201392","journal-title":"J Softw Maint Res Pract"},{"key":"179_CR24","unstructured":"Discussion on blog post. \n                    http:\/\/bionics.it\/posts\/fbp-data-flow-syntax#comment-2141038801\n                    \n                  . Accessed 18 April 2016"},{"issue":"21","key":"179_CR25","doi-asserted-by":"publisher","first-page":"2778","DOI":"10.1093\/bioinformatics\/btq524","volume":"26","author":"L Goodstadt","year":"2010","unstructured":"Goodstadt L (2010) Ruffus: a lightweight Python library for computational pipelines. Bioinformatics 26(21):2778\u20132779","journal-title":"Bioinformatics"},{"key":"179_CR26","unstructured":"Luigi source code on GitHub. \n                    https:\/\/github.com\/spotify\/luigi\n                    \n                  . Accessed 5 April 2016"},{"key":"179_CR27","unstructured":"van Deursen A (1997) Domain-specific languages versus object-oriented frameworks: a financial engineering case study. In: Smalltalk and Java in Industry and Academia, STJA\u201997, pp 35\u201339"},{"key":"179_CR28","volume-title":"Hadoop: the definitive guide","author":"T White","year":"2009","unstructured":"White T (2009) Hadoop: the definitive guide, 1st edn. O\u2019Reilly, Sebastopol","edition":"1"},{"key":"179_CR29","unstructured":"Zaharia M, Chowdhury M, Franklin MJ, Shenker S, Stoica I (2010) Spark: cluster computing with working sets. In: Proceedings of the 2nd USENIX conference on hot topics in cloud computing, pp 10"},{"key":"179_CR30","unstructured":"SciLuigi repository on Github. \n                    http:\/\/github.com\/pharmbio\/sciluigi\n                    \n                  . Accessed 21 April 2016"},{"key":"179_CR31","first-page":"1871","volume":"9","author":"R-E Fan","year":"2008","unstructured":"Fan R-E, Chang K-W, Hsieh C-J, Wang X-R, Lin C-J (2008) LIBLINEAR: a library for large linear classification. J Mach Learn Res 9:1871\u20131874","journal-title":"J Mach Learn Res"},{"issue":"3","key":"179_CR32","doi-asserted-by":"publisher","first-page":"707","DOI":"10.1021\/ci020345w","volume":"43","author":"J-L Faulon","year":"2003","unstructured":"Faulon J-L, Visco DP, Pophale RS (2003) The signature molecular descriptor. 1. Using extended valence sequences in QSAR and QSPR studies. J Chem Inf Comput Sci 43(3):707\u2013720","journal-title":"J Chem Inf Comput Sci"},{"key":"179_CR33","unstructured":"National Center for Biotechnology Information. PubChem BioAssay Database; AID=1996. \n                    https:\/\/pubchem.ncbi.nlm.nih.gov\/bioassay\/1996"},{"issue":"1","key":"179_CR34","first-page":"39","volume":"8","author":"J Alvarsson","year":"2016","unstructured":"Alvarsson J, Lampa S, Schaal W, Andersson C, Wikberg JES, Spjuth O (2016) Large-scale ligand-based predictive modelling using support vector machines. J Chem Inf 8(1):39","journal-title":"J Chem Inf"},{"issue":"3","key":"179_CR35","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1109\/MCSE.2007.53","volume":"9","author":"F P\u00e9rez","year":"2007","unstructured":"P\u00e9rez F, Granger BE (2007) IPython: a system for interactive scientific computing. Comput Sci Eng 9(3):21\u201329. doi:\n                    10.1109\/MCSE.2007.53","journal-title":"Comput Sci Eng"},{"key":"179_CR36","unstructured":"Project Jupyter. \n                    http:\/\/jupyter.org\n                    \n                  . Accessed 18 Oct 2016"},{"key":"179_CR37","doi-asserted-by":"publisher","unstructured":"Pre-made Virtual Machine image for the Case Study. \n                    http:\/\/dx.doi.org\/10.6084\/m9.figshare.4038048\n                    \n                  . Accessed 18 Oct 2016. doi:\n                    10.6084\/m9.figshare.4038048","DOI":"10.6084\/m9.figshare.4038048"},{"key":"179_CR38","unstructured":"Github repository for the Case Study Virtual Machine. \n                    https:\/\/github.com\/pharmbio\/bioimg-sciluigi-casestudy\n                    \n                  . Accessed 18 Oct 2016"},{"key":"179_CR39","unstructured":"Project repository on Github. \n                    http:\/\/github.com\/pharmbio\/mm_project\n                    \n                  . Accessed 21 April 2016"},{"key":"179_CR40","unstructured":"Workflow file on Github. \n                    https:\/\/github.com\/pharmbio\/mm_project\/blob\/master\/exp\/20150627-crossval\/wfmm.py\n                    \n                  . Accessed 21 April 2016"},{"key":"179_CR41","unstructured":"H3 Biomedicine\u2019s fork of the SciLuigi source code on GitHub. \n                    https:\/\/github.com\/h3biomed\/sciluigi\n                    \n                  . Accessed 18 April 2016"},{"key":"179_CR42","unstructured":"Forks of the SciLuigi source code on GitHub. \n                    https:\/\/github.com\/pharmbio\/sciluigi\/network\/members\n                    \n                  . Accessed 18 Oct 2016"},{"issue":"8","key":"179_CR43","doi-asserted-by":"publisher","first-page":"232","DOI":"10.1021\/ar50020a002","volume":"2","author":"C Hansch","year":"1969","unstructured":"Hansch C (1969) Quantitative approach to biochemical structure\u2013activity relationships. Acc Chem Res 2(8):232\u2013239","journal-title":"Acc Chem Res"},{"issue":"1","key":"179_CR44","doi-asserted-by":"publisher","first-page":"261","DOI":"10.1016\/j.bmcl.2012.10.102","volume":"23","author":"U Norinder","year":"2013","unstructured":"Norinder U, Ek ME (2013) QSAR investigation of NaV2.7 active compounds using the SVM\/Signature approach and the Bioclipse Modeling platform. Bioorg Med Chem Lett 23(1):261\u2013263","journal-title":"Bioorg Med Chem Lett"},{"issue":"2","key":"179_CR45","doi-asserted-by":"publisher","first-page":"286","DOI":"10.1093\/bioinformatics\/bts681","volume":"29","author":"O Spjuth","year":"2013","unstructured":"Spjuth O, Georgiev V, Carlsson L, Alvarsson J, Berg A, Willighagen E, Wikberg JE, Eklund M (2013) Bioclipse-R: integrating management and visualization of life science data with statistical analysis. Bioinformatics 29(2):286\u2013289","journal-title":"Bioinformatics"},{"issue":"10","key":"179_CR46","doi-asserted-by":"publisher","first-page":"2647","DOI":"10.1021\/ci500361u","volume":"54","author":"J Alvarsson","year":"2014","unstructured":"Alvarsson J, Eklund M, Engkvist O, Spjuth O, Carlsson L, Wikberg JE, Noeske T (2014) Ligand-based target prediction with signature fingerprints. J Chem Inf Model 54(10):2647\u20132653","journal-title":"J Chem Inf Model"},{"issue":"11","key":"179_CR47","doi-asserted-by":"publisher","first-page":"3211","DOI":"10.1021\/ci500344v","volume":"54","author":"J Alvarsson","year":"2014","unstructured":"Alvarsson J, Eklund M, Andersson C, Carlsson L, Spjuth O, Wikberg JE (2014) Benchmarking study of parameter variation when using signature fingerprints together with support vector machines. J Chem Inf Model 54(11):3211\u20133217","journal-title":"J Chem Inf Model"},{"key":"179_CR48","doi-asserted-by":"crossref","unstructured":"Yoo AB, Jette MA, Grondona M (2003) SLURM: simple linux utility for resource management. In: Job scheduling strategies for parallel processing. Springer, Berlin, pp 44\u201360","DOI":"10.1007\/10968987_3"},{"key":"179_CR49","unstructured":"Example: top artists\u2014luigi documentation. \n                    http:\/\/luigi.readthedocs.org\/en\/stable\/example_top_artists.html\n                    \n                  . Accessed 13 April 2016"},{"key":"179_CR50","unstructured":"Morrison JP (1994) Flow-based programming. In: Proceedings of the 1st international workshop on software engineering for parallel and distributed systems, pp 25\u201329"},{"key":"179_CR51","volume-title":"Flow-based programming: a new approach to application development","author":"JP Morrison","year":"2010","unstructured":"Morrison JP (2010) Flow-based programming: a new approach to application development, 2nd edn. Self-published via CreateSpace, Charleston","edition":"2"},{"key":"179_CR52","unstructured":"Morrison JP (2016) Flow-based programming website. \n                    http:\/\/www.jpaulmorrison.com\/fbp\/\n                    \n                  . Accessed 7 April 2016"}],"container-title":["Journal of Cheminformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13321-016-0179-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1186\/s13321-016-0179-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13321-016-0179-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,24]],"date-time":"2019-06-24T10:20:44Z","timestamp":1561371644000},"score":1,"resource":{"primary":{"URL":"https:\/\/jcheminf.biomedcentral.com\/articles\/10.1186\/s13321-016-0179-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,11,24]]},"references-count":52,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2016,12]]}},"alternative-id":["179"],"URL":"https:\/\/doi.org\/10.1186\/s13321-016-0179-6","relation":{},"ISSN":["1758-2946"],"issn-type":[{"value":"1758-2946","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,11,24]]},"assertion":[{"value":"19 July 2016","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 November 2016","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 November 2016","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"67"}}