diff --git a/build.js b/build.js index 0f7e4b9e..e3d10d72 100644 --- a/build.js +++ b/build.js @@ -111,12 +111,22 @@ var metalsmith = Metalsmith(__dirname) })) ) // Render HTML - .use(branch() - .pattern('**/*.html') + .use(branch('**/*.html') + // Render LaTeX inlined into the HTML .use(renderTeX()) + // Assign a mimir-specific layout to the mimir subdirectory + .use(branch('research/mimir/*.html') + .use(function(files, smith, done) { + for(i in files){ files[i].layout = 'mimir.hbs' } + done(); + }) + ) + // Render outer layouts .use(layouts({ engine: "handlebars", - default: "default.hbs" + default: "default.hbs", + directory: "layouts", + partials: "partials" })) ) // Validate diff --git a/layouts/default.hbs b/layouts/default.hbs index 1692d9cd..a14f0049 100644 --- a/layouts/default.hbs +++ b/layouts/default.hbs @@ -1,58 +1,45 @@ - - -
- -+A lot of analytics is based on information that starts off incomplete, is inconsistent, or is simply used incorrectly. Although people find ways of coping with these sources of uncertainty, those ways usually require lots of pain, effort and suffering before the data can be used, even when using automation. +
+Mimir is a database wrapper that helps you to embrace uncertainty rather than trying to fight it. Mimir attaches to a database of your choice using JDBC and provides a suite of lightweight, easy-to-use data cleaning and data analysis tools. +
+Unlike other automated data cleaning systems, Mimir doesn't claim that it will clean your data correctly. Instead, whenever you query data cleaned by Mimir, Mimir helps you to understand the choices it had to make, how they could impact your query results, and how confident it is in those results. +
+If you want more reliable results, no problem! Mimir streamlines the process of manual curation, focusing you on those parts of the data that need it most. +
+ +Documentation will be posted soon.
++Curating data, or making sure that it is correct, consistent, and complete, +can be very slow and expensive. Most of this effort is often wasted, since +only a small portion of the curated data will ever be relevant to analysts +using it. Unfortunately, unless an analysis is based on trustworthy, curated +data, it's currently foolish to trust the analysis' results. Our +on-demand certainty effort uses a provenance model called Virtual +C-Tables to link query results to the potential sources of uncertainty +that could affect them. Seeing the impact of uncertainty can help analysts to evaluate +the quality and trustworthiness of those results. +
+ ++Mimir is built around a probabilistic database system. Classical +deterministic databases assume that all of your data is fixed: every +cell has exactly one value, and every table has a fixed set of rows in it. +Probabilistic databases instead track multiple possibilities: for example, +OCR software might parse a glyph as either a 4 or a 9. +That could be useful, but no one really wants to move their data to an +entirely new database system. We're exploring ways to enable probabilistic +database functionality within existing deterministic database engines, +allowing legacy database applications to transparently co-exist with +probability-aware applications. +
+ ++Quantitative metrics like standard deviations and probabilities help to +measure how reliable query results are, but don't really provide a good +sense of why the results aren't reliable or what can be done to fix them. +Mimir can provide users with a list of explanations of why a particular +result is uncertain, and rank that list in order of relevance. We are +exploring what contextual cues make an explanation relevant, and ways of +efficiently ranking explanations in bulk. +
diff --git a/src/research/mimir/index.md b/src/research/mimir/index.md deleted file mode 100644 index ab65b7c3..00000000 --- a/src/research/mimir/index.md +++ /dev/null @@ -1,72 +0,0 @@ ---- -title: Mimir ---- -