Skip to content

Commit

Permalink
feat(clean): Add wiki
Browse files Browse the repository at this point in the history
  • Loading branch information
qidanrui committed Nov 25, 2021
1 parent 93c7d6d commit 7f4ab12
Show file tree
Hide file tree
Showing 38 changed files with 29,640 additions and 1 deletion.
Binary file added .DS_Store
Binary file not shown.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,4 @@ profiling
.coverage
report.xml
.vim
.DS_Store
Binary file added dataprep/.DS_Store
Binary file not shown.
Binary file added dataprep/clean/.DS_Store
Binary file not shown.
3 changes: 3 additions & 0 deletions dataprep/clean/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@

from .clean_df import clean_df

from .clean_df_gui import clean_df_gui

from .clean_text import clean_text, default_text_pipeline

from .clean_au_abn import clean_au_abn, validate_au_abn
Expand Down Expand Up @@ -376,6 +378,7 @@
"clean_currency",
"validate_currency",
"clean_df",
"clean_df_gui",
"clean_text",
"default_text_pipeline",
"clean_au_abn",
Expand Down
51 changes: 51 additions & 0 deletions dataprep/clean/clean_df_gui.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""
Provide a graphical user interface (GUI) for the clean module,
used to clean and standardize a Pandas DataFrame interactively.
"""
# pylint: disable-msg=relative-beyond-top-level
# pylint: disable-msg=cyclic-import
# type: ignore

from typing import Any

import pandas as pd

from IPython.display import IFrame, display

from dataprep.clean.gui.clean_gui import launch


def clean_df_gui(
    df: pd.DataFrame,
) -> Any:
    """
    Open the graphical user interface of the clean module for a DataFrame.

    Parameters
    ----------
    df
        A Pandas DataFrame containing the data to be cleaned.

    Returns
    -------
    Any
        The value returned by ``UserInterface.display``.
    """
    gui = UserInterface(df)
    return gui.display()


class UserInterface:
    """
    A user interface used by clean module.

    Holds the DataFrame to clean and embeds the GUI page in an inline frame.
    """

    # Address loaded by the inline frame.
    # NOTE(review): presumably this has to match the address ``launch`` serves on -- confirm.
    LOCAL_SERVER_URL = "http://localhost:7680"

    def __init__(self, df: pd.DataFrame):
        """Store the DataFrame that is handed to the GUI."""
        self.df = df

    def display(self, width: int = 900, height: int = 500) -> None:
        """
        Display the GUI.

        Calls ``launch`` with the stored DataFrame, then shows an ``IFrame``
        pointing at ``LOCAL_SERVER_URL``.

        Parameters
        ----------
        width
            Width passed to the ``IFrame``. Defaults to 900, the value that
            was previously fixed.
        height
            Height passed to the ``IFrame``. Defaults to 500, the value that
            was previously fixed.
        """
        launch(self.df)

        # ``display`` here resolves to the module-level IPython function, not this method.
        display(IFrame(self.LOCAL_SERVER_URL, width=width, height=height))
Binary file added dataprep/clean/gui/.DS_Store
Binary file not shown.
12 changes: 12 additions & 0 deletions dataprep/clean/gui/clean_frontend/.babelrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"presets": [
["env", {
"modules": false,
"targets": {
"browsers": ["> 1%", "last 2 versions", "not ie <= 8"]
}
}],
"stage-2"
],
"plugins": ["transform-vue-jsx", "transform-runtime"]
}
9 changes: 9 additions & 0 deletions dataprep/clean/gui/clean_frontend/.editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
root = true

[*]
charset = utf-8
indent_style = space
indent_size = 2
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
19 changes: 19 additions & 0 deletions dataprep/clean/gui/clean_frontend/.eslintrc.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// ESLint configuration: Vue "essential" rules layered with the Airbnb base style.
const eslintConfig = {
  env: {
    browser: true,
    es2021: true,
  },
  extends: ['plugin:vue/essential', 'airbnb-base'],
  parserOptions: {
    ecmaVersion: 13,
    sourceType: 'module',
  },
  plugins: ['vue'],
  // No rule overrides are defined.
  rules: {},
};

module.exports = eslintConfig;
13 changes: 13 additions & 0 deletions dataprep/clean/gui/clean_frontend/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
.DS_Store
node_modules/
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# Editor directories and files
.idea
.vscode
*.suo
*.ntvs*
*.njsproj
*.sln
10 changes: 10 additions & 0 deletions dataprep/clean/gui/clean_frontend/.postcssrc.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// https://github.com/michael-ciniawsky/postcss-load-config

// PostCSS plugins, each enabled with an empty options object.
const plugins = {
  'postcss-import': {},
  'postcss-url': {},
  // to edit target browsers: use "browserslist" field in package.json
  autoprefixer: {},
};

module.exports = { plugins };
21 changes: 21 additions & 0 deletions dataprep/clean/gui/clean_frontend/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# clean_frontend

> A Vue.js project
## Build Setup

``` bash
# install dependencies
npm install

# serve with hot reload at localhost:8080
npm run dev

# build for production with minification
npm run build

# build for production and view the bundle analyzer report
npm run build --report
```

For a detailed explanation on how things work, check out the [guide](http://vuejs-templates.github.io/webpack/) and [docs for vue-loader](http://vuejs.github.io/vue-loader).
40 changes: 40 additions & 0 deletions dataprep/clean/gui/clean_frontend/build_frontend/build.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
require('./check-versions')();

process.env.NODE_ENV = 'production';

const ora = require('ora');
const rm = require('rimraf');
const path = require('path');
const chalk = require('chalk');
const webpack = require('webpack');
const config = require('../config');
const webpackConfig = require('./webpack.prod.conf');

// Show a progress spinner while the production bundle is being built.
const spinner = ora('building for production...');
spinner.start();

// Location of the static assets; it is removed before webpack runs.
const assetsDir = path.join(config.build.assetsRoot, config.build.assetsSubDirectory);

rm(assetsDir, (rmErr) => {
  if (rmErr) throw rmErr;

  // Distinct error names avoid shadowing the outer callback's error.
  webpack(webpackConfig, (buildErr, stats) => {
    spinner.stop();
    if (buildErr) throw buildErr;

    const report = stats.toString({
      colors: true,
      modules: false,
      children: false, // If you are using ts-loader, setting this to true will make TypeScript errors show up during build.
      chunks: false,
      chunkModules: false,
    });
    process.stdout.write(`${report}\n\n`);

    // Compilation errors are reported via stats, not via buildErr.
    if (stats.hasErrors()) {
      console.log(chalk.red(' Build failed with errors.\n'));
      process.exit(1);
    }

    console.log(chalk.cyan(' Build complete.\n'));
    console.log(chalk.yellow(
      ' Tip: built files are meant to be served over an HTTP server.\n'
      + " Opening index.html over file:// won't work.\n",
    ));
  });
});
52 changes: 52 additions & 0 deletions dataprep/clean/gui/clean_frontend/build_frontend/check-versions.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
const chalk = require('chalk');
const semver = require('semver');
const shell = require('shelljs');
const packageConfig = require('../package.json');

/**
 * Run a command synchronously and return its output as a trimmed string.
 *
 * @param {string} cmd - Command line to execute.
 * @returns {string} Output of the command with surrounding whitespace removed.
 */
function exec(cmd) {
  const { execSync } = require('child_process');
  const output = execSync(cmd);
  return output.toString().trim();
}

// Tools whose current version is compared with the `engines` entry of package.json.
const versionRequirements = [
  {
    name: 'node',
    currentVersion: semver.clean(process.version),
    versionRequirement: packageConfig.engines.node,
  },
  // The npm entry is only added when `shell.which('npm')` finds an executable.
  ...(shell.which('npm')
    ? [{
      name: 'npm',
      currentVersion: exec('npm --version'),
      versionRequirement: packageConfig.engines.npm,
    }]
    : []),
];

module.exports = function () {
const warnings = [];

for (let i = 0; i < versionRequirements.length; i++) {
const mod = versionRequirements[i];

if (!semver.satisfies(mod.currentVersion, mod.versionRequirement)) {
warnings.push(`${mod.name}: ${
chalk.red(mod.currentVersion)} should be ${
chalk.green(mod.versionRequirement)}`);
}
}

if (warnings.length) {
console.log('');
console.log(chalk.yellow('To use this template, you must update following to modules:'));
console.log();

for (let i = 0; i < warnings.length; i++) {
const warning = warnings[i];
console.log(` ${warning}`);
}

console.log();
process.exit(1);
}
};
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
97 changes: 97 additions & 0 deletions dataprep/clean/gui/clean_frontend/build_frontend/utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
const path = require('path');
const ExtractTextPlugin = require('extract-text-webpack-plugin');
const config = require('../config');
const packageConfig = require('../package.json');

exports.assetsPath = function (_path) {
const assetsSubDirectory = process.env.NODE_ENV === 'production'
? config.build.assetsSubDirectory
: config.dev.assetsSubDirectory;

return path.posix.join(assetsSubDirectory, _path);
};

exports.cssLoaders = function (options) {
options = options || {};

const cssLoader = {
loader: 'css-loader',
options: {
sourceMap: options.sourceMap,
},
};

const postcssLoader = {
loader: 'postcss-loader',
options: {
sourceMap: options.sourceMap,
},
};

// generate loader string to be used with extract text plugin
function generateLoaders(loader, loaderOptions) {
const loaders = options.usePostCSS ? [cssLoader, postcssLoader] : [cssLoader];

if (loader) {
loaders.push({
loader: `${loader}-loader`,
options: { ...loaderOptions, sourceMap: options.sourceMap },
});
}

// Extract CSS when that option is specified
// (which is the case during production build)
if (options.extract) {
return ExtractTextPlugin.extract({
use: loaders,
fallback: 'vue-style-loader',
});
}
return ['vue-style-loader'].concat(loaders);
}

// https://vue-loader.vuejs.org/en/configurations/extract-css.html
return {
css: generateLoaders(),
postcss: generateLoaders(),
less: generateLoaders('less'),
sass: generateLoaders('sass', { indentedSyntax: true }),
scss: generateLoaders('sass'),
stylus: generateLoaders('stylus'),
styl: generateLoaders('stylus'),
};
};

// Generate loaders for standalone style files (outside of .vue)
exports.styleLoaders = function (options) {
const output = [];
const loaders = exports.cssLoaders(options);

for (const extension in loaders) {
const loader = loaders[extension];
output.push({
test: new RegExp(`\\.${extension}$`),
use: loader,
});
}

return output;
};

exports.createNotifierCallback = () => {
const notifier = require('node-notifier');

return (severity, errors) => {
if (severity !== 'error') return;

const error = errors[0];
const filename = error.file && error.file.split('!').pop();

notifier.notify({
title: packageConfig.name,
message: `${severity}: ${error.name}`,
subtitle: filename || '',
icon: path.join(__dirname, 'logo.png'),
});
};
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
const utils = require('./utils');
const config = require('../config');

const isProduction = process.env.NODE_ENV === 'production';
const sourceMapEnabled = isProduction
? config.build.productionSourceMap
: config.dev.cssSourceMap;

module.exports = {
loaders: utils.cssLoaders({
sourceMap: sourceMapEnabled,
extract: isProduction,
}),
cssSourceMap: sourceMapEnabled,
cacheBusting: config.dev.cacheBusting,
transformToRequire: {
video: ['src', 'poster'],
source: 'src',
img: 'src',
image: 'xlink:href',
},
};
Loading

1 comment on commit 7f4ab12

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DataPrep.EDA Benchmarks

Benchmark suite Current: 7f4ab12 Previous: 93c7d6d Ratio
dataprep/tests/benchmarks/eda.py::test_create_report 0.17390831148452232 iter/sec (stddev: 0.031510332882753754) 0.20197387127875202 iter/sec (stddev: 0.028998817934387633) 1.16

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.