Skip to content

Commit 7617f89

Browse files
author
Tania Allard
committed
Add solution package
1 parent b254e59 commit 7617f89

15 files changed

Lines changed: 220 additions & 0 deletions

File tree

.gitignore/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,3 +102,4 @@ ENV/
102102
.mypy_cache/
103103

104104
/data
105+
\.DS_Store

03_ProcessData.ipynb

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,24 @@
157157
"```\n"
158158
]
159159
},
160+
{
161+
"cell_type": "markdown",
162+
"metadata": {
163+
"slideshow": {
164+
"slide_type": "subslide"
165+
}
166+
},
167+
"source": [
168+
"# Packaging\n",
169+
"\n",
170+
"Since are using a modular approach we need to ensure we can call our functions from a **run all script**.\n",
171+
"\n",
172+
"From the shell: \n",
173+
"```\n",
174+
"$ touch src/__init__/py\n",
175+
"```"
176+
]
177+
},
160178
{
161179
"cell_type": "code",
162180
"execution_count": 1,
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#!/usr/bin/env python
2+
3+
"""
4+
Script to create all the results from our
5+
amazing wine study
6+
"""
7+
8+
import numpy as np
9+
import pandas as pd
10+
import matplotlib.pyplot as plt
11+
import importlib
12+
13+
# imports from our package
14+
# Since we have digits at the start of the modules we
15+
# will use dynamic imports
16+
# The module file names start with digits and contain hyphens, so they are
# not valid identifiers for a regular ``import`` statement; load them by
# name, relative to the ``scripts`` package.
subset = importlib.import_module('.data.01_subset-data-GBP', 'scripts')
plotwines = importlib.import_module('.visualization.02_visualize-wines', 'scripts')
country_sub = importlib.import_module('.data.03_country-subset', 'scripts')

# ------------------------------------------------------------------------
# Declare variables
# ------------------------------------------------------------------------

# Set raw data path
raw_data = "data/raw/winemag-data-130k-v2.csv"

# Path of a previously generated interim file. Kept for reference only:
# the pipeline below uses the path returned by the subset step, because
# that file name embeds the date on which the step is run.
interim_data = "data/interim/2018-05-02-winemag_priceGBP.csv"

# Set country
country = "Chile"


# ------------------------------------------------------------------------
# Perform analysis
# ------------------------------------------------------------------------

if __name__ == '__main__':
    # Step 1: subset the raw data; returns the path of the file written.
    subset_file = subset.process_data_GBP(raw_data)
    print(subset_file)

    # Step 2: plot from the file produced in step 1 rather than from a
    # hard-coded, dated path that only exists if the script was run on
    # that specific day.
    plotwines.create_plots(subset_file)

    # Step 3: extract the rows for the selected country.
    country_file = country_sub.get_country(subset_file, country)
    print(country_file)

solutions/scripts/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
from . import data
2+
from . import visualization
Binary file not shown.

solutions/scripts/data/.gitkeep

Whitespace-only changes.
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#!/usr/bin/env python
2+
3+
import sys
4+
import datetime
5+
6+
import pandas as pd
7+
import numpy as np
8+
import matplotlib.pyplot as plt
9+
10+
11+
def process_data_GBP(filename, rate=1.2):
    """
    Get only the needed subset from the data and add a GBP price column.

    Reads the CSV at ``filename``, keeps the ``country``, ``designation``,
    ``points`` and ``price`` columns, adds ``price_GBP`` (``price * rate``)
    and writes the result to ``data/interim/<YYYY-MM-DD>-winemag_priceGBP.csv``
    relative to the current working directory.

    Args:
        filename: str
            Path to the filename containing the wine data
        rate: float, optional
            Multiplier applied to ``price`` to obtain ``price_GBP``.
            Defaults to 1.2, the value that used to be hard-coded.

    Returns:
        data_path: str
            Path to the created data set
    """
    # Local import so this function does not depend on the module header.
    import os

    # Load table
    wine = pd.read_csv(filename)

    # Subset of data to keep (explicit copy so adding a column below never
    # touches, or warns about, the original frame)
    wine_keep = wine.loc[:, ['country', 'designation', 'points', 'price']].copy()

    # Add column with prices in GBP; a vectorised multiply gives the same
    # values as a per-element ``apply`` without the Python-level loop
    wine_keep['price_GBP'] = wine_keep['price'] * rate

    # Constructing the fname
    today = datetime.datetime.today().strftime('%Y-%m-%d')
    fname = f'data/interim/{today}-winemag_priceGBP.csv'

    # The output directory may not exist yet (e.g. on a fresh checkout);
    # ``to_csv`` does not create it
    os.makedirs(os.path.dirname(fname), exist_ok=True)

    # Saving the csv (index is written, as before)
    wine_keep.to_csv(fname)

    return fname
41+
42+
43+
if __name__ == '__main__':
    # The path to the raw wine CSV is expected as the first argument;
    # exit with a usage message instead of an IndexError when it is missing.
    if len(sys.argv) < 2:
        sys.exit(f'Usage: {sys.argv[0]} <path-to-wine-csv>')

    filename = sys.argv[1]
    print(filename)
    print(process_data_GBP(filename))
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#!/usr/bin/env python
2+
3+
4+
import sys
5+
import datetime
6+
7+
import pandas as pd
8+
import numpy as np
9+
import matplotlib.pyplot as plt
10+
11+
12+
def get_country(filename, country):
    """
    Extract the rows of a wine data set that belong to one country.

    Reads the CSV at ``filename``, keeps the rows whose ``country`` column
    equals ``country`` and writes them to
    ``data/processed/<YYYY-MM-DD>-winemag_<country>.csv`` relative to the
    current working directory.

    Args:
        filename: str
            Path to the filename containing the wine data
        country: str
            Country to be used to subset

    Returns:
        data_path: str
            Path to the created data set
    """
    # Local import so this function does not depend on the module header.
    import os

    # Load table
    wine = pd.read_csv(filename)

    # Use the country name to subset data
    subset_country = wine[wine['country'] == country].copy()

    # Constructing the fname
    today = datetime.datetime.today().strftime('%Y-%m-%d')
    fname = f'data/processed/{today}-winemag_{country}.csv'

    # The output directory may not exist yet (e.g. on a fresh checkout);
    # ``to_csv`` does not create it
    os.makedirs(os.path.dirname(fname), exist_ok=True)

    # Saving the csv (index is written, as before)
    subset_country.to_csv(fname)

    return fname
43+
44+
45+
if __name__ == '__main__':
    # Two arguments are expected: the CSV path and the country name; exit
    # with a usage message instead of an IndexError when either is missing.
    if len(sys.argv) < 3:
        sys.exit(f'Usage: {sys.argv[0]} <path-to-wine-csv> <country>')

    filename = sys.argv[1]
    country = sys.argv[2]
    print(f'Subsetting: {filename}')
    print(f'Country searched: {country}')

    print(get_country(filename, country))

solutions/scripts/data/__init__.py

Whitespace-only changes.

solutions/scripts/external/.gitkeep

Whitespace-only changes.

0 commit comments

Comments
 (0)