Skip to content

Commit 0980673

Browse files
authored
Finish pep8 compliance (#630)
* PEP8 remove pep8 violations * Typo.
1 parent 9474029 commit 0980673

9 files changed

Lines changed: 55 additions & 180 deletions

File tree

ci_scripts/flake8_diff.sh

Lines changed: 1 addition & 154 deletions
Original file line numberDiff line numberDiff line change
@@ -1,156 +1,3 @@
11
#!/bin/bash
22

3-
# Inspired from https://github.com/scikit-learn/scikit-learn/blob/master/build_tools/travis/flake8_diff.sh
4-
5-
# This script is used in Travis to check that PRs do not add obvious
6-
# flake8 violations. It relies on two things:
7-
# - find common ancestor between branch and
8-
# openml/openml-python remote
9-
# - run flake8 --diff on the diff between the branch and the common
10-
# ancestor
11-
#
12-
# Additional features:
13-
# - the line numbers in Travis match the local branch on the PR
14-
# author machine.
15-
# - ./ci_scripts/flake8_diff.sh can be run locally for quick
16-
# turn-around
17-
18-
set -e
19-
# pipefail is necessary to propagate exit codes
20-
set -o pipefail
21-
22-
PROJECT=openml/openml-python
23-
PROJECT_URL=https://github.com/$PROJECT.git
24-
25-
# Find the remote with the project name (upstream in most cases)
26-
REMOTE=$(git remote -v | grep $PROJECT | cut -f1 | head -1 || echo '')
27-
28-
# Add a temporary remote if needed. For example this is necessary when
29-
# Travis is configured to run in a fork. In this case 'origin' is the
30-
# fork and not the reference repo we want to diff against.
31-
if [[ -z "$REMOTE" ]]; then
32-
TMP_REMOTE=tmp_reference_upstream
33-
REMOTE=$TMP_REMOTE
34-
git remote add $REMOTE $PROJECT_URL
35-
fi
36-
37-
echo "Remotes:"
38-
echo '--------------------------------------------------------------------------------'
39-
git remote --verbose
40-
41-
echo "Travis variables:"
42-
echo '--------------------------------------------------------------------------------'
43-
echo "On travis: $TRAVIS"
44-
echo "Current branch: $TRAVIS_BRANCH"
45-
echo "Is a pull request test: $TRAVIS_PULL_REQUEST"
46-
echo "Repository: $TRAVIS_REPO_SLUG"
47-
48-
# Travis does the git clone with a limited depth (50 at the time of
49-
# writing). This may not be enough to find the common ancestor with
50-
# $REMOTE/develop so we unshallow the git checkout
51-
if [[ -a .git/shallow ]]; then
52-
echo -e '\nTrying to unshallow the repo:'
53-
echo '--------------------------------------------------------------------------------'
54-
git fetch --unshallow
55-
fi
56-
57-
if [[ "$TRAVIS" == "true" ]]; then
58-
if [[ "$TRAVIS_BRANCH" == "master" ]]
59-
then
60-
# We do not test PEP8 on the master branch (or for the PR test into
61-
# master) as this results in failures which are only shown for the
62-
# pull request to finish a release (development to master) and are
63-
# therefore a pain to fix
64-
exit 0
65-
fi
66-
if [[ "$TRAVIS_PULL_REQUEST" == "false" ]]
67-
then
68-
# In main repo, using TRAVIS_COMMIT_RANGE to test the commits
69-
# that were pushed into a branch
70-
if [[ "$PROJECT" == "$TRAVIS_REPO_SLUG" ]]; then
71-
if [[ -z "$TRAVIS_COMMIT_RANGE" ]]; then
72-
echo "New branch, no commit range from Travis so passing this test by convention"
73-
exit 0
74-
fi
75-
COMMIT_RANGE=$TRAVIS_COMMIT_RANGE
76-
fi
77-
else
78-
# We want to fetch the code as it is in the PR branch and not
79-
# the result of the merge into develop. This way line numbers
80-
# reported by Travis will match with the local code.
81-
LOCAL_BRANCH_REF=travis_pr_$TRAVIS_PULL_REQUEST
82-
# In Travis the PR target is always origin
83-
git fetch origin pull/$TRAVIS_PULL_REQUEST/head:refs/$LOCAL_BRANCH_REF
84-
fi
85-
fi
86-
87-
# If not using the commit range from Travis we need to find the common
88-
# ancestor between $LOCAL_BRANCH_REF and $REMOTE/develop
89-
if [[ -z "$COMMIT_RANGE" ]]; then
90-
if [[ -z "$LOCAL_BRANCH_REF" ]]; then
91-
LOCAL_BRANCH_REF=$(git rev-parse --abbrev-ref HEAD)
92-
fi
93-
echo -e "\nLast 2 commits in $LOCAL_BRANCH_REF:"
94-
echo '--------------------------------------------------------------------------------'
95-
git --no-pager log -2 $LOCAL_BRANCH_REF
96-
97-
REMOTE_DEV_REF="$REMOTE/develop"
98-
# Make sure that $REMOTE_DEV_REF is a valid reference
99-
echo -e "\nFetching $REMOTE_DEV_REF"
100-
echo '--------------------------------------------------------------------------------'
101-
git fetch $REMOTE develop:refs/remotes/$REMOTE_DEV_REF
102-
LOCAL_BRANCH_SHORT_HASH=$(git rev-parse --short $LOCAL_BRANCH_REF)
103-
REMOTE_DEV_SHORT_HASH=$(git rev-parse --short $REMOTE_DEV_REF)
104-
105-
COMMIT=$(git merge-base $LOCAL_BRANCH_REF $REMOTE_DEV_REF) || \
106-
echo "No common ancestor found for $(git show $LOCAL_BRANCH_REF -q) and $(git show $REMOTE_DEV_REF -q)"
107-
108-
if [ -z "$COMMIT" ]; then
109-
exit 1
110-
fi
111-
112-
COMMIT_SHORT_HASH=$(git rev-parse --short $COMMIT)
113-
114-
echo -e "\nCommon ancestor between $LOCAL_BRANCH_REF ($LOCAL_BRANCH_SHORT_HASH)"\
115-
"and $REMOTE_DEV_REF ($REMOTE_DEV_SHORT_HASH) is $COMMIT_SHORT_HASH:"
116-
echo '--------------------------------------------------------------------------------'
117-
git --no-pager show --no-patch $COMMIT_SHORT_HASH
118-
119-
COMMIT_RANGE="$COMMIT_SHORT_HASH..$LOCAL_BRANCH_SHORT_HASH"
120-
121-
if [[ -n "$TMP_REMOTE" ]]; then
122-
git remote remove $TMP_REMOTE
123-
fi
124-
125-
else
126-
echo "Got the commit range from Travis: $COMMIT_RANGE"
127-
fi
128-
129-
echo -e '\nRunning flake8 on the diff in the range' "$COMMIT_RANGE" \
130-
"($(git rev-list $COMMIT_RANGE | wc -l) commit(s)):"
131-
echo '--------------------------------------------------------------------------------'
132-
# We need the following command to exit with 0 hence the echo in case
133-
# there is no match
134-
MODIFIED_FILES="$(git diff --no-ext-diff --name-only $COMMIT_RANGE || echo "no_match")"
135-
136-
check_files() {
137-
files="$1"
138-
shift
139-
options="$*"
140-
if [ -n "$files" ]; then
141-
# Conservative approach: diff without context (--unified=0) so that code
142-
# that was not changed does not create failures
143-
# git diff --no-ext-diff --unified=0 $COMMIT_RANGE -- $files | flake8 --ignore E402 --diff --show-source $options
144-
flake8 --ignore E402,W503 --show-source --max-line-length 100 $options
145-
fi
146-
}
147-
148-
if [[ "$MODIFIED_FILES" == "no_match" ]]; then
149-
echo "No file has been modified"
150-
else
151-
152-
check_files "$(echo "$MODIFIED_FILES" | grep -v ^examples)"
153-
check_files "$(echo "$MODIFIED_FILES" | grep ^examples)" \
154-
--config ./examples/.flake8
155-
fi
156-
echo -e "No problem detected by flake8\n"
3+
flake8 --ignore E402,W503 --show-source --max-line-length 100 $options

examples/datasets_tutorial.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454

5555
############################################################################
5656
# Get the actual data.
57-
#
57+
#
5858
# Returned as numpy array, with meta-info
5959
# (e.g. target feature, feature names, ...)
6060
X, y, attribute_names = dataset.get_data(

examples/flows_and_runs_tutorial.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@
5858
############################################################################
5959
# Share the run on the OpenML server
6060
#
61-
# So far the run is only available locally. By calling the publish function, the run is sent to the OpenML server:
61+
# So far the run is only available locally. By calling the publish function,
62+
# the run is sent to the OpenML server:
6263

6364
myrun = run.publish()
6465
# For this tutorial, our configuration publishes to the test server
@@ -96,11 +97,16 @@
9697
# compare your results with the rest of the class and learn from
9798
# them. Some tasks you could try (or browse openml.org):
9899
#
99-
# * EEG eye state: data_id:`1471 <http://www.openml.org/d/1471>`_, task_id:`14951 <http://www.openml.org/t/14951>`_
100-
# * Volcanoes on Venus: data_id:`1527 <http://www.openml.org/d/1527>`_, task_id:`10103 <http://www.openml.org/t/10103>`_
101-
# * Walking activity: data_id:`1509 <http://www.openml.org/d/1509>`_, task_id:`9945 <http://www.openml.org/t/9945>`_, 150k instances.
102-
# * Covertype (Satellite): data_id:`150 <http://www.openml.org/d/150>`_, task_id:`218 <http://www.openml.org/t/218>`_, 500k instances.
103-
# * Higgs (Physics): data_id:`23512 <http://www.openml.org/d/23512>`_, task_id:`52950 <http://www.openml.org/t/52950>`_, 100k instances, missing values.
100+
# * EEG eye state: data_id:`1471 <http://www.openml.org/d/1471>`_,
101+
# task_id:`14951 <http://www.openml.org/t/14951>`_
102+
# * Volcanoes on Venus: data_id:`1527 <http://www.openml.org/d/1527>`_,
103+
# task_id:`10103 <http://www.openml.org/t/10103>`_
104+
# * Walking activity: data_id:`1509 <http://www.openml.org/d/1509>`_,
105+
# task_id:`9945 <http://www.openml.org/t/9945>`_, 150k instances.
106+
# * Covertype (Satellite): data_id:`150 <http://www.openml.org/d/150>`_,
107+
# task_id:`218 <http://www.openml.org/t/218>`_, 500k instances.
108+
# * Higgs (Physics): data_id:`23512 <http://www.openml.org/d/23512>`_,
109+
# task_id:`52950 <http://www.openml.org/t/52950>`_, 100k instances, missing values.
104110

105111
# Easy benchmarking:
106112
for task_id in [115, ]: # Add further tasks. Disclaimer: they might take some time

examples/introduction_tutorial.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,21 +23,27 @@
2323
#
2424
# pip install openml
2525
#
26-
# For further information, please check out the installation guide at https://openml.github.io/openml-python/stable/contributing.html#installation
26+
# For further information, please check out the installation guide at
27+
# https://openml.github.io/openml-python/master/contributing.html#installation
2728
#
2829
# Authentication
2930
# ^^^^^^^^^^^^^^
3031
#
31-
# The OpenML server can only be accessed by users who have signed up on the OpenML platform. If you don’t have an account yet, sign up now.
32-
# You will receive an API key, which will authenticate you to the server and allow you to download and upload datasets, tasks, runs and flows.
32+
# The OpenML server can only be accessed by users who have signed up on the
33+
# OpenML platform. If you don’t have an account yet, sign up now.
34+
# You will receive an API key, which will authenticate you to the server
35+
# and allow you to download and upload datasets, tasks, runs and flows.
3336
#
3437
# * Create an OpenML account (free) on http://www.openml.org.
3538
# * After logging in, open your account page (avatar on the top right)
3639
# * Open 'Account Settings', then 'API authentication' to find your API key.
3740
#
3841
# There are two ways to authenticate:
3942
#
40-
# * Create a plain text file **~/.openml/config** with the line **'apikey=MYKEY'**, replacing **MYKEY** with your API key. The config file must be in the directory ~/.openml/config and exist prior to importing the openml module
43+
# * Create a plain text file **~/.openml/config** with the line
44+
# **'apikey=MYKEY'**, replacing **MYKEY** with your API key. The config
45+
# file must be located at ~/.openml/config and exist prior to
46+
# importing the openml module.
4147
# * Run the code below, replacing 'YOURKEY' with your API key.
4248

4349
############################################################################
@@ -50,13 +56,18 @@
5056
############################################################################
5157
# Caching
5258
# ^^^^^^^
53-
# When downloading datasets, tasks, runs and flows, they will be cached to retrieve them without calling the server later. As with the API key, the cache directory can be either specified through the config file or through the API:
59+
# When downloading datasets, tasks, runs and flows, they will be cached to
60+
# retrieve them without calling the server later. As with the API key,
61+
# the cache directory can be either specified through the config file or
62+
# through the API:
5463
#
55-
# * Add the line **cachedir = 'MYDIR'** to the config file, replacing 'MYDIR' with the path to the cache directory. By default, OpenML will use **~/.openml/cache** as the cache directory.
64+
# * Add the line **cachedir = 'MYDIR'** to the config file, replacing
65+
# 'MYDIR' with the path to the cache directory. By default, OpenML
66+
# will use **~/.openml/cache** as the cache directory.
5667
# * Run the code below, replacing 'YOURDIR' with the path to the cache directory.
5768

58-
import os
5969
# Uncomment and set your OpenML cache directory
70+
# import os
6071
# openml.config.cache_directory = os.path.expanduser('YOURDIR')
6172

6273
############################################################################

examples/run_setup_tutorial.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
2) Download the flow, reinstantiate the model with same hyperparameters,
2525
and solve the same task again;
2626
3) We will verify that the obtained results are exactly the same.
27+
2728
"""
2829
import logging
2930
import numpy as np
@@ -75,8 +76,7 @@
7576
run_original = run.publish() # this implicitly uploads the flow
7677

7778
###############################################################################
78-
# 2) Download the flow, reinstantiate the model with same hyperparameters,
79-
# and solve the same task again.
79+
# 2) Download the flow and solve the same task again.
8080
###############################################################################
8181

8282
# obtain setup id (note that the setup id is assigned by the OpenML server -

examples/tasks_tutorial.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,16 @@
1313
#
1414
# Tasks are identified by IDs and can be accessed in two different ways:
1515
#
16-
# 1. In a list providing basic information on all tasks available on OpenML. This function will not download the actual tasks, but will instead download meta data that can be used to filter the tasks and retrieve a set of IDs. We can filter this list, for example, we can only list tasks having a special tag or only tasks for a specific target such as *supervised classification*.
16+
# 1. In a list providing basic information on all tasks available on OpenML.
17+
# This function will not download the actual tasks, but will instead download
18+
# meta data that can be used to filter the tasks and retrieve a set of IDs.
19+
# We can filter this list; for example, we can only list tasks having a
20+
# special tag or only tasks for a specific target such as
21+
# *supervised classification*.
1722
#
18-
# 2. A single task by its ID. It contains all meta information, the target metric, the splits and an iterator which can be used to access the splits in a useful manner.
23+
# 2. A single task by its ID. It contains all meta information, the target
24+
# metric, the splits and an iterator which can be used to access the
25+
# splits in a useful manner.
1926

2027
############################################################################
2128
# Listing tasks
@@ -36,7 +43,8 @@
3643
pprint(tasks.head())
3744

3845
############################################################################
39-
# We can filter the list of tasks to only contain datasets with more than 500 samples, but less than 1000 samples:
46+
# We can filter the list of tasks to only contain datasets with more than
47+
# 500 samples, but fewer than 1000 samples:
4048

4149
filtered_tasks = tasks.query('NumberOfInstances > 500 and NumberOfInstances < 1000')
4250
print(list(filtered_tasks.index))
@@ -58,7 +66,8 @@
5866
print(len(filtered_tasks))
5967

6068
############################################################################
61-
# Resampling strategies can be found on the `OpenML Website <http://www.openml.org/search?type=measure&q=estimation%20procedure>`_.
69+
# Resampling strategies can be found on the
70+
# `OpenML Website <http://www.openml.org/search?type=measure&q=estimation%20procedure>`_.
6271
#
6372
# Similar to listing tasks by task type, we can list tasks by tags:
6473

@@ -111,7 +120,9 @@
111120
# Downloading tasks
112121
# ^^^^^^^^^^^^^^^^^
113122
#
114-
# We provide two functions to download tasks, one which downloads only a single task by its ID, and one which takes a list of IDs and downloads all of these tasks:
123+
# We provide two functions to download tasks, one which downloads only a
124+
# single task by its ID, and one which takes a list of IDs and downloads
125+
# all of these tasks:
115126

116127
task_id = 1
117128
task = openml.tasks.get_task(task_id)
@@ -127,5 +138,3 @@
127138
ids = [1, 2, 19, 97, 403]
128139
tasks = openml.tasks.get_tasks(ids)
129140
pprint(tasks[0])
130-
131-

openml/datasets/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from .functions import (
2+
attributes_arff_from_df,
23
check_datasets_active,
34
create_dataset,
45
get_dataset,
@@ -10,6 +11,7 @@
1011
from .data_feature import OpenMLDataFeature
1112

1213
__all__ = [
14+
'attributes_arff_from_df',
1315
'check_datasets_active',
1416
'create_dataset',
1517
'get_dataset',

tests/test_study/test_study_functions.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,12 +148,12 @@ def test_study_attach_illegal(self):
148148
study_id = study.publish()
149149
study_original = openml.study.get_study(study_id)
150150

151-
with self.assertRaisesRegex(openml.exceptions.OpenMLServerException,
151+
with self.assertRaisesRegex(openml.exceptions.OpenMLServerException,
152152
'Problem attaching entities.'):
153153
# run id does not exist
154154
openml.study.attach_to_study(study_id, [0])
155155

156-
with self.assertRaisesRegex(openml.exceptions.OpenMLServerException,
156+
with self.assertRaisesRegex(openml.exceptions.OpenMLServerException,
157157
'Problem attaching entities.'):
158158
# some runs already attached
159159
openml.study.attach_to_study(study_id, list(run_list_more.keys()))

tests/test_utils/test_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def test_list_datasets_with_high_size_parameter(self):
4747
datasets_b = openml.datasets.list_datasets(size=np.inf)
4848

4949
# note that in the meantime the number of datasets could have increased
50-
# due to tests that run in parralel.
50+
# due to tests that run in parallel.
5151
self.assertGreaterEqual(len(datasets_b), len(datasets_a))
5252

5353
def test_list_all_for_tasks(self):

0 commit comments

Comments
 (0)