source
show_data
show_data ()
Lists all the data available with the package
source
get_dataset
get_dataset (task='regression', data_of='palmerpenguins',
description=True, return_single_df=True, target_name=None)
task |
str |
regression |
the kind of task the dataset is needed for (e.g. regression) |
data_of |
str |
palmerpenguins |
name of the dataset to load |
description |
bool |
True |
whether to pass the description as an added output |
return_single_df |
bool |
True |
return a single DataFrame, or separate X and y as in scikit-learn. |
target_name |
NoneType |
None |
if target_name is None, the dataset's original target name is used. |
Currently available datasets are:
['adult', 'palmerpenguins-raw', 'palmerpenguins', 'california', 'titanic']
# get_dataset(data_of='titanic').head()
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
1 |
0.0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
2 |
1.0 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
3 |
1.0 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
4 |
1.0 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
5 |
0.0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
# get_dataset(data_of='california').head()
|
longitude |
latitude |
housing_median_age |
total_rooms |
total_bedrooms |
population |
households |
median_income |
median_house_value |
ocean_proximity |
0 |
-122.23 |
37.88 |
41.0 |
880.0 |
129.0 |
322.0 |
126.0 |
8.3252 |
452600.0 |
NEAR BAY |
1 |
-122.22 |
37.86 |
21.0 |
7099.0 |
1106.0 |
2401.0 |
1138.0 |
8.3014 |
358500.0 |
NEAR BAY |
2 |
-122.24 |
37.85 |
52.0 |
1467.0 |
190.0 |
496.0 |
177.0 |
7.2574 |
352100.0 |
NEAR BAY |
3 |
-122.25 |
37.85 |
52.0 |
1274.0 |
235.0 |
558.0 |
219.0 |
5.6431 |
341300.0 |
NEAR BAY |
4 |
-122.25 |
37.85 |
52.0 |
1627.0 |
280.0 |
565.0 |
259.0 |
3.8462 |
342200.0 |
NEAR BAY |
# get_dataset(data_of='palmerpenguins-raw').head()
Index(['studyName', 'Sample Number', 'Species', 'Region', 'Island', 'Stage',
'Individual ID', 'Clutch Completion', 'Date Egg', 'Culmen Length (mm)',
'Culmen Depth (mm)', 'Flipper Length (mm)', 'Body Mass (g)', 'Sex',
'Delta 15 N (o/oo)', 'Delta 13 C (o/oo)', 'Comments'],
dtype='object')
# get_dataset(data_of='palmerpenguins').head()
|
species |
island |
bill_length_mm |
bill_depth_mm |
flipper_length_mm |
body_mass_g |
sex |
year |
0 |
Adelie Penguin (Pygoscelis adeliae) |
Torgersen |
39.1 |
18.7 |
181.0 |
3750.0 |
MALE |
2007-11-11 |
1 |
Adelie Penguin (Pygoscelis adeliae) |
Torgersen |
39.5 |
17.4 |
186.0 |
3800.0 |
FEMALE |
2007-11-11 |
2 |
Adelie Penguin (Pygoscelis adeliae) |
Torgersen |
40.3 |
18.0 |
195.0 |
3250.0 |
FEMALE |
2007-11-16 |
3 |
Adelie Penguin (Pygoscelis adeliae) |
Torgersen |
NaN |
NaN |
NaN |
NaN |
NaN |
2007-11-16 |
4 |
Adelie Penguin (Pygoscelis adeliae) |
Torgersen |
36.7 |
19.3 |
193.0 |
3450.0 |
FEMALE |
2007-11-16 |
# get_dataset(data_of='adult').head()
|
age |
workclass |
fnlwgt |
education |
education_num |
marital_status |
occupation |
relationship |
race |
sex |
capital_gain |
capital_loss |
hours_per_week |
native_country |
target |
0 |
39 |
State-gov |
77516 |
Bachelors |
13 |
Never-married |
Adm-clerical |
Not-in-family |
White |
Male |
2174 |
0 |
40 |
United-States |
<=50K |
1 |
50 |
Self-emp-not-inc |
83311 |
Bachelors |
13 |
Married-civ-spouse |
Exec-managerial |
Husband |
White |
Male |
0 |
0 |
13 |
United-States |
<=50K |
2 |
38 |
Private |
215646 |
HS-grad |
9 |
Divorced |
Handlers-cleaners |
Not-in-family |
White |
Male |
0 |
0 |
40 |
United-States |
<=50K |
3 |
53 |
Private |
234721 |
11th |
7 |
Married-civ-spouse |
Handlers-cleaners |
Husband |
Black |
Male |
0 |
0 |
40 |
United-States |
<=50K |
4 |
28 |
Private |
338409 |
Bachelors |
13 |
Married-civ-spouse |
Prof-specialty |
Wife |
Black |
Female |
0 |
0 |
40 |
Cuba |
<=50K |