import os # interact with system
import numpy as np # manipulation
import pandas as pd # manipulation
import statsmodels as stm # statistics
import seaborn as sea # visualization
#import matplotlib as mpl # "
import matplotlib.pyplot as plt # "
# Change the working directory so the relative CSV path below resolves.
# NOTE(review): this absolute path is specific to one machine; adjust it
# before running the notebook elsewhere.
os.chdir("/home/heitor/ProjetosPy/ML_PyPratice/Basic")
# Load the CSV file into the DataFrame "dd".
dd = pd.read_csv("pokemon.csv")
dd # general
| name | type1 | hp | attack | defense | |
|---|---|---|---|---|---|
| 793 | Buzzwole | bug | 107 | 139 | 139 |
| 468 | Yanmega | bug | 86 | 76 | 86 |
| 636 | Volcarona | bug | 85 | 60 | 65 |
| 213 | Heracross | bug | 80 | 185 | 115 |
| 616 | Accelgor | bug | 80 | 70 | 40 |
| ... | ... | ... | ... | ... | ... |
| 97 | Krabby | water | 30 | 105 | 90 |
| 115 | Horsea | water | 30 | 40 | 70 |
| 119 | Staryu | water | 30 | 45 | 55 |
| 128 | Magikarp | water | 20 | 10 | 55 |
| 348 | Feebas | water | 20 | 15 | 20 |
801 rows × 5 columns
dd.head(5) # first 5 lines
| abilities | against_bug | against_dark | against_dragon | against_electric | against_fairy | against_fight | against_fire | against_flying | against_ghost | ... | percentage_male | pokedex_number | sp_attack | sp_defense | speed | type1 | type2 | weight_kg | generation | is_legendary | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ['Overgrow', 'Chlorophyll'] | 1.0 | 1.0 | 1.0 | 0.5 | 0.5 | 0.5 | 2.0 | 2.0 | 1.0 | ... | 88.1 | 1 | 65 | 65 | 45 | grass | poison | 6.9 | 1 | 0 |
| 1 | ['Overgrow', 'Chlorophyll'] | 1.0 | 1.0 | 1.0 | 0.5 | 0.5 | 0.5 | 2.0 | 2.0 | 1.0 | ... | 88.1 | 2 | 80 | 80 | 60 | grass | poison | 13.0 | 1 | 0 |
| 2 | ['Overgrow', 'Chlorophyll'] | 1.0 | 1.0 | 1.0 | 0.5 | 0.5 | 0.5 | 2.0 | 2.0 | 1.0 | ... | 88.1 | 3 | 122 | 120 | 80 | grass | poison | 100.0 | 1 | 0 |
| 3 | ['Blaze', 'Solar Power'] | 0.5 | 1.0 | 1.0 | 1.0 | 0.5 | 1.0 | 0.5 | 1.0 | 1.0 | ... | 88.1 | 4 | 60 | 50 | 65 | fire | NaN | 8.5 | 1 | 0 |
| 4 | ['Blaze', 'Solar Power'] | 0.5 | 1.0 | 1.0 | 1.0 | 0.5 | 1.0 | 0.5 | 1.0 | 1.0 | ... | 88.1 | 5 | 80 | 65 | 80 | fire | NaN | 19.0 | 1 | 0 |
5 rows × 41 columns
dd["abilities"][0:7] # spec variable and lines
0 ['Overgrow', 'Chlorophyll'] 1 ['Overgrow', 'Chlorophyll'] 2 ['Overgrow', 'Chlorophyll'] 3 ['Blaze', 'Solar Power'] 4 ['Blaze', 'Solar Power'] 5 ['Blaze', 'Solar Power'] 6 ['Torrent', 'Rain Dish'] Name: abilities, dtype: object
dd[["speed", "name", "type1"]] # list spec variables
| speed | name | type1 | |
|---|---|---|---|
| 0 | 45 | Bulbasaur | grass |
| 1 | 60 | Ivysaur | grass |
| 2 | 80 | Venusaur | grass |
| 3 | 65 | Charmander | fire |
| 4 | 80 | Charmeleon | fire |
| ... | ... | ... | ... |
| 796 | 61 | Celesteela | steel |
| 797 | 109 | Kartana | grass |
| 798 | 43 | Guzzlord | dark |
| 799 | 79 | Necrozma | psychic |
| 800 | 65 | Magearna | steel |
801 rows × 3 columns
dd.columns # see col titles
Index(['abilities', 'against_bug', 'against_dark', 'against_dragon',
'against_electric', 'against_fairy', 'against_fight', 'against_fire',
'against_flying', 'against_ghost', 'against_grass', 'against_ground',
'against_ice', 'against_normal', 'against_poison', 'against_psychic',
'against_rock', 'against_steel', 'against_water', 'attack',
'base_egg_steps', 'base_happiness', 'base_total', 'capture_rate',
'classfication', 'defense', 'experience_growth', 'height_m', 'hp',
'japanese_name', 'name', 'percentage_male', 'pokedex_number',
'sp_attack', 'sp_defense', 'speed', 'type1', 'type2', 'weight_kg',
'generation', 'is_legendary'],
dtype='object')
dd.dtypes # see col variables types
abilities object against_bug float64 against_dark float64 against_dragon float64 against_electric float64 against_fairy float64 against_fight float64 against_fire float64 against_flying float64 against_ghost float64 against_grass float64 against_ground float64 against_ice float64 against_normal float64 against_poison float64 against_psychic float64 against_rock float64 against_steel float64 against_water float64 attack int64 base_egg_steps int64 base_happiness int64 base_total int64 capture_rate object classfication object defense int64 experience_growth int64 height_m float64 hp int64 japanese_name object name object percentage_male float64 pokedex_number int64 sp_attack int64 sp_defense int64 speed int64 type1 object type2 object weight_kg float64 generation int64 is_legendary int64 dtype: object
dd.describe()
| against_bug | against_dark | against_dragon | against_electric | against_fairy | against_fight | against_fire | against_flying | against_ghost | against_grass | ... | height_m | hp | percentage_male | pokedex_number | sp_attack | sp_defense | speed | weight_kg | generation | is_legendary | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 801.000000 | 801.000000 | 801.000000 | 801.000000 | 801.000000 | 801.000000 | 801.000000 | 801.000000 | 801.000000 | 801.000000 | ... | 781.000000 | 801.000000 | 703.000000 | 801.000000 | 801.000000 | 801.000000 | 801.000000 | 781.000000 | 801.000000 | 801.000000 |
| mean | 0.996255 | 1.057116 | 0.968789 | 1.073970 | 1.068976 | 1.065543 | 1.135456 | 1.192884 | 0.985019 | 1.034020 | ... | 1.163892 | 68.958801 | 55.155761 | 401.000000 | 71.305868 | 70.911361 | 66.334582 | 61.378105 | 3.690387 | 0.087391 |
| std | 0.597248 | 0.438142 | 0.353058 | 0.654962 | 0.522167 | 0.717251 | 0.691853 | 0.604488 | 0.558256 | 0.788896 | ... | 1.080326 | 26.576015 | 20.261623 | 231.373075 | 32.353826 | 27.942501 | 28.907662 | 109.354766 | 1.930420 | 0.282583 |
| min | 0.250000 | 0.250000 | 0.000000 | 0.000000 | 0.250000 | 0.000000 | 0.250000 | 0.250000 | 0.000000 | 0.250000 | ... | 0.100000 | 1.000000 | 0.000000 | 1.000000 | 10.000000 | 20.000000 | 5.000000 | 0.100000 | 1.000000 | 0.000000 |
| 25% | 0.500000 | 1.000000 | 1.000000 | 0.500000 | 1.000000 | 0.500000 | 0.500000 | 1.000000 | 1.000000 | 0.500000 | ... | 0.600000 | 50.000000 | 50.000000 | 201.000000 | 45.000000 | 50.000000 | 45.000000 | 9.000000 | 2.000000 | 0.000000 |
| 50% | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 1.000000 | 65.000000 | 50.000000 | 401.000000 | 65.000000 | 66.000000 | 65.000000 | 27.300000 | 4.000000 | 0.000000 |
| 75% | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 2.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 1.500000 | 80.000000 | 50.000000 | 601.000000 | 91.000000 | 90.000000 | 85.000000 | 64.800000 | 5.000000 | 0.000000 |
| max | 4.000000 | 4.000000 | 2.000000 | 4.000000 | 4.000000 | 4.000000 | 4.000000 | 4.000000 | 4.000000 | 4.000000 | ... | 14.500000 | 255.000000 | 100.000000 | 801.000000 | 194.000000 | 230.000000 | 180.000000 | 999.900000 | 7.000000 | 1.000000 |
8 rows × 34 columns
dd["speed"].describe()
count 801.000000 mean 66.334582 std 28.907662 min 5.000000 25% 45.000000 50% 65.000000 75% 85.000000 max 180.000000 Name: speed, dtype: float64
Let's look at the string variables:
dd["abilities"].describe() # freq: most common value’s frequency
| name | type1 | hp | attack | defense | |
|---|---|---|---|---|---|
| 793 | Buzzwole | bug | 107 | 139 | 139 |
| 468 | Yanmega | bug | 86 | 76 | 86 |
| 636 | Volcarona | bug | 85 | 60 | 65 |
| 213 | Heracross | bug | 80 | 185 | 115 |
| 616 | Accelgor | bug | 80 | 70 | 40 |
| ... | ... | ... | ... | ... | ... |
| 97 | Krabby | water | 30 | 105 | 90 |
| 115 | Horsea | water | 30 | 40 | 70 |
| 119 | Staryu | water | 30 | 45 | 55 |
| 128 | Magikarp | water | 20 | 10 | 55 |
| 348 | Feebas | water | 20 | 15 | 20 |
801 rows × 5 columns
dd["classfication"].describe()
count 801 unique 588 top Dragon Pokémon freq 8 Name: classfication, dtype: object
dd["capture_rate"].describe() # 'capture_rate' should be an int!!
count 801 unique 34 top 45 freq 250 Name: capture_rate, dtype: object
dd['is_legendary'] .value_counts()
0 731 1 70 Name: is_legendary, dtype: int64
dd["type1"].value_counts()
water 114 normal 105 grass 78 bug 72 psychic 53 fire 52 rock 45 electric 39 poison 32 ground 32 dark 29 fighting 28 ghost 27 dragon 27 steel 24 ice 23 fairy 18 flying 3 Name: type1, dtype: int64
dd["type2"].value_counts()
flying 95 poison 34 ground 34 fairy 29 psychic 29 fighting 25 steel 22 dark 21 grass 20 water 17 dragon 17 ice 15 rock 14 ghost 14 fire 13 electric 9 bug 5 normal 4 Name: type2, dtype: int64
Sorting:
# Order rows by type1 ascending and, within each type, by hp descending.
dd.sort_values(by=["type1", "hp"], ascending=[True, False])
| abilities | against_bug | against_dark | against_dragon | against_electric | against_fairy | against_fight | against_fire | against_flying | against_ghost | ... | percentage_male | pokedex_number | sp_attack | sp_defense | speed | type1 | type2 | weight_kg | generation | is_legendary | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 793 | ['Beast Boost'] | 0.5 | 0.5 | 1.0 | 1.0 | 2.0 | 0.50 | 2.0 | 4.0 | 1.0 | ... | NaN | 794 | 53 | 53 | 79 | bug | fighting | 333.6 | 7 | 1 |
| 468 | ['Speed Boost', 'Tinted Lens', 'Frisk'] | 0.5 | 1.0 | 1.0 | 2.0 | 1.0 | 0.25 | 2.0 | 2.0 | 1.0 | ... | 50.0 | 469 | 116 | 56 | 95 | bug | flying | 51.5 | 4 | 0 |
| 636 | ['Flame Body', 'Swarm'] | 0.5 | 1.0 | 1.0 | 1.0 | 0.5 | 0.50 | 1.0 | 2.0 | 1.0 | ... | 50.0 | 637 | 135 | 105 | 100 | bug | fire | 46.0 | 5 | 0 |
| 213 | ['Swarm', 'Guts', 'Moxie'] | 0.5 | 0.5 | 1.0 | 1.0 | 2.0 | 0.50 | 2.0 | 4.0 | 1.0 | ... | 50.0 | 214 | 40 | 105 | 75 | bug | fighting | 54.0 | 2 | 0 |
| 616 | ['Hydration', 'Sticky Hold', 'Unburden'] | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.50 | 2.0 | 2.0 | 1.0 | ... | 50.0 | 617 | 100 | 60 | 145 | bug | NaN | 25.3 | 5 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 97 | ['Hyper Cutter', 'Shell Armor', 'Sheer Force'] | 1.0 | 1.0 | 1.0 | 2.0 | 1.0 | 1.00 | 0.5 | 1.0 | 1.0 | ... | 50.0 | 98 | 25 | 25 | 50 | water | NaN | 6.5 | 1 | 0 |
| 115 | ['Swift Swim', 'Sniper', 'Damp'] | 1.0 | 1.0 | 1.0 | 2.0 | 1.0 | 1.00 | 0.5 | 1.0 | 1.0 | ... | 50.0 | 116 | 70 | 25 | 60 | water | NaN | 8.0 | 1 | 0 |
| 119 | ['Illuminate', 'Natural Cure', 'Analytic'] | 1.0 | 1.0 | 1.0 | 2.0 | 1.0 | 1.00 | 0.5 | 1.0 | 1.0 | ... | NaN | 120 | 70 | 55 | 85 | water | NaN | 34.5 | 1 | 0 |
| 128 | ['Swift Swim', 'Rattled'] | 1.0 | 1.0 | 1.0 | 2.0 | 1.0 | 1.00 | 0.5 | 1.0 | 1.0 | ... | 50.0 | 129 | 15 | 20 | 80 | water | NaN | 10.0 | 1 | 0 |
| 348 | ['Swift Swim', 'Oblivious', 'Adaptability'] | 1.0 | 1.0 | 1.0 | 2.0 | 1.0 | 1.00 | 0.5 | 1.0 | 1.0 | ... | 50.0 | 349 | 10 | 55 | 80 | water | NaN | 7.4 | 3 | 0 |
801 rows × 41 columns
# Keep a handful of columns, then order by type1 ascending and hp descending.
(
    dd.loc[:, ["name", "type1", "hp", "attack", "defense"]]
    .sort_values(by=["type1", "hp"], ascending=[True, False])
)
| name | type1 | hp | attack | defense | |
|---|---|---|---|---|---|
| 793 | Buzzwole | bug | 107 | 139 | 139 |
| 468 | Yanmega | bug | 86 | 76 | 86 |
| 636 | Volcarona | bug | 85 | 60 | 65 |
| 213 | Heracross | bug | 80 | 185 | 115 |
| 616 | Accelgor | bug | 80 | 70 | 40 |
| ... | ... | ... | ... | ... | ... |
| 97 | Krabby | water | 30 | 105 | 90 |
| 115 | Horsea | water | 30 | 40 | 70 |
| 119 | Staryu | water | 30 | 45 | 55 |
| 128 | Magikarp | water | 20 | 10 | 55 |
| 348 | Feebas | water | 20 | 15 | 20 |
801 rows × 5 columns
modify the type of 'capture_rate':
# This conversion raises ValueError: one entry holds the text
# '30 (Meteorite)255 (Core)' instead of a plain integer.
dd["capture_rate"] .astype(int)
# Locate the offending row. When run right after the line above in the same
# cell, this statement is not reached because of the exception.
dd.loc[dd['capture_rate'] == '30 (Meteorite)255 (Core)']
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) /tmp/ipykernel_23112/14930470.py in <module> ----> 1 dd["capture_rate"] .astype(int) 2 dd.loc[dd['capture_rate'] == '30 (Meteorite)255 (Core)'] ~/.anaconda3/lib/python3.9/site-packages/pandas/core/generic.py in astype(self, dtype, copy, errors) 5813 else: 5814 # else, only a single dtype is given -> 5815 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors) 5816 return self._constructor(new_data).__finalize__(self, method="astype") 5817 ~/.anaconda3/lib/python3.9/site-packages/pandas/core/internals/managers.py in astype(self, dtype, copy, errors) 416 417 def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T: --> 418 return self.apply("astype", dtype=dtype, copy=copy, errors=errors) 419 420 def convert( ~/.anaconda3/lib/python3.9/site-packages/pandas/core/internals/managers.py in apply(self, f, align_keys, ignore_failures, **kwargs) 325 applied = b.apply(f, **kwargs) 326 else: --> 327 applied = getattr(b, f)(**kwargs) 328 except (TypeError, NotImplementedError): 329 if not ignore_failures: ~/.anaconda3/lib/python3.9/site-packages/pandas/core/internals/blocks.py in astype(self, dtype, copy, errors) 589 values = self.values 590 --> 591 new_values = astype_array_safe(values, dtype, copy=copy, errors=errors) 592 593 new_values = maybe_coerce_values(new_values) ~/.anaconda3/lib/python3.9/site-packages/pandas/core/dtypes/cast.py in astype_array_safe(values, dtype, copy, errors) 1307 1308 try: -> 1309 new_values = astype_array(values, dtype, copy=copy) 1310 except (ValueError, TypeError): 1311 # e.g. 
astype_nansafe can fail on object-dtype of strings ~/.anaconda3/lib/python3.9/site-packages/pandas/core/dtypes/cast.py in astype_array(values, dtype, copy) 1255 1256 else: -> 1257 values = astype_nansafe(values, dtype, copy=copy) 1258 1259 # in pandas we don't store numpy str dtypes, so convert to object ~/.anaconda3/lib/python3.9/site-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy, skipna) 1172 # work around NumPy brokenness, #1987 1173 if np.issubdtype(dtype.type, np.integer): -> 1174 return lib.astype_intsafe(arr, dtype) 1175 1176 # if we have a datetime/timedelta array of objects ~/.anaconda3/lib/python3.9/site-packages/pandas/_libs/lib.pyx in pandas._libs.lib.astype_intsafe() ValueError: invalid literal for int() with base 10: '30 (Meteorite)255 (Core)'
There is one observation in 'capture_rate' that contains text; let's replace it with 30 + 255 = 285:
dd.loc[dd['capture_rate'] == '30 (Meteorite)255 (Core)', 'capture_rate'] = '285'
Now we can apply the transformation to int:
dd["capture_rate"] = dd["capture_rate"].astype(int)
Transforming some variables into categorical:
# Cast the discrete columns to pandas' categorical dtype, one at a time.
for col in ("generation", "is_legendary", "type1", "type2"):
    dd[col] = dd[col].astype("category")
Create a new variable: min-max normalization of attack within each generation subgroup:
(var - var.min) / (var.max - var.min)
dd.groupby('generation')['attack'].min()
dd.groupby('generation')['attack'].max()
generation 1 155 2 185 3 180 4 170 5 150 6 160 7 181 Name: attack, dtype: int64
For this, we need vectors with the same length as the data, holding the min and max of each row's generation:
# Per-row minimum attack of the row's generation. The string name 'min' is
# used because newer pandas versions warn when the Python builtin is passed.
dd.groupby('generation')['attack'].transform('min')
0 5
1 5
2 5
3 5
4 5
..
796 29
797 29
798 29
799 29
800 29
Name: attack, Length: 801, dtype: int64
# Per-row maximum attack of the row's generation. The string name 'max' is
# used because newer pandas versions warn when the Python builtin is passed.
dd.groupby('generation')['attack'].transform('max')
0 155
1 155
2 155
3 155
4 155
...
796 181
797 181
798 181
799 181
800 181
Name: attack, Length: 801, dtype: int64
so:
# Min-max normalise attack within each generation:
#     (attack - gen_min) / (gen_max - gen_min)
# The groupby is built once and reused. The string names 'min'/'max' select
# pandas' own group-wise reductions; newer pandas versions warn when the
# Python builtins min/max are passed instead.
atk_by_gen = dd.groupby('generation')['attack']
atk_gen_min = atk_by_gen.transform('min')
atk_gen_max = atk_by_gen.transform('max')
dd['atk_gen_dnorm'] = (dd['attack'] - atk_gen_min) / (atk_gen_max - atk_gen_min)
dd['atk_gen_dnorm'].describe()
count 801.000000 mean 0.415197 std 0.216498 min 0.000000 25% 0.257143 50% 0.393939 75% 0.560000 max 1.000000 Name: atk_gen_dnorm, dtype: float64
Creating new var: mean of the against_x variables:
# Row-wise mean of all 'against_*' multipliers.
# The columns are selected by name rather than by position: the positional
# slice 1:18 stops at position 17 and therefore left out 'against_water',
# which sits at position 18.
against_cols = [c for c in dd.columns if c.startswith("against_")]
dd["Against_M"] = dd[against_cols].mean(axis=1)
dd['Against_M'].describe()
count 801.000000 mean 1.057979 std 0.113671 min 0.720588 25% 0.985294 50% 1.029412 75% 1.117647 max 1.441176 Name: Against_M, dtype: float64
Creating new var from type1:
# Collapse the primary types into six broader groups.
# A lookup table plus Series.map replaces the chained indexing
# (dd.group1[mask] = ...), which raises SettingWithCopyWarning and is not
# guaranteed to write the values back into dd.
type_to_group = {
    'water': 'aqua', 'ice': 'aqua',
    'normal': 'body', 'fighting': 'body',
    'fire': 'energy', 'dragon': 'energy', 'electric': 'energy',
    'rock': 'earth', 'steel': 'earth', 'ground': 'earth',
    'grass': 'earth', 'bug': 'earth',
    'dark': 'dark', 'ghost': 'dark', 'psychic': 'dark', 'poison': 'dark',
    'fairy': 'air', 'flying': 'air',
}
# Any type missing from the table keeps its own name, as in the original
# mask-based version.
dd['group1'] = (
    dd['type1'].astype('str')
    .map(lambda type_name: type_to_group.get(type_name, type_name))
    .astype('category')
)
/tmp/ipykernel_23112/2645664726.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy dd.group1[(dd['type1']=='water') | (dd['type1']=='ice')] = 'aqua' /tmp/ipykernel_23112/2645664726.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy dd.group1[(dd['type1']=='normal') | (dd['type1']=='fighting')] = 'body' /tmp/ipykernel_23112/2645664726.py:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy dd.group1[(dd['type1']=='fire') | (dd['type1']=='dragon') | /tmp/ipykernel_23112/2645664726.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy dd.group1[(dd['type1']=='rock') | (dd['type1']=='steel') | /tmp/ipykernel_23112/2645664726.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy dd.group1[(dd['type1']=='dark') | (dd['type1']=='ghost') | /tmp/ipykernel_23112/2645664726.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy dd.group1[(dd['type1']=='fairy') | (dd['type1']=='flying')] = 'air'
dd['group1'].value_counts()
earth 251 dark 141 aqua 137 body 133 energy 118 air 21 Name: group1, dtype: int64
Deleting Variables:
# Remove, in a single call, every column whose name matches the regex
# 'against' plus the explicitly listed columns that are being discarded.
against_columns = list(dd.filter(regex='against'))
discarded_columns = [
    "abilities",
    "classfication",
    "base_egg_steps",
    "base_happiness",
    "base_total",
    "height_m",
    "percentage_male",
    "pokedex_number",
    "sp_attack",
    "sp_defense",
    "japanese_name",
]
dd = dd.drop(columns=against_columns + discarded_columns)
print(dd.iloc[5:10]) # some specific lines
attack capture_rate defense experience_growth hp name speed \ 5 104 45 78 1059860 78 Charizard 100 6 48 45 65 1059860 44 Squirtle 43 7 63 45 80 1059860 59 Wartortle 58 8 103 45 120 1059860 79 Blastoise 78 9 30 255 35 1000000 45 Caterpie 45 type1 type2 weight_kg generation is_legendary atk_gen_dnorm Against_M \ 5 fire flying 90.5 1 0 0.660000 0.970588 6 water NaN 9.0 1 0 0.286667 1.029412 7 water NaN 22.5 1 0 0.386667 1.029412 8 water NaN 85.5 1 0 0.653333 1.029412 9 bug NaN 2.9 1 0 0.166667 1.088235 group1 5 energy 6 aqua 7 aqua 8 aqua 9 earth
print(dd.loc[dd["type1"] == "fire"]) # some spec character
attack capture_rate defense experience_growth hp name \
3 52 45 43 1059860 39 Charmander
4 64 45 58 1059860 58 Charmeleon
5 104 45 78 1059860 78 Charizard
36 41 190 40 1000000 38 Vulpix
37 67 75 75 1000000 73 Ninetales
57 70 190 45 1250000 55 Growlithe
58 110 75 80 1250000 90 Arcanine
76 85 190 55 1000000 50 Ponyta
77 100 60 70 1000000 65 Rapidash
125 95 45 57 1000000 65 Magmar
135 130 45 60 1000000 65 Flareon
145 100 3 90 1250000 90 Moltres
154 52 45 43 1059860 39 Cyndaquil
155 64 45 58 1059860 58 Quilava
156 84 45 78 1059860 78 Typhlosion
217 40 190 40 1000000 40 Slugma
218 50 75 120 1000000 60 Magcargo
239 75 45 37 1000000 45 Magby
243 115 3 85 1250000 115 Entei
249 130 3 90 1250000 106 Ho-Oh
254 60 45 40 1059860 45 Torchic
255 85 45 60 1059860 60 Combusken
256 160 45 80 1059860 80 Blaziken
321 60 255 40 1000000 60 Numel
322 120 150 100 1000000 70 Camerupt
323 85 90 140 1000000 70 Torkoal
389 58 45 44 1059860 44 Chimchar
390 78 45 52 1059860 64 Monferno
391 104 45 71 1059860 76 Infernape
466 95 30 67 1000000 75 Magmortar
484 90 3 106 1250000 91 Heatran
497 63 45 45 1059860 65 Tepig
498 93 45 55 1059860 90 Pignite
499 123 45 65 1059860 110 Emboar
512 53 190 48 1000000 50 Pansear
513 98 75 63 1000000 75 Simisear
553 90 120 45 1059860 70 Darumaka
554 30 60 105 1059860 105 Darmanitan
630 97 90 66 1000000 85 Heatmor
652 45 45 40 1059860 40 Fennekin
653 59 45 58 1059860 59 Braixen
654 69 45 72 1059860 75 Delphox
661 73 120 55 1059860 62 Fletchinder
662 81 45 71 1059860 78 Talonflame
666 50 220 58 1059860 62 Litleo
667 68 65 72 1059860 86 Pyroar
720 110 3 120 1250000 80 Volcanion
724 65 45 40 1059860 45 Litten
725 85 45 50 1059860 65 Torracat
726 115 45 90 1059860 95 Incineroar
740 70 45 70 1000000 75 Oricorio
775 78 70 135 1000000 60 Turtonator
speed type1 type2 weight_kg generation is_legendary atk_gen_dnorm \
3 65 fire NaN 8.5 1 0 0.313333
4 80 fire NaN 19.0 1 0 0.393333
5 100 fire flying 90.5 1 0 0.660000
36 65 fire ice NaN 1 0 0.240000
37 109 fire ice NaN 1 0 0.413333
57 60 fire NaN 19.0 1 0 0.433333
58 95 fire NaN 155.0 1 0 0.700000
76 90 fire NaN 30.0 1 0 0.533333
77 105 fire NaN 95.0 1 0 0.633333
125 93 fire NaN 44.5 1 0 0.600000
135 65 fire NaN 25.0 1 0 0.833333
145 90 fire flying 60.0 1 1 0.633333
154 65 fire NaN 7.9 2 0 0.240000
155 80 fire NaN 19.0 2 0 0.308571
156 100 fire NaN 79.5 2 0 0.422857
217 20 fire NaN 35.0 2 0 0.171429
218 30 fire rock 55.0 2 0 0.228571
239 83 fire NaN 21.4 2 0 0.371429
243 100 fire NaN 198.0 2 1 0.600000
249 90 fire flying 199.0 2 1 0.685714
254 45 fire NaN 2.5 3 0 0.272727
255 55 fire fighting 19.5 3 0 0.424242
256 100 fire fighting 52.0 3 0 0.878788
321 35 fire ground 24.0 3 0 0.272727
322 20 fire ground 220.0 3 0 0.636364
323 20 fire NaN 80.4 3 0 0.424242
389 61 fire NaN 6.2 4 0 0.321212
390 81 fire fighting 22.0 4 0 0.442424
391 108 fire fighting 55.0 4 0 0.600000
466 83 fire NaN 68.0 4 0 0.545455
484 77 fire steel 430.0 4 1 0.515152
497 45 fire NaN 9.9 5 0 0.304000
498 55 fire fighting 55.5 5 0 0.544000
499 65 fire fighting 150.0 5 0 0.784000
512 64 fire NaN 11.0 5 0 0.224000
513 101 fire NaN 28.0 5 0 0.584000
553 50 fire NaN 37.5 5 0 0.520000
554 55 fire fire 92.9 5 0 0.040000
630 65 fire NaN 58.0 5 0 0.576000
652 60 fire NaN 9.4 6 0 0.166667
653 73 fire NaN 14.5 6 0 0.268116
654 104 fire psychic 39.0 6 0 0.340580
661 84 fire flying 16.0 6 0 0.369565
662 126 fire flying 24.5 6 0 0.427536
666 72 fire normal 13.5 6 0 0.202899
667 106 fire normal 81.5 6 0 0.333333
720 70 fire water 195.0 6 1 0.637681
724 70 fire NaN 4.3 7 0 0.236842
725 90 fire NaN 25.0 7 0 0.368421
726 60 fire dark 83.0 7 0 0.565789
740 93 fire flying 3.4 7 0 0.269737
775 36 fire dragon 212.0 7 0 0.322368
Against_M group1
3 0.941176 energy
4 0.941176 energy
5 0.970588 energy
36 0.941176 energy
37 0.941176 energy
57 0.941176 energy
58 0.941176 energy
76 0.941176 energy
77 0.941176 energy
125 0.941176 energy
135 0.941176 energy
145 0.970588 energy
154 0.941176 energy
155 0.941176 energy
156 0.941176 energy
217 0.941176 energy
218 1.073529 energy
239 0.941176 energy
243 0.941176 energy
249 0.970588 energy
254 0.941176 energy
255 0.985294 energy
256 0.985294 energy
321 0.852941 energy
322 0.852941 energy
323 0.941176 energy
389 0.941176 energy
390 0.985294 energy
391 0.985294 energy
466 0.941176 energy
484 0.838235 energy
497 0.941176 energy
498 0.985294 energy
499 0.985294 energy
512 0.941176 energy
513 0.941176 energy
553 0.941176 energy
554 0.941176 energy
630 0.941176 energy
652 0.941176 energy
653 0.941176 energy
654 1.029412 energy
661 0.970588 energy
662 0.970588 energy
666 0.941176 energy
667 0.941176 energy
720 0.985294 energy
724 0.941176 energy
725 0.941176 energy
726 0.941176 energy
740 0.970588 energy
775 1.000000 energy
dd.loc[(dd["type1"] == "grass") & (dd["type2"] == "poison") & (dd["hp"] > 75)]
| attack | capture_rate | defense | experience_growth | hp | name | speed | type1 | type2 | weight_kg | generation | is_legendary | atk_gen_dnorm | Against_M | group1 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 100 | 45 | 123 | 1059860 | 80 | Venusaur | 80 | grass | poison | 100.0 | 1 | 0 | 0.633333 | 1.102941 | earth |
| 70 | 105 | 45 | 65 | 1059860 | 80 | Victreebel | 70 | grass | poison | 15.5 | 1 | 0 | 0.666667 | 1.102941 | earth |
| 590 | 85 | 75 | 70 | 1000000 | 114 | Amoonguss | 30 | grass | poison | 10.5 | 5 | 0 | 0.480000 | 1.102941 | earth |
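Obs: iloc selects purely by integer position and loc by labels (or boolean masks). Now, let's select the rows whose name contains 'mega' (the match is case-sensitive, so a name written with a capital 'Mega' would not be returned):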
dd.loc[dd["name"].str.contains("mega")]
| attack | capture_rate | defense | experience_growth | hp | name | speed | type1 | type2 | weight_kg | generation | is_legendary | atk_gen_dnorm | Against_M | group1 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 468 | 76 | 30 | 86 | 1000000 | 86 | Yanmega | 95 | bug | flying | 51.5 | 4 | 0 | 0.430303 | 1.235294 | earth |
Select type fire or water:
dd.loc[dd["type1"].str.contains("fire|water")]
| attack | capture_rate | defense | experience_growth | hp | name | speed | type1 | type2 | weight_kg | generation | is_legendary | atk_gen_dnorm | Against_M | group1 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 3 | 52 | 45 | 43 | 1059860 | 39 | Charmander | 65 | fire | NaN | 8.5 | 1 | 0 | 0.313333 | 0.941176 | energy |
| 4 | 64 | 45 | 58 | 1059860 | 58 | Charmeleon | 80 | fire | NaN | 19.0 | 1 | 0 | 0.393333 | 0.941176 | energy |
| 5 | 104 | 45 | 78 | 1059860 | 78 | Charizard | 100 | fire | flying | 90.5 | 1 | 0 | 0.660000 | 0.970588 | energy |
| 6 | 48 | 45 | 65 | 1059860 | 44 | Squirtle | 43 | water | NaN | 9.0 | 1 | 0 | 0.286667 | 1.029412 | aqua |
| 7 | 63 | 45 | 80 | 1059860 | 59 | Wartortle | 58 | water | NaN | 22.5 | 1 | 0 | 0.386667 | 1.029412 | aqua |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 751 | 70 | 100 | 92 | 1000000 | 68 | Araquanid | 42 | water | bug | 82.0 | 7 | 0 | 0.269737 | 1.058824 | aqua |
| 770 | 60 | 60 | 130 | 800000 | 55 | Pyukumuku | 5 | water | NaN | 1.2 | 7 | 0 | 0.203947 | 1.029412 | aqua |
| 775 | 78 | 70 | 135 | 1000000 | 60 | Turtonator | 36 | fire | dragon | 212.0 | 7 | 0 | 0.322368 | 1.000000 | energy |
| 778 | 105 | 80 | 70 | 1000000 | 68 | Bruxish | 92 | water | psychic | 19.0 | 7 | 0 | 0.500000 | 1.147059 | aqua |
| 787 | 75 | 3 | 115 | 1250000 | 70 | Tapu Fini | 85 | water | fairy | 21.2 | 7 | 1 | 0.302632 | 0.970588 | aqua |
166 rows × 15 columns
# Mean of every numeric column per primary type, ordered by mean attack.
# numeric_only=True is passed explicitly: dd still holds string columns
# (e.g. 'name'), and pandas >= 2.0 raises on them instead of silently
# dropping them.
dd.groupby("type1").mean(numeric_only=True).sort_values("attack")
| attack | capture_rate | defense | experience_growth | hp | speed | weight_kg | atk_gen_dnorm | Against_M | |
|---|---|---|---|---|---|---|---|---|---|
| type1 | |||||||||
| fairy | 62.111111 | 116.944444 | 68.166667 | 9.138889e+05 | 73.944444 | 53.666667 | 23.555556 | 0.305327 | 0.977124 |
| psychic | 65.566038 | 86.660377 | 69.264151 | 1.079405e+06 | 72.943396 | 75.150943 | 57.328846 | 0.323916 | 1.112375 |
| flying | 66.666667 | 79.333333 | 65.000000 | 1.083333e+06 | 68.000000 | 99.666667 | 52.000000 | 0.335266 | 1.117647 |
| bug | 70.125000 | 119.833333 | 70.847222 | 1.009006e+06 | 56.722222 | 63.569444 | 33.083333 | 0.361085 | 1.119077 |
| electric | 70.820513 | 106.974359 | 61.820513 | 1.073312e+06 | 60.512821 | 85.410256 | 37.944737 | 0.378562 | 0.952489 |
| poison | 72.656250 | 128.437500 | 70.031250 | 1.086849e+06 | 65.593750 | 64.187500 | 33.830000 | 0.399824 | 0.978860 |
| ghost | 72.740741 | 93.814815 | 79.518519 | 1.018117e+06 | 63.370370 | 58.333333 | 69.570370 | 0.365167 | 0.971678 |
| ice | 73.304348 | 94.173913 | 71.913043 | 1.086069e+06 | 72.086957 | 62.739130 | 103.260870 | 0.386124 | 1.201407 |
| water | 73.307018 | 99.412281 | 73.482456 | 1.056716e+06 | 70.219298 | 63.921053 | 51.071930 | 0.387995 | 1.043473 |
| grass | 73.769231 | 106.935897 | 70.871795 | 1.079791e+06 | 65.358974 | 59.025641 | 33.255844 | 0.379938 | 1.200792 |
| normal | 75.161905 | 120.219048 | 59.695238 | 1.009973e+06 | 76.723810 | 69.533333 | 46.158416 | 0.399823 | 1.005322 |
| fire | 81.500000 | 72.403846 | 67.788462 | 1.064735e+06 | 68.730769 | 73.346154 | 66.096000 | 0.439117 | 0.950509 |
| dark | 87.793103 | 84.482759 | 70.517241 | 1.102720e+06 | 72.551724 | 75.310345 | 69.096552 | 0.472996 | 1.080122 |
| rock | 90.666667 | 76.533333 | 96.266667 | 9.793116e+05 | 66.333333 | 57.422222 | 92.946341 | 0.505399 | 1.146078 |
| steel | 93.083333 | 58.916667 | 120.208333 | 1.126232e+06 | 66.791667 | 56.583333 | 188.841667 | 0.493987 | 0.841912 |
| ground | 94.812500 | 108.312500 | 83.906250 | 1.069652e+06 | 73.187500 | 59.968750 | 150.044444 | 0.540137 | 1.028493 |
| fighting | 99.178571 | 103.750000 | 66.392857 | 1.076021e+06 | 71.428571 | 64.285714 | 58.675000 | 0.571414 | 1.090336 |
| dragon | 106.407407 | 37.333333 | 86.259259 | 1.216667e+06 | 79.851852 | 76.111111 | 107.125926 | 0.604641 | 1.129085 |
sea.set()
g1 = sea.histplot(dd, x="attack")
g11 = sea.histplot(dd, x="attack", kde=True)
g2 = sea.relplot(x= 'attack', y='defense', data=dd)
g3 = sea.relplot(data=dd,
x= 'attack',
y='defense',
hue='group1')
g4 = sea.jointplot(data=dd,
x= 'attack',
y='defense',
hue='group1')
g41 = sea.jointplot(data=dd,
x= 'attack',
y='defense',
hue='is_legendary',
kind= 'kde')
g42 = sea.jointplot(data=dd,
x= 'attack',
y='defense',
#hue='is_legendary',
kind= 'reg')
g43 = sea.jointplot(data=dd,
x= 'attack',
y='defense',
#hue='is_legendary',
kind= 'hex')
# Joint plot of hp (y) against capture_rate (x) with default settings.
g5 = sea.jointplot(data=dd,
y='hp',
x='capture_rate')
# Overlay red KDE contours (6 levels) on the joint axes; zorder=0 asks for
# them to be drawn underneath the existing points.
g5.plot_joint(sea.kdeplot,
color='r',
zorder=0,
levels=6)
# Add a red rug plot to both marginal axes.
# NOTE(review): the negative height together with clip_on=False presumably
# places the rug ticks outside the marginal axes - confirm visually.
g5.plot_marginals(sea.rugplot,
color="r",
height=-.15,
clip_on=False)
<seaborn.axisgrid.JointGrid at 0x7fdb1c5b16a0>
g6 = sea.pairplot(dd[['attack', 'defense', 'hp', 'speed', 'group1']],
hue='group1', height=2.5)
sea.set_theme(style="ticks")
# Initialize the figure (the x axis keeps its default linear scale)
f, ax = plt.subplots(figsize=(7, 6))
# Plot hp per group as horizontal boxes; whis=[0, 100] extends the whiskers
# to the minimum and maximum of each group
sea.boxplot(data=dd,
y="group1",
x="hp",
whis=[0, 100],
width=.6,
palette="Set3", showmeans=True) # the mean markers are hidden by the scatter points
# Add in points to show each observation
sea.stripplot(data=dd,
y="group1",
x="hp",
size=4,
color=".3",
linewidth=0,
alpha=.45)
# Tweak the visual presentation
ax.xaxis.grid(True)
ax.set(ylabel="Primary Type Group")
sea.despine(trim=True, left=True)
https://towardsdatascience.com/violin-strip-swarm-and-raincloud-plots-in-python-as-better-sometimes-alternatives-to-a-boxplot-15019bdff8f8
plt.figure(figsize=(15, 10))

# Create violin plots without mini-boxplots inside.
ax = sea.violinplot(data=dd,
                    x='speed',
                    y='group1',
                    color='paleturquoise',
                    cut=0,
                    inner=None)

# Keep only one half of each violin by clipping it to a rectangle that spans
# the violin's full width but only half of its height (in data coordinates).
for item in ax.collections:
    x0, y0, width, height = item.get_paths()[0].get_extents().bounds
    item.set_clip_path(plt.Rectangle((x0, y0), width, height/2,
                                     transform=ax.transData))

# Remember how many collections the violins created, so the strip-plot
# collections added next can be addressed separately.
num_items = len(ax.collections)

# Create strip plots with partially transparent points, coloured by whether
# the observation is legendary.
sea.stripplot(data=dd,
              x='speed',
              y='group1',
              hue='is_legendary',
              palette=['deepskyblue', 'navy'],
              alpha=0.6,
              size=7)

# Shift each strip plot below the corresponding violin. Only the y coordinate
# is moved: adding a bare scalar to the (N, 2) offsets would also add 0.15 to
# every speed value on the x axis.
for item in ax.collections[num_items:]:
    item.set_offsets(item.get_offsets() + [0, 0.15])

# Create narrow boxplots on top of the corresponding violin and strip plots,
# with thick lines, the mean values, and without the outliers.
sea.boxplot(data=dd,
            x='speed',
            y='group1',
            width=0.25,
            showfliers=False,
            showmeans=True,
            meanprops=dict(marker='o', markerfacecolor='gold',
                           markersize=10, zorder=3),
            boxprops=dict(facecolor=(0,0,0,0),
                          linewidth=3, zorder=3),
            whiskerprops=dict(linewidth=3),
            capprops=dict(linewidth=3),
            medianprops=dict(linewidth=3))
plt.legend(frameon=False, fontsize=15, loc='upper left')
<matplotlib.legend.Legend at 0x7fdb15f20220>