import os # interact with system
import numpy as np # manipulation
import pandas as pd # manipulation
import statsmodels as stm # statistics
import seaborn as sea # visualization
#import matplotlib as mpl # "
import matplotlib.pyplot as plt # "
# change working directory
os.chdir("/home/heitor/ProjetosPy/ML_PyPratice/Basic")
# import the csv file as "dd"
dd = pd.read_csv("pokemon.csv")
dd # general
name | type1 | hp | attack | defense | |
---|---|---|---|---|---|
793 | Buzzwole | bug | 107 | 139 | 139 |
468 | Yanmega | bug | 86 | 76 | 86 |
636 | Volcarona | bug | 85 | 60 | 65 |
213 | Heracross | bug | 80 | 185 | 115 |
616 | Accelgor | bug | 80 | 70 | 40 |
... | ... | ... | ... | ... | ... |
97 | Krabby | water | 30 | 105 | 90 |
115 | Horsea | water | 30 | 40 | 70 |
119 | Staryu | water | 30 | 45 | 55 |
128 | Magikarp | water | 20 | 10 | 55 |
348 | Feebas | water | 20 | 15 | 20 |
801 rows × 5 columns
dd.head(5) # first 5 lines
abilities | against_bug | against_dark | against_dragon | against_electric | against_fairy | against_fight | against_fire | against_flying | against_ghost | ... | percentage_male | pokedex_number | sp_attack | sp_defense | speed | type1 | type2 | weight_kg | generation | is_legendary | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | ['Overgrow', 'Chlorophyll'] | 1.0 | 1.0 | 1.0 | 0.5 | 0.5 | 0.5 | 2.0 | 2.0 | 1.0 | ... | 88.1 | 1 | 65 | 65 | 45 | grass | poison | 6.9 | 1 | 0 |
1 | ['Overgrow', 'Chlorophyll'] | 1.0 | 1.0 | 1.0 | 0.5 | 0.5 | 0.5 | 2.0 | 2.0 | 1.0 | ... | 88.1 | 2 | 80 | 80 | 60 | grass | poison | 13.0 | 1 | 0 |
2 | ['Overgrow', 'Chlorophyll'] | 1.0 | 1.0 | 1.0 | 0.5 | 0.5 | 0.5 | 2.0 | 2.0 | 1.0 | ... | 88.1 | 3 | 122 | 120 | 80 | grass | poison | 100.0 | 1 | 0 |
3 | ['Blaze', 'Solar Power'] | 0.5 | 1.0 | 1.0 | 1.0 | 0.5 | 1.0 | 0.5 | 1.0 | 1.0 | ... | 88.1 | 4 | 60 | 50 | 65 | fire | NaN | 8.5 | 1 | 0 |
4 | ['Blaze', 'Solar Power'] | 0.5 | 1.0 | 1.0 | 1.0 | 0.5 | 1.0 | 0.5 | 1.0 | 1.0 | ... | 88.1 | 5 | 80 | 65 | 80 | fire | NaN | 19.0 | 1 | 0 |
5 rows × 41 columns
dd["abilities"][0:7] # spec variable and lines
0 ['Overgrow', 'Chlorophyll'] 1 ['Overgrow', 'Chlorophyll'] 2 ['Overgrow', 'Chlorophyll'] 3 ['Blaze', 'Solar Power'] 4 ['Blaze', 'Solar Power'] 5 ['Blaze', 'Solar Power'] 6 ['Torrent', 'Rain Dish'] Name: abilities, dtype: object
dd[["speed", "name", "type1"]] # list spec variables
speed | name | type1 | |
---|---|---|---|
0 | 45 | Bulbasaur | grass |
1 | 60 | Ivysaur | grass |
2 | 80 | Venusaur | grass |
3 | 65 | Charmander | fire |
4 | 80 | Charmeleon | fire |
... | ... | ... | ... |
796 | 61 | Celesteela | steel |
797 | 109 | Kartana | grass |
798 | 43 | Guzzlord | dark |
799 | 79 | Necrozma | psychic |
800 | 65 | Magearna | steel |
801 rows × 3 columns
dd.columns # see col titles
Index(['abilities', 'against_bug', 'against_dark', 'against_dragon', 'against_electric', 'against_fairy', 'against_fight', 'against_fire', 'against_flying', 'against_ghost', 'against_grass', 'against_ground', 'against_ice', 'against_normal', 'against_poison', 'against_psychic', 'against_rock', 'against_steel', 'against_water', 'attack', 'base_egg_steps', 'base_happiness', 'base_total', 'capture_rate', 'classfication', 'defense', 'experience_growth', 'height_m', 'hp', 'japanese_name', 'name', 'percentage_male', 'pokedex_number', 'sp_attack', 'sp_defense', 'speed', 'type1', 'type2', 'weight_kg', 'generation', 'is_legendary'], dtype='object')
dd.dtypes # see col variables types
abilities object against_bug float64 against_dark float64 against_dragon float64 against_electric float64 against_fairy float64 against_fight float64 against_fire float64 against_flying float64 against_ghost float64 against_grass float64 against_ground float64 against_ice float64 against_normal float64 against_poison float64 against_psychic float64 against_rock float64 against_steel float64 against_water float64 attack int64 base_egg_steps int64 base_happiness int64 base_total int64 capture_rate object classfication object defense int64 experience_growth int64 height_m float64 hp int64 japanese_name object name object percentage_male float64 pokedex_number int64 sp_attack int64 sp_defense int64 speed int64 type1 object type2 object weight_kg float64 generation int64 is_legendary int64 dtype: object
dd.describe()
against_bug | against_dark | against_dragon | against_electric | against_fairy | against_fight | against_fire | against_flying | against_ghost | against_grass | ... | height_m | hp | percentage_male | pokedex_number | sp_attack | sp_defense | speed | weight_kg | generation | is_legendary | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 801.000000 | 801.000000 | 801.000000 | 801.000000 | 801.000000 | 801.000000 | 801.000000 | 801.000000 | 801.000000 | 801.000000 | ... | 781.000000 | 801.000000 | 703.000000 | 801.000000 | 801.000000 | 801.000000 | 801.000000 | 781.000000 | 801.000000 | 801.000000 |
mean | 0.996255 | 1.057116 | 0.968789 | 1.073970 | 1.068976 | 1.065543 | 1.135456 | 1.192884 | 0.985019 | 1.034020 | ... | 1.163892 | 68.958801 | 55.155761 | 401.000000 | 71.305868 | 70.911361 | 66.334582 | 61.378105 | 3.690387 | 0.087391 |
std | 0.597248 | 0.438142 | 0.353058 | 0.654962 | 0.522167 | 0.717251 | 0.691853 | 0.604488 | 0.558256 | 0.788896 | ... | 1.080326 | 26.576015 | 20.261623 | 231.373075 | 32.353826 | 27.942501 | 28.907662 | 109.354766 | 1.930420 | 0.282583 |
min | 0.250000 | 0.250000 | 0.000000 | 0.000000 | 0.250000 | 0.000000 | 0.250000 | 0.250000 | 0.000000 | 0.250000 | ... | 0.100000 | 1.000000 | 0.000000 | 1.000000 | 10.000000 | 20.000000 | 5.000000 | 0.100000 | 1.000000 | 0.000000 |
25% | 0.500000 | 1.000000 | 1.000000 | 0.500000 | 1.000000 | 0.500000 | 0.500000 | 1.000000 | 1.000000 | 0.500000 | ... | 0.600000 | 50.000000 | 50.000000 | 201.000000 | 45.000000 | 50.000000 | 45.000000 | 9.000000 | 2.000000 | 0.000000 |
50% | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 1.000000 | 65.000000 | 50.000000 | 401.000000 | 65.000000 | 66.000000 | 65.000000 | 27.300000 | 4.000000 | 0.000000 |
75% | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 2.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 1.500000 | 80.000000 | 50.000000 | 601.000000 | 91.000000 | 90.000000 | 85.000000 | 64.800000 | 5.000000 | 0.000000 |
max | 4.000000 | 4.000000 | 2.000000 | 4.000000 | 4.000000 | 4.000000 | 4.000000 | 4.000000 | 4.000000 | 4.000000 | ... | 14.500000 | 255.000000 | 100.000000 | 801.000000 | 194.000000 | 230.000000 | 180.000000 | 999.900000 | 7.000000 | 1.000000 |
8 rows × 34 columns
dd["speed"].describe()
count 801.000000 mean 66.334582 std 28.907662 min 5.000000 25% 45.000000 50% 65.000000 75% 85.000000 max 180.000000 Name: speed, dtype: float64
Lets see the strings variables:
dd["abilities"].describe() # freq: most common value’s frequency
name | type1 | hp | attack | defense | |
---|---|---|---|---|---|
793 | Buzzwole | bug | 107 | 139 | 139 |
468 | Yanmega | bug | 86 | 76 | 86 |
636 | Volcarona | bug | 85 | 60 | 65 |
213 | Heracross | bug | 80 | 185 | 115 |
616 | Accelgor | bug | 80 | 70 | 40 |
... | ... | ... | ... | ... | ... |
97 | Krabby | water | 30 | 105 | 90 |
115 | Horsea | water | 30 | 40 | 70 |
119 | Staryu | water | 30 | 45 | 55 |
128 | Magikarp | water | 20 | 10 | 55 |
348 | Feebas | water | 20 | 15 | 20 |
801 rows × 5 columns
dd["classfication"].describe()
count 801 unique 588 top Dragon Pokémon freq 8 Name: classfication, dtype: object
dd["capture_rate"].describe() # 'capture_rate' should be an int!!
count 801 unique 34 top 45 freq 250 Name: capture_rate, dtype: object
dd['is_legendary'] .value_counts()
0 731 1 70 Name: is_legendary, dtype: int64
dd["type1"].value_counts()
water 114 normal 105 grass 78 bug 72 psychic 53 fire 52 rock 45 electric 39 poison 32 ground 32 dark 29 fighting 28 ghost 27 dragon 27 steel 24 ice 23 fairy 18 flying 3 Name: type1, dtype: int64
dd["type2"].value_counts()
flying 95 poison 34 ground 34 fairy 29 psychic 29 fighting 25 steel 22 dark 21 grass 20 water 17 dragon 17 ice 15 rock 14 ghost 14 fire 13 electric 9 bug 5 normal 4 Name: type2, dtype: int64
Sorting:
dd.sort_values(by=["type1", "hp"],
ascending=[1, 0]) # ascending by type1 but not by hp
abilities | against_bug | against_dark | against_dragon | against_electric | against_fairy | against_fight | against_fire | against_flying | against_ghost | ... | percentage_male | pokedex_number | sp_attack | sp_defense | speed | type1 | type2 | weight_kg | generation | is_legendary | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
793 | ['Beast Boost'] | 0.5 | 0.5 | 1.0 | 1.0 | 2.0 | 0.50 | 2.0 | 4.0 | 1.0 | ... | NaN | 794 | 53 | 53 | 79 | bug | fighting | 333.6 | 7 | 1 |
468 | ['Speed Boost', 'Tinted Lens', 'Frisk'] | 0.5 | 1.0 | 1.0 | 2.0 | 1.0 | 0.25 | 2.0 | 2.0 | 1.0 | ... | 50.0 | 469 | 116 | 56 | 95 | bug | flying | 51.5 | 4 | 0 |
636 | ['Flame Body', 'Swarm'] | 0.5 | 1.0 | 1.0 | 1.0 | 0.5 | 0.50 | 1.0 | 2.0 | 1.0 | ... | 50.0 | 637 | 135 | 105 | 100 | bug | fire | 46.0 | 5 | 0 |
213 | ['Swarm', 'Guts', 'Moxie'] | 0.5 | 0.5 | 1.0 | 1.0 | 2.0 | 0.50 | 2.0 | 4.0 | 1.0 | ... | 50.0 | 214 | 40 | 105 | 75 | bug | fighting | 54.0 | 2 | 0 |
616 | ['Hydration', 'Sticky Hold', 'Unburden'] | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.50 | 2.0 | 2.0 | 1.0 | ... | 50.0 | 617 | 100 | 60 | 145 | bug | NaN | 25.3 | 5 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
97 | ['Hyper Cutter', 'Shell Armor', 'Sheer Force'] | 1.0 | 1.0 | 1.0 | 2.0 | 1.0 | 1.00 | 0.5 | 1.0 | 1.0 | ... | 50.0 | 98 | 25 | 25 | 50 | water | NaN | 6.5 | 1 | 0 |
115 | ['Swift Swim', 'Sniper', 'Damp'] | 1.0 | 1.0 | 1.0 | 2.0 | 1.0 | 1.00 | 0.5 | 1.0 | 1.0 | ... | 50.0 | 116 | 70 | 25 | 60 | water | NaN | 8.0 | 1 | 0 |
119 | ['Illuminate', 'Natural Cure', 'Analytic'] | 1.0 | 1.0 | 1.0 | 2.0 | 1.0 | 1.00 | 0.5 | 1.0 | 1.0 | ... | NaN | 120 | 70 | 55 | 85 | water | NaN | 34.5 | 1 | 0 |
128 | ['Swift Swim', 'Rattled'] | 1.0 | 1.0 | 1.0 | 2.0 | 1.0 | 1.00 | 0.5 | 1.0 | 1.0 | ... | 50.0 | 129 | 15 | 20 | 80 | water | NaN | 10.0 | 1 | 0 |
348 | ['Swift Swim', 'Oblivious', 'Adaptability'] | 1.0 | 1.0 | 1.0 | 2.0 | 1.0 | 1.00 | 0.5 | 1.0 | 1.0 | ... | 50.0 | 349 | 10 | 55 | 80 | water | NaN | 7.4 | 3 | 0 |
801 rows × 41 columns
dd[["name", "type1", "hp", "attack", "defense"]].sort_values(
by=["type1", "hp"], ascending=[1, 0])
name | type1 | hp | attack | defense | |
---|---|---|---|---|---|
793 | Buzzwole | bug | 107 | 139 | 139 |
468 | Yanmega | bug | 86 | 76 | 86 |
636 | Volcarona | bug | 85 | 60 | 65 |
213 | Heracross | bug | 80 | 185 | 115 |
616 | Accelgor | bug | 80 | 70 | 40 |
... | ... | ... | ... | ... | ... |
97 | Krabby | water | 30 | 105 | 90 |
115 | Horsea | water | 30 | 40 | 70 |
119 | Staryu | water | 30 | 45 | 55 |
128 | Magikarp | water | 20 | 10 | 55 |
348 | Feebas | water | 20 | 15 | 20 |
801 rows × 5 columns
modify the type of 'capture_rate':
dd["capture_rate"] .astype(int)
dd.loc[dd['capture_rate'] == '30 (Meteorite)255 (Core)']
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) /tmp/ipykernel_23112/14930470.py in <module> ----> 1 dd["capture_rate"] .astype(int) 2 dd.loc[dd['capture_rate'] == '30 (Meteorite)255 (Core)'] ~/.anaconda3/lib/python3.9/site-packages/pandas/core/generic.py in astype(self, dtype, copy, errors) 5813 else: 5814 # else, only a single dtype is given -> 5815 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors) 5816 return self._constructor(new_data).__finalize__(self, method="astype") 5817 ~/.anaconda3/lib/python3.9/site-packages/pandas/core/internals/managers.py in astype(self, dtype, copy, errors) 416 417 def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T: --> 418 return self.apply("astype", dtype=dtype, copy=copy, errors=errors) 419 420 def convert( ~/.anaconda3/lib/python3.9/site-packages/pandas/core/internals/managers.py in apply(self, f, align_keys, ignore_failures, **kwargs) 325 applied = b.apply(f, **kwargs) 326 else: --> 327 applied = getattr(b, f)(**kwargs) 328 except (TypeError, NotImplementedError): 329 if not ignore_failures: ~/.anaconda3/lib/python3.9/site-packages/pandas/core/internals/blocks.py in astype(self, dtype, copy, errors) 589 values = self.values 590 --> 591 new_values = astype_array_safe(values, dtype, copy=copy, errors=errors) 592 593 new_values = maybe_coerce_values(new_values) ~/.anaconda3/lib/python3.9/site-packages/pandas/core/dtypes/cast.py in astype_array_safe(values, dtype, copy, errors) 1307 1308 try: -> 1309 new_values = astype_array(values, dtype, copy=copy) 1310 except (ValueError, TypeError): 1311 # e.g. astype_nansafe can fail on object-dtype of strings ~/.anaconda3/lib/python3.9/site-packages/pandas/core/dtypes/cast.py in astype_array(values, dtype, copy) 1255 1256 else: -> 1257 values = astype_nansafe(values, dtype, copy=copy) 1258 1259 # in pandas we don't store numpy str dtypes, so convert to object ~/.anaconda3/lib/python3.9/site-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy, skipna) 1172 # work around NumPy brokenness, #1987 1173 if np.issubdtype(dtype.type, np.integer): -> 1174 return lib.astype_intsafe(arr, dtype) 1175 1176 # if we have a datetime/timedelta array of objects ~/.anaconda3/lib/python3.9/site-packages/pandas/_libs/lib.pyx in pandas._libs.lib.astype_intsafe() ValueError: invalid literal for int() with base 10: '30 (Meteorite)255 (Core)'
there's an observation in 'capture_rate' thar have a text, lets change this observ to 30+255 = 285:
dd.loc[dd['capture_rate'] == '30 (Meteorite)255 (Core)', 'capture_rate'] = '285'
now we can aplly the tranformation to int:
dd["capture_rate"] = dd["capture_rate"].astype(int)
transforming some variables in categorical:
dd["generation"] = dd["generation"] .astype('category')
dd["is_legendary"] = dd["is_legendary"] .astype('category')
dd["type1"] = dd["type1"] .astype('category')
dd["type2"] = dd["type2"] .astype('category')
Create a new variable: normality of Attack among generation subgroups:
(var - var.min) / (var.max - var.min)
dd.groupby('generation')['attack'].min()
dd.groupby('generation')['attack'].max()
generation 1 155 2 185 3 180 4 170 5 150 6 160 7 181 Name: attack, dtype: int64
to this, we need a same length vector containing min & max of the generations:
dd.groupby('generation')['attack'].transform(min)
0 5 1 5 2 5 3 5 4 5 .. 796 29 797 29 798 29 799 29 800 29 Name: attack, Length: 801, dtype: int64
dd.groupby('generation')['attack'].transform(max)
0 155 1 155 2 155 3 155 4 155 ... 796 181 797 181 798 181 799 181 800 181 Name: attack, Length: 801, dtype: int64
so:
dd['atk_gen_dnorm'] = (dd['attack'] - dd.groupby('generation')['attack'].transform(min)) / (dd.groupby('generation')['attack'].transform(max) - dd.groupby('generation')['attack'].transform(min))
dd['atk_gen_dnorm'].describe()
count 801.000000 mean 0.415197 std 0.216498 min 0.000000 25% 0.257143 50% 0.393939 75% 0.560000 max 1.000000 Name: atk_gen_dnorm, dtype: float64
Creating new var: mean of the against_x variables:
dd["Against_M"] = dd.iloc[:, 1:18].mean(axis=1)
dd['Against_M'].describe()
count 801.000000 mean 1.057979 std 0.113671 min 0.720588 25% 0.985294 50% 1.029412 75% 1.117647 max 1.441176 Name: Against_M, dtype: float64
Creating new var from type1:
dd['group1'] = dd['type1'].astype('str')
dd.group1[(dd['type1']=='water') | (dd['type1']=='ice')] = 'aqua'
dd.group1[(dd['type1']=='normal') | (dd['type1']=='fighting')] = 'body'
dd.group1[(dd['type1']=='fire') | (dd['type1']=='dragon') |
(dd['type1']=='electric')] = 'energy'
dd.group1[(dd['type1']=='rock') | (dd['type1']=='steel') |
(dd['type1']=='ground') | (dd['type1']=='grass') |
(dd['type1']=='bug')] = 'earth'
dd.group1[(dd['type1']=='dark') | (dd['type1']=='ghost') |
(dd['type1']=='psychic') | (dd['type1']=='poison')] = 'dark'
dd.group1[(dd['type1']=='fairy') | (dd['type1']=='flying')] = 'air'
dd['group1'] = dd['group1'].astype('category')
/tmp/ipykernel_23112/2645664726.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy dd.group1[(dd['type1']=='water') | (dd['type1']=='ice')] = 'aqua' /tmp/ipykernel_23112/2645664726.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy dd.group1[(dd['type1']=='normal') | (dd['type1']=='fighting')] = 'body' /tmp/ipykernel_23112/2645664726.py:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy dd.group1[(dd['type1']=='fire') | (dd['type1']=='dragon') | /tmp/ipykernel_23112/2645664726.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy dd.group1[(dd['type1']=='rock') | (dd['type1']=='steel') | /tmp/ipykernel_23112/2645664726.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy dd.group1[(dd['type1']=='dark') | (dd['type1']=='ghost') | /tmp/ipykernel_23112/2645664726.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy dd.group1[(dd['type1']=='fairy') | (dd['type1']=='flying')] = 'air'
dd['group1'].value_counts()
earth 251 dark 141 aqua 137 body 133 energy 118 air 21 Name: group1, dtype: int64
Deleting Variables:
dd = dd[dd.columns.drop(list(dd.filter(regex='against')))] # deleting with condition
dd = dd.drop(columns=["abilities"])
dd = dd.drop(columns=["classfication"])
dd = dd.drop(columns=["base_egg_steps"])
dd = dd.drop(columns=["base_happiness"])
dd = dd.drop(columns=["base_total"])
dd = dd.drop(columns=["height_m"])
dd = dd.drop(columns=["percentage_male"])
dd = dd.drop(columns=["pokedex_number"])
dd = dd.drop(columns=["sp_attack"])
dd = dd.drop(columns=["sp_defense"])
dd = dd.drop(columns=["japanese_name"])
print(dd.iloc[5:10]) # some specific lines
attack capture_rate defense experience_growth hp name speed \ 5 104 45 78 1059860 78 Charizard 100 6 48 45 65 1059860 44 Squirtle 43 7 63 45 80 1059860 59 Wartortle 58 8 103 45 120 1059860 79 Blastoise 78 9 30 255 35 1000000 45 Caterpie 45 type1 type2 weight_kg generation is_legendary atk_gen_dnorm Against_M \ 5 fire flying 90.5 1 0 0.660000 0.970588 6 water NaN 9.0 1 0 0.286667 1.029412 7 water NaN 22.5 1 0 0.386667 1.029412 8 water NaN 85.5 1 0 0.653333 1.029412 9 bug NaN 2.9 1 0 0.166667 1.088235 group1 5 energy 6 aqua 7 aqua 8 aqua 9 earth
print(dd.loc[dd["type1"] == "fire"]) # some spec character
attack capture_rate defense experience_growth hp name \ 3 52 45 43 1059860 39 Charmander 4 64 45 58 1059860 58 Charmeleon 5 104 45 78 1059860 78 Charizard 36 41 190 40 1000000 38 Vulpix 37 67 75 75 1000000 73 Ninetales 57 70 190 45 1250000 55 Growlithe 58 110 75 80 1250000 90 Arcanine 76 85 190 55 1000000 50 Ponyta 77 100 60 70 1000000 65 Rapidash 125 95 45 57 1000000 65 Magmar 135 130 45 60 1000000 65 Flareon 145 100 3 90 1250000 90 Moltres 154 52 45 43 1059860 39 Cyndaquil 155 64 45 58 1059860 58 Quilava 156 84 45 78 1059860 78 Typhlosion 217 40 190 40 1000000 40 Slugma 218 50 75 120 1000000 60 Magcargo 239 75 45 37 1000000 45 Magby 243 115 3 85 1250000 115 Entei 249 130 3 90 1250000 106 Ho-Oh 254 60 45 40 1059860 45 Torchic 255 85 45 60 1059860 60 Combusken 256 160 45 80 1059860 80 Blaziken 321 60 255 40 1000000 60 Numel 322 120 150 100 1000000 70 Camerupt 323 85 90 140 1000000 70 Torkoal 389 58 45 44 1059860 44 Chimchar 390 78 45 52 1059860 64 Monferno 391 104 45 71 1059860 76 Infernape 466 95 30 67 1000000 75 Magmortar 484 90 3 106 1250000 91 Heatran 497 63 45 45 1059860 65 Tepig 498 93 45 55 1059860 90 Pignite 499 123 45 65 1059860 110 Emboar 512 53 190 48 1000000 50 Pansear 513 98 75 63 1000000 75 Simisear 553 90 120 45 1059860 70 Darumaka 554 30 60 105 1059860 105 Darmanitan 630 97 90 66 1000000 85 Heatmor 652 45 45 40 1059860 40 Fennekin 653 59 45 58 1059860 59 Braixen 654 69 45 72 1059860 75 Delphox 661 73 120 55 1059860 62 Fletchinder 662 81 45 71 1059860 78 Talonflame 666 50 220 58 1059860 62 Litleo 667 68 65 72 1059860 86 Pyroar 720 110 3 120 1250000 80 Volcanion 724 65 45 40 1059860 45 Litten 725 85 45 50 1059860 65 Torracat 726 115 45 90 1059860 95 Incineroar 740 70 45 70 1000000 75 Oricorio 775 78 70 135 1000000 60 Turtonator speed type1 type2 weight_kg generation is_legendary atk_gen_dnorm \ 3 65 fire NaN 8.5 1 0 0.313333 4 80 fire NaN 19.0 1 0 0.393333 5 100 fire flying 90.5 1 0 0.660000 36 65 fire ice NaN 1 0 0.240000 37 109 fire ice NaN 1 0 0.413333 57 60 fire NaN 19.0 1 0 0.433333 58 95 fire NaN 155.0 1 0 0.700000 76 90 fire NaN 30.0 1 0 0.533333 77 105 fire NaN 95.0 1 0 0.633333 125 93 fire NaN 44.5 1 0 0.600000 135 65 fire NaN 25.0 1 0 0.833333 145 90 fire flying 60.0 1 1 0.633333 154 65 fire NaN 7.9 2 0 0.240000 155 80 fire NaN 19.0 2 0 0.308571 156 100 fire NaN 79.5 2 0 0.422857 217 20 fire NaN 35.0 2 0 0.171429 218 30 fire rock 55.0 2 0 0.228571 239 83 fire NaN 21.4 2 0 0.371429 243 100 fire NaN 198.0 2 1 0.600000 249 90 fire flying 199.0 2 1 0.685714 254 45 fire NaN 2.5 3 0 0.272727 255 55 fire fighting 19.5 3 0 0.424242 256 100 fire fighting 52.0 3 0 0.878788 321 35 fire ground 24.0 3 0 0.272727 322 20 fire ground 220.0 3 0 0.636364 323 20 fire NaN 80.4 3 0 0.424242 389 61 fire NaN 6.2 4 0 0.321212 390 81 fire fighting 22.0 4 0 0.442424 391 108 fire fighting 55.0 4 0 0.600000 466 83 fire NaN 68.0 4 0 0.545455 484 77 fire steel 430.0 4 1 0.515152 497 45 fire NaN 9.9 5 0 0.304000 498 55 fire fighting 55.5 5 0 0.544000 499 65 fire fighting 150.0 5 0 0.784000 512 64 fire NaN 11.0 5 0 0.224000 513 101 fire NaN 28.0 5 0 0.584000 553 50 fire NaN 37.5 5 0 0.520000 554 55 fire fire 92.9 5 0 0.040000 630 65 fire NaN 58.0 5 0 0.576000 652 60 fire NaN 9.4 6 0 0.166667 653 73 fire NaN 14.5 6 0 0.268116 654 104 fire psychic 39.0 6 0 0.340580 661 84 fire flying 16.0 6 0 0.369565 662 126 fire flying 24.5 6 0 0.427536 666 72 fire normal 13.5 6 0 0.202899 667 106 fire normal 81.5 6 0 0.333333 720 70 fire water 195.0 6 1 0.637681 724 70 fire NaN 4.3 7 0 0.236842 725 90 fire NaN 25.0 7 0 0.368421 726 60 fire dark 83.0 7 0 0.565789 740 93 fire flying 3.4 7 0 0.269737 775 36 fire dragon 212.0 7 0 0.322368 Against_M group1 3 0.941176 energy 4 0.941176 energy 5 0.970588 energy 36 0.941176 energy 37 0.941176 energy 57 0.941176 energy 58 0.941176 energy 76 0.941176 energy 77 0.941176 energy 125 0.941176 energy 135 0.941176 energy 145 0.970588 energy 154 0.941176 energy 155 0.941176 energy 156 0.941176 energy 217 0.941176 energy 218 1.073529 energy 239 0.941176 energy 243 0.941176 energy 249 0.970588 energy 254 0.941176 energy 255 0.985294 energy 256 0.985294 energy 321 0.852941 energy 322 0.852941 energy 323 0.941176 energy 389 0.941176 energy 390 0.985294 energy 391 0.985294 energy 466 0.941176 energy 484 0.838235 energy 497 0.941176 energy 498 0.985294 energy 499 0.985294 energy 512 0.941176 energy 513 0.941176 energy 553 0.941176 energy 554 0.941176 energy 630 0.941176 energy 652 0.941176 energy 653 0.941176 energy 654 1.029412 energy 661 0.970588 energy 662 0.970588 energy 666 0.941176 energy 667 0.941176 energy 720 0.985294 energy 724 0.941176 energy 725 0.941176 energy 726 0.941176 energy 740 0.970588 energy 775 1.000000 energy
dd.loc[(dd["type1"] == "grass") & (dd["type2"] == "poison") & (dd["hp"] > 75)]
attack | capture_rate | defense | experience_growth | hp | name | speed | type1 | type2 | weight_kg | generation | is_legendary | atk_gen_dnorm | Against_M | group1 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2 | 100 | 45 | 123 | 1059860 | 80 | Venusaur | 80 | grass | poison | 100.0 | 1 | 0 | 0.633333 | 1.102941 | earth |
70 | 105 | 45 | 65 | 1059860 | 80 | Victreebel | 70 | grass | poison | 15.5 | 1 | 0 | 0.666667 | 1.102941 | earth |
590 | 85 | 75 | 70 | 1000000 | 114 | Amoonguss | 30 | grass | poison | 10.5 | 5 | 0 | 0.480000 | 1.102941 | earth |
Obs: iloc is purely by indexes and loc by names. Now, lets select what contains 'Mega' in name:
dd.loc[dd["name"].str.contains("mega")]
attack | capture_rate | defense | experience_growth | hp | name | speed | type1 | type2 | weight_kg | generation | is_legendary | atk_gen_dnorm | Against_M | group1 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
468 | 76 | 30 | 86 | 1000000 | 86 | Yanmega | 95 | bug | flying | 51.5 | 4 | 0 | 0.430303 | 1.235294 | earth |
Select type fire or water:
dd.loc[dd["type1"].str.contains("fire|water")]
attack | capture_rate | defense | experience_growth | hp | name | speed | type1 | type2 | weight_kg | generation | is_legendary | atk_gen_dnorm | Against_M | group1 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
3 | 52 | 45 | 43 | 1059860 | 39 | Charmander | 65 | fire | NaN | 8.5 | 1 | 0 | 0.313333 | 0.941176 | energy |
4 | 64 | 45 | 58 | 1059860 | 58 | Charmeleon | 80 | fire | NaN | 19.0 | 1 | 0 | 0.393333 | 0.941176 | energy |
5 | 104 | 45 | 78 | 1059860 | 78 | Charizard | 100 | fire | flying | 90.5 | 1 | 0 | 0.660000 | 0.970588 | energy |
6 | 48 | 45 | 65 | 1059860 | 44 | Squirtle | 43 | water | NaN | 9.0 | 1 | 0 | 0.286667 | 1.029412 | aqua |
7 | 63 | 45 | 80 | 1059860 | 59 | Wartortle | 58 | water | NaN | 22.5 | 1 | 0 | 0.386667 | 1.029412 | aqua |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
751 | 70 | 100 | 92 | 1000000 | 68 | Araquanid | 42 | water | bug | 82.0 | 7 | 0 | 0.269737 | 1.058824 | aqua |
770 | 60 | 60 | 130 | 800000 | 55 | Pyukumuku | 5 | water | NaN | 1.2 | 7 | 0 | 0.203947 | 1.029412 | aqua |
775 | 78 | 70 | 135 | 1000000 | 60 | Turtonator | 36 | fire | dragon | 212.0 | 7 | 0 | 0.322368 | 1.000000 | energy |
778 | 105 | 80 | 70 | 1000000 | 68 | Bruxish | 92 | water | psychic | 19.0 | 7 | 0 | 0.500000 | 1.147059 | aqua |
787 | 75 | 3 | 115 | 1250000 | 70 | Tapu Fini | 85 | water | fairy | 21.2 | 7 | 1 | 0.302632 | 0.970588 | aqua |
166 rows × 15 columns
dd.groupby("type1").mean().sort_values("attack")
attack | capture_rate | defense | experience_growth | hp | speed | weight_kg | atk_gen_dnorm | Against_M | |
---|---|---|---|---|---|---|---|---|---|
type1 | |||||||||
fairy | 62.111111 | 116.944444 | 68.166667 | 9.138889e+05 | 73.944444 | 53.666667 | 23.555556 | 0.305327 | 0.977124 |
psychic | 65.566038 | 86.660377 | 69.264151 | 1.079405e+06 | 72.943396 | 75.150943 | 57.328846 | 0.323916 | 1.112375 |
flying | 66.666667 | 79.333333 | 65.000000 | 1.083333e+06 | 68.000000 | 99.666667 | 52.000000 | 0.335266 | 1.117647 |
bug | 70.125000 | 119.833333 | 70.847222 | 1.009006e+06 | 56.722222 | 63.569444 | 33.083333 | 0.361085 | 1.119077 |
electric | 70.820513 | 106.974359 | 61.820513 | 1.073312e+06 | 60.512821 | 85.410256 | 37.944737 | 0.378562 | 0.952489 |
poison | 72.656250 | 128.437500 | 70.031250 | 1.086849e+06 | 65.593750 | 64.187500 | 33.830000 | 0.399824 | 0.978860 |
ghost | 72.740741 | 93.814815 | 79.518519 | 1.018117e+06 | 63.370370 | 58.333333 | 69.570370 | 0.365167 | 0.971678 |
ice | 73.304348 | 94.173913 | 71.913043 | 1.086069e+06 | 72.086957 | 62.739130 | 103.260870 | 0.386124 | 1.201407 |
water | 73.307018 | 99.412281 | 73.482456 | 1.056716e+06 | 70.219298 | 63.921053 | 51.071930 | 0.387995 | 1.043473 |
grass | 73.769231 | 106.935897 | 70.871795 | 1.079791e+06 | 65.358974 | 59.025641 | 33.255844 | 0.379938 | 1.200792 |
normal | 75.161905 | 120.219048 | 59.695238 | 1.009973e+06 | 76.723810 | 69.533333 | 46.158416 | 0.399823 | 1.005322 |
fire | 81.500000 | 72.403846 | 67.788462 | 1.064735e+06 | 68.730769 | 73.346154 | 66.096000 | 0.439117 | 0.950509 |
dark | 87.793103 | 84.482759 | 70.517241 | 1.102720e+06 | 72.551724 | 75.310345 | 69.096552 | 0.472996 | 1.080122 |
rock | 90.666667 | 76.533333 | 96.266667 | 9.793116e+05 | 66.333333 | 57.422222 | 92.946341 | 0.505399 | 1.146078 |
steel | 93.083333 | 58.916667 | 120.208333 | 1.126232e+06 | 66.791667 | 56.583333 | 188.841667 | 0.493987 | 0.841912 |
ground | 94.812500 | 108.312500 | 83.906250 | 1.069652e+06 | 73.187500 | 59.968750 | 150.044444 | 0.540137 | 1.028493 |
fighting | 99.178571 | 103.750000 | 66.392857 | 1.076021e+06 | 71.428571 | 64.285714 | 58.675000 | 0.571414 | 1.090336 |
dragon | 106.407407 | 37.333333 | 86.259259 | 1.216667e+06 | 79.851852 | 76.111111 | 107.125926 | 0.604641 | 1.129085 |
sea.set()
g1 = sea.histplot(dd, x="attack")
g11 = sea.histplot(dd, x="attack", kde=True)
g2 = sea.relplot(x= 'attack', y='defense', data=dd)
g3 = sea.relplot(data=dd,
x= 'attack',
y='defense',
hue='group1')
g4 = sea.jointplot(data=dd,
x= 'attack',
y='defense',
hue='group1')
g41 = sea.jointplot(data=dd,
x= 'attack',
y='defense',
hue='is_legendary',
kind= 'kde')
g42 = sea.jointplot(data=dd,
x= 'attack',
y='defense',
#hue='is_legendary',
kind= 'reg')
g43 = sea.jointplot(data=dd,
x= 'attack',
y='defense',
#hue='is_legendary',
kind= 'hex')
g5 = sea.jointplot(data=dd,
y='hp',
x='capture_rate')
g5.plot_joint(sea.kdeplot,
color='r',
zorder=0,
levels=6)
g5.plot_marginals(sea.rugplot,
color="r",
height=-.15,
clip_on=False)
<seaborn.axisgrid.JointGrid at 0x7fdb1c5b16a0>
g6 = sea.pairplot(dd[['attack', 'defense', 'hp', 'speed', 'group1']],
hue='group1', height=2.5)
sea.set_theme(style="ticks")
# Initialize the figure with a logarithmic x axis
f, ax = plt.subplots(figsize=(7, 6))
# Plot the orbital period with horizontal boxes
sea.boxplot(data=dd,
y="group1",
x="hp",
whis=[0, 100],
width=.6,
palette="Set3", showmeans=True) # the means are hidden by scarttes
# Add in points to show each observation
sea.stripplot(data=dd,
y="group1",
x="hp",
size=4,
color=".3",
linewidth=0,
alpha=.45)
# Tweak the visual presentation
ax.xaxis.grid(True)
ax.set(ylabel="Primary Type Group")
sea.despine(trim=True, left=True)
https://towardsdatascience.com/violin-strip-swarm-and-raincloud-plots-in-python-as-better-sometimes-alternatives-to-a-boxplot-15019bdff8f8
plt.figure(figsize=(15, 10))
# Create violin plots without mini-boxplots inside.
ax = sea.violinplot(data=dd,
x='speed',
y='group1',
color='paleturquoise',
cut=0,
inner=None)
# Clip the lower half of each violin.
for item in ax.collections:
x0, y0, width, height = item.get_paths()[0].get_extents().bounds
item.set_clip_path(plt.Rectangle((x0, y0), width, height/2,
transform=ax.transData))
# Create [swarm vs strip] plots with partially transparent points of different colors depending if is legendary.
num_items = len(ax.collections)
sea.stripplot(data=dd,
x='speed',
y='group1',
hue='is_legendary',
palette=['deepskyblue', 'navy'],
alpha=0.6,
size=7)
# Shift each strip plot strictly below the correponding volin.
for item in ax.collections[num_items:]:
item.set_offsets(item.get_offsets() + 0.15)
# Create narrow boxplots on top of the corresponding violin and strip plots, with thick lines, the mean values, without the outliers.
sea.boxplot(data=dd,
x='speed',
y='group1',
width=0.25,
showfliers=False,
showmeans=True,
meanprops=dict(marker='o', markerfacecolor='gold',
markersize=10, zorder=3),
boxprops=dict(facecolor=(0,0,0,0),
linewidth=3, zorder=3),
whiskerprops=dict(linewidth=3),
capprops=dict(linewidth=3),
medianprops=dict(linewidth=3))
plt.legend(frameon=False, fontsize=15, loc='upper left')
<matplotlib.legend.Legend at 0x7fdb15f20220>