import os                        # interact with system
import numpy             as np   # manipulation
import pandas            as pd   # manipulation
import statsmodels       as stm  # statistics
import seaborn           as sea  # visualization
#import matplotlib       as mpl  #      "
import matplotlib.pyplot as plt  #      "
# change working directory
os.chdir("/home/heitor/ProjetosPy/ML_PyPratice/Basic")
# import the csv file as "dd"
dd = pd.read_csv("pokemon.csv")
dd                              # general
| name | type1 | hp | attack | defense | |
|---|---|---|---|---|---|
| 793 | Buzzwole | bug | 107 | 139 | 139 | 
| 468 | Yanmega | bug | 86 | 76 | 86 | 
| 636 | Volcarona | bug | 85 | 60 | 65 | 
| 213 | Heracross | bug | 80 | 185 | 115 | 
| 616 | Accelgor | bug | 80 | 70 | 40 | 
| ... | ... | ... | ... | ... | ... | 
| 97 | Krabby | water | 30 | 105 | 90 | 
| 115 | Horsea | water | 30 | 40 | 70 | 
| 119 | Staryu | water | 30 | 45 | 55 | 
| 128 | Magikarp | water | 20 | 10 | 55 | 
| 348 | Feebas | water | 20 | 15 | 20 | 
801 rows × 5 columns
dd.head(5)                      # first 5 lines
| abilities | against_bug | against_dark | against_dragon | against_electric | against_fairy | against_fight | against_fire | against_flying | against_ghost | ... | percentage_male | pokedex_number | sp_attack | sp_defense | speed | type1 | type2 | weight_kg | generation | is_legendary | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ['Overgrow', 'Chlorophyll'] | 1.0 | 1.0 | 1.0 | 0.5 | 0.5 | 0.5 | 2.0 | 2.0 | 1.0 | ... | 88.1 | 1 | 65 | 65 | 45 | grass | poison | 6.9 | 1 | 0 | 
| 1 | ['Overgrow', 'Chlorophyll'] | 1.0 | 1.0 | 1.0 | 0.5 | 0.5 | 0.5 | 2.0 | 2.0 | 1.0 | ... | 88.1 | 2 | 80 | 80 | 60 | grass | poison | 13.0 | 1 | 0 | 
| 2 | ['Overgrow', 'Chlorophyll'] | 1.0 | 1.0 | 1.0 | 0.5 | 0.5 | 0.5 | 2.0 | 2.0 | 1.0 | ... | 88.1 | 3 | 122 | 120 | 80 | grass | poison | 100.0 | 1 | 0 | 
| 3 | ['Blaze', 'Solar Power'] | 0.5 | 1.0 | 1.0 | 1.0 | 0.5 | 1.0 | 0.5 | 1.0 | 1.0 | ... | 88.1 | 4 | 60 | 50 | 65 | fire | NaN | 8.5 | 1 | 0 | 
| 4 | ['Blaze', 'Solar Power'] | 0.5 | 1.0 | 1.0 | 1.0 | 0.5 | 1.0 | 0.5 | 1.0 | 1.0 | ... | 88.1 | 5 | 80 | 65 | 80 | fire | NaN | 19.0 | 1 | 0 | 
5 rows × 41 columns
dd["abilities"][0:7]            # spec variable and lines
0 ['Overgrow', 'Chlorophyll'] 1 ['Overgrow', 'Chlorophyll'] 2 ['Overgrow', 'Chlorophyll'] 3 ['Blaze', 'Solar Power'] 4 ['Blaze', 'Solar Power'] 5 ['Blaze', 'Solar Power'] 6 ['Torrent', 'Rain Dish'] Name: abilities, dtype: object
dd[["speed", "name", "type1"]]  # list spec variables
| speed | name | type1 | |
|---|---|---|---|
| 0 | 45 | Bulbasaur | grass | 
| 1 | 60 | Ivysaur | grass | 
| 2 | 80 | Venusaur | grass | 
| 3 | 65 | Charmander | fire | 
| 4 | 80 | Charmeleon | fire | 
| ... | ... | ... | ... | 
| 796 | 61 | Celesteela | steel | 
| 797 | 109 | Kartana | grass | 
| 798 | 43 | Guzzlord | dark | 
| 799 | 79 | Necrozma | psychic | 
| 800 | 65 | Magearna | steel | 
801 rows × 3 columns
dd.columns  # see col titles
Index(['abilities', 'against_bug', 'against_dark', 'against_dragon',
       'against_electric', 'against_fairy', 'against_fight', 'against_fire',
       'against_flying', 'against_ghost', 'against_grass', 'against_ground',
       'against_ice', 'against_normal', 'against_poison', 'against_psychic',
       'against_rock', 'against_steel', 'against_water', 'attack',
       'base_egg_steps', 'base_happiness', 'base_total', 'capture_rate',
       'classfication', 'defense', 'experience_growth', 'height_m', 'hp',
       'japanese_name', 'name', 'percentage_male', 'pokedex_number',
       'sp_attack', 'sp_defense', 'speed', 'type1', 'type2', 'weight_kg',
       'generation', 'is_legendary'],
      dtype='object')
dd.dtypes   # see col variables types
abilities object against_bug float64 against_dark float64 against_dragon float64 against_electric float64 against_fairy float64 against_fight float64 against_fire float64 against_flying float64 against_ghost float64 against_grass float64 against_ground float64 against_ice float64 against_normal float64 against_poison float64 against_psychic float64 against_rock float64 against_steel float64 against_water float64 attack int64 base_egg_steps int64 base_happiness int64 base_total int64 capture_rate object classfication object defense int64 experience_growth int64 height_m float64 hp int64 japanese_name object name object percentage_male float64 pokedex_number int64 sp_attack int64 sp_defense int64 speed int64 type1 object type2 object weight_kg float64 generation int64 is_legendary int64 dtype: object
dd.describe()
| against_bug | against_dark | against_dragon | against_electric | against_fairy | against_fight | against_fire | against_flying | against_ghost | against_grass | ... | height_m | hp | percentage_male | pokedex_number | sp_attack | sp_defense | speed | weight_kg | generation | is_legendary | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 801.000000 | 801.000000 | 801.000000 | 801.000000 | 801.000000 | 801.000000 | 801.000000 | 801.000000 | 801.000000 | 801.000000 | ... | 781.000000 | 801.000000 | 703.000000 | 801.000000 | 801.000000 | 801.000000 | 801.000000 | 781.000000 | 801.000000 | 801.000000 | 
| mean | 0.996255 | 1.057116 | 0.968789 | 1.073970 | 1.068976 | 1.065543 | 1.135456 | 1.192884 | 0.985019 | 1.034020 | ... | 1.163892 | 68.958801 | 55.155761 | 401.000000 | 71.305868 | 70.911361 | 66.334582 | 61.378105 | 3.690387 | 0.087391 | 
| std | 0.597248 | 0.438142 | 0.353058 | 0.654962 | 0.522167 | 0.717251 | 0.691853 | 0.604488 | 0.558256 | 0.788896 | ... | 1.080326 | 26.576015 | 20.261623 | 231.373075 | 32.353826 | 27.942501 | 28.907662 | 109.354766 | 1.930420 | 0.282583 | 
| min | 0.250000 | 0.250000 | 0.000000 | 0.000000 | 0.250000 | 0.000000 | 0.250000 | 0.250000 | 0.000000 | 0.250000 | ... | 0.100000 | 1.000000 | 0.000000 | 1.000000 | 10.000000 | 20.000000 | 5.000000 | 0.100000 | 1.000000 | 0.000000 | 
| 25% | 0.500000 | 1.000000 | 1.000000 | 0.500000 | 1.000000 | 0.500000 | 0.500000 | 1.000000 | 1.000000 | 0.500000 | ... | 0.600000 | 50.000000 | 50.000000 | 201.000000 | 45.000000 | 50.000000 | 45.000000 | 9.000000 | 2.000000 | 0.000000 | 
| 50% | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 1.000000 | 65.000000 | 50.000000 | 401.000000 | 65.000000 | 66.000000 | 65.000000 | 27.300000 | 4.000000 | 0.000000 | 
| 75% | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 2.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 1.500000 | 80.000000 | 50.000000 | 601.000000 | 91.000000 | 90.000000 | 85.000000 | 64.800000 | 5.000000 | 0.000000 | 
| max | 4.000000 | 4.000000 | 2.000000 | 4.000000 | 4.000000 | 4.000000 | 4.000000 | 4.000000 | 4.000000 | 4.000000 | ... | 14.500000 | 255.000000 | 100.000000 | 801.000000 | 194.000000 | 230.000000 | 180.000000 | 999.900000 | 7.000000 | 1.000000 | 
8 rows × 34 columns
dd["speed"].describe()
count 801.000000 mean 66.334582 std 28.907662 min 5.000000 25% 45.000000 50% 65.000000 75% 85.000000 max 180.000000 Name: speed, dtype: float64
Lets see the strings variables:
dd["abilities"].describe()  # freq: most common value’s frequency
| name | type1 | hp | attack | defense | |
|---|---|---|---|---|---|
| 793 | Buzzwole | bug | 107 | 139 | 139 | 
| 468 | Yanmega | bug | 86 | 76 | 86 | 
| 636 | Volcarona | bug | 85 | 60 | 65 | 
| 213 | Heracross | bug | 80 | 185 | 115 | 
| 616 | Accelgor | bug | 80 | 70 | 40 | 
| ... | ... | ... | ... | ... | ... | 
| 97 | Krabby | water | 30 | 105 | 90 | 
| 115 | Horsea | water | 30 | 40 | 70 | 
| 119 | Staryu | water | 30 | 45 | 55 | 
| 128 | Magikarp | water | 20 | 10 | 55 | 
| 348 | Feebas | water | 20 | 15 | 20 | 
801 rows × 5 columns
dd["classfication"].describe()
count 801 unique 588 top Dragon Pokémon freq 8 Name: classfication, dtype: object
dd["capture_rate"].describe()  # 'capture_rate' should be an int!!
count 801 unique 34 top 45 freq 250 Name: capture_rate, dtype: object
dd['is_legendary']  .value_counts()
0 731 1 70 Name: is_legendary, dtype: int64
dd["type1"].value_counts()
water 114 normal 105 grass 78 bug 72 psychic 53 fire 52 rock 45 electric 39 poison 32 ground 32 dark 29 fighting 28 ghost 27 dragon 27 steel 24 ice 23 fairy 18 flying 3 Name: type1, dtype: int64
dd["type2"].value_counts()
flying 95 poison 34 ground 34 fairy 29 psychic 29 fighting 25 steel 22 dark 21 grass 20 water 17 dragon 17 ice 15 rock 14 ghost 14 fire 13 electric 9 bug 5 normal 4 Name: type2, dtype: int64
Sorting:
dd.sort_values(by=["type1", "hp"],
               ascending=[1, 0])  # ascending by type1 but not by hp
| abilities | against_bug | against_dark | against_dragon | against_electric | against_fairy | against_fight | against_fire | against_flying | against_ghost | ... | percentage_male | pokedex_number | sp_attack | sp_defense | speed | type1 | type2 | weight_kg | generation | is_legendary | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 793 | ['Beast Boost'] | 0.5 | 0.5 | 1.0 | 1.0 | 2.0 | 0.50 | 2.0 | 4.0 | 1.0 | ... | NaN | 794 | 53 | 53 | 79 | bug | fighting | 333.6 | 7 | 1 | 
| 468 | ['Speed Boost', 'Tinted Lens', 'Frisk'] | 0.5 | 1.0 | 1.0 | 2.0 | 1.0 | 0.25 | 2.0 | 2.0 | 1.0 | ... | 50.0 | 469 | 116 | 56 | 95 | bug | flying | 51.5 | 4 | 0 | 
| 636 | ['Flame Body', 'Swarm'] | 0.5 | 1.0 | 1.0 | 1.0 | 0.5 | 0.50 | 1.0 | 2.0 | 1.0 | ... | 50.0 | 637 | 135 | 105 | 100 | bug | fire | 46.0 | 5 | 0 | 
| 213 | ['Swarm', 'Guts', 'Moxie'] | 0.5 | 0.5 | 1.0 | 1.0 | 2.0 | 0.50 | 2.0 | 4.0 | 1.0 | ... | 50.0 | 214 | 40 | 105 | 75 | bug | fighting | 54.0 | 2 | 0 | 
| 616 | ['Hydration', 'Sticky Hold', 'Unburden'] | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.50 | 2.0 | 2.0 | 1.0 | ... | 50.0 | 617 | 100 | 60 | 145 | bug | NaN | 25.3 | 5 | 0 | 
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | 
| 97 | ['Hyper Cutter', 'Shell Armor', 'Sheer Force'] | 1.0 | 1.0 | 1.0 | 2.0 | 1.0 | 1.00 | 0.5 | 1.0 | 1.0 | ... | 50.0 | 98 | 25 | 25 | 50 | water | NaN | 6.5 | 1 | 0 | 
| 115 | ['Swift Swim', 'Sniper', 'Damp'] | 1.0 | 1.0 | 1.0 | 2.0 | 1.0 | 1.00 | 0.5 | 1.0 | 1.0 | ... | 50.0 | 116 | 70 | 25 | 60 | water | NaN | 8.0 | 1 | 0 | 
| 119 | ['Illuminate', 'Natural Cure', 'Analytic'] | 1.0 | 1.0 | 1.0 | 2.0 | 1.0 | 1.00 | 0.5 | 1.0 | 1.0 | ... | NaN | 120 | 70 | 55 | 85 | water | NaN | 34.5 | 1 | 0 | 
| 128 | ['Swift Swim', 'Rattled'] | 1.0 | 1.0 | 1.0 | 2.0 | 1.0 | 1.00 | 0.5 | 1.0 | 1.0 | ... | 50.0 | 129 | 15 | 20 | 80 | water | NaN | 10.0 | 1 | 0 | 
| 348 | ['Swift Swim', 'Oblivious', 'Adaptability'] | 1.0 | 1.0 | 1.0 | 2.0 | 1.0 | 1.00 | 0.5 | 1.0 | 1.0 | ... | 50.0 | 349 | 10 | 55 | 80 | water | NaN | 7.4 | 3 | 0 | 
801 rows × 41 columns
dd[["name", "type1", "hp", "attack", "defense"]].sort_values(
    by=["type1", "hp"], ascending=[1, 0])
| name | type1 | hp | attack | defense | |
|---|---|---|---|---|---|
| 793 | Buzzwole | bug | 107 | 139 | 139 | 
| 468 | Yanmega | bug | 86 | 76 | 86 | 
| 636 | Volcarona | bug | 85 | 60 | 65 | 
| 213 | Heracross | bug | 80 | 185 | 115 | 
| 616 | Accelgor | bug | 80 | 70 | 40 | 
| ... | ... | ... | ... | ... | ... | 
| 97 | Krabby | water | 30 | 105 | 90 | 
| 115 | Horsea | water | 30 | 40 | 70 | 
| 119 | Staryu | water | 30 | 45 | 55 | 
| 128 | Magikarp | water | 20 | 10 | 55 | 
| 348 | Feebas | water | 20 | 15 | 20 | 
801 rows × 5 columns
modify the type of 'capture_rate':
dd["capture_rate"]  .astype(int)
dd.loc[dd['capture_rate'] == '30 (Meteorite)255 (Core)']
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) /tmp/ipykernel_23112/14930470.py in <module> ----> 1 dd["capture_rate"] .astype(int) 2 dd.loc[dd['capture_rate'] == '30 (Meteorite)255 (Core)'] ~/.anaconda3/lib/python3.9/site-packages/pandas/core/generic.py in astype(self, dtype, copy, errors) 5813 else: 5814 # else, only a single dtype is given -> 5815 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors) 5816 return self._constructor(new_data).__finalize__(self, method="astype") 5817 ~/.anaconda3/lib/python3.9/site-packages/pandas/core/internals/managers.py in astype(self, dtype, copy, errors) 416 417 def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T: --> 418 return self.apply("astype", dtype=dtype, copy=copy, errors=errors) 419 420 def convert( ~/.anaconda3/lib/python3.9/site-packages/pandas/core/internals/managers.py in apply(self, f, align_keys, ignore_failures, **kwargs) 325 applied = b.apply(f, **kwargs) 326 else: --> 327 applied = getattr(b, f)(**kwargs) 328 except (TypeError, NotImplementedError): 329 if not ignore_failures: ~/.anaconda3/lib/python3.9/site-packages/pandas/core/internals/blocks.py in astype(self, dtype, copy, errors) 589 values = self.values 590 --> 591 new_values = astype_array_safe(values, dtype, copy=copy, errors=errors) 592 593 new_values = maybe_coerce_values(new_values) ~/.anaconda3/lib/python3.9/site-packages/pandas/core/dtypes/cast.py in astype_array_safe(values, dtype, copy, errors) 1307 1308 try: -> 1309 new_values = astype_array(values, dtype, copy=copy) 1310 except (ValueError, TypeError): 1311 # e.g. astype_nansafe can fail on object-dtype of strings ~/.anaconda3/lib/python3.9/site-packages/pandas/core/dtypes/cast.py in astype_array(values, dtype, copy) 1255 1256 else: -> 1257 values = astype_nansafe(values, dtype, copy=copy) 1258 1259 # in pandas we don't store numpy str dtypes, so convert to object ~/.anaconda3/lib/python3.9/site-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy, skipna) 1172 # work around NumPy brokenness, #1987 1173 if np.issubdtype(dtype.type, np.integer): -> 1174 return lib.astype_intsafe(arr, dtype) 1175 1176 # if we have a datetime/timedelta array of objects ~/.anaconda3/lib/python3.9/site-packages/pandas/_libs/lib.pyx in pandas._libs.lib.astype_intsafe() ValueError: invalid literal for int() with base 10: '30 (Meteorite)255 (Core)'
there's an observation in 'capture_rate' thar have a text, lets change this observ to 30+255 = 285:
dd.loc[dd['capture_rate'] == '30 (Meteorite)255 (Core)', 'capture_rate'] = '285'
now we can aplly the tranformation to int:
dd["capture_rate"] = dd["capture_rate"].astype(int)
transforming some variables in categorical:
dd["generation"]   = dd["generation"]   .astype('category')
dd["is_legendary"] = dd["is_legendary"] .astype('category')
dd["type1"]        = dd["type1"]        .astype('category')
dd["type2"]        = dd["type2"]        .astype('category')
Create a new variable: normality of Attack among generation subgroups:
(var - var.min) / (var.max - var.min)
dd.groupby('generation')['attack'].min()
dd.groupby('generation')['attack'].max()
generation 1 155 2 185 3 180 4 170 5 150 6 160 7 181 Name: attack, dtype: int64
to this, we need a same length vector containing min & max of the generations:
dd.groupby('generation')['attack'].transform(min)
0       5
1       5
2       5
3       5
4       5
       ..
796    29
797    29
798    29
799    29
800    29
Name: attack, Length: 801, dtype: int64
dd.groupby('generation')['attack'].transform(max)
0      155
1      155
2      155
3      155
4      155
      ... 
796    181
797    181
798    181
799    181
800    181
Name: attack, Length: 801, dtype: int64
so:
dd['atk_gen_dnorm'] = (dd['attack'] - dd.groupby('generation')['attack'].transform(min)) / (dd.groupby('generation')['attack'].transform(max) - dd.groupby('generation')['attack'].transform(min))
dd['atk_gen_dnorm'].describe()
count 801.000000 mean 0.415197 std 0.216498 min 0.000000 25% 0.257143 50% 0.393939 75% 0.560000 max 1.000000 Name: atk_gen_dnorm, dtype: float64
Creating new var: mean of the against_x variables:
dd["Against_M"] = dd.iloc[:, 1:18].mean(axis=1)
dd['Against_M'].describe()
count 801.000000 mean 1.057979 std 0.113671 min 0.720588 25% 0.985294 50% 1.029412 75% 1.117647 max 1.441176 Name: Against_M, dtype: float64
Creating new var from type1:
dd['group1'] = dd['type1'].astype('str')
dd.group1[(dd['type1']=='water') | (dd['type1']=='ice')] = 'aqua'
dd.group1[(dd['type1']=='normal') | (dd['type1']=='fighting')] = 'body'
dd.group1[(dd['type1']=='fire') | (dd['type1']=='dragon') |
          (dd['type1']=='electric')] = 'energy'
dd.group1[(dd['type1']=='rock') | (dd['type1']=='steel') |
          (dd['type1']=='ground') | (dd['type1']=='grass') |
          (dd['type1']=='bug')] = 'earth'
dd.group1[(dd['type1']=='dark') | (dd['type1']=='ghost') |
           (dd['type1']=='psychic') | (dd['type1']=='poison')] = 'dark'
dd.group1[(dd['type1']=='fairy') | (dd['type1']=='flying')] = 'air'
dd['group1'] = dd['group1'].astype('category')
/tmp/ipykernel_23112/2645664726.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy dd.group1[(dd['type1']=='water') | (dd['type1']=='ice')] = 'aqua' /tmp/ipykernel_23112/2645664726.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy dd.group1[(dd['type1']=='normal') | (dd['type1']=='fighting')] = 'body' /tmp/ipykernel_23112/2645664726.py:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy dd.group1[(dd['type1']=='fire') | (dd['type1']=='dragon') | /tmp/ipykernel_23112/2645664726.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy dd.group1[(dd['type1']=='rock') | (dd['type1']=='steel') | /tmp/ipykernel_23112/2645664726.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy dd.group1[(dd['type1']=='dark') | (dd['type1']=='ghost') | /tmp/ipykernel_23112/2645664726.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy dd.group1[(dd['type1']=='fairy') | (dd['type1']=='flying')] = 'air'
dd['group1'].value_counts()
earth 251 dark 141 aqua 137 body 133 energy 118 air 21 Name: group1, dtype: int64
Deleting Variables:
dd = dd[dd.columns.drop(list(dd.filter(regex='against')))] # deleting with condition
dd = dd.drop(columns=["abilities"])
dd = dd.drop(columns=["classfication"])
dd = dd.drop(columns=["base_egg_steps"])
dd = dd.drop(columns=["base_happiness"])
dd = dd.drop(columns=["base_total"])
dd = dd.drop(columns=["height_m"])
dd = dd.drop(columns=["percentage_male"])
dd = dd.drop(columns=["pokedex_number"])
dd = dd.drop(columns=["sp_attack"])
dd = dd.drop(columns=["sp_defense"])
dd = dd.drop(columns=["japanese_name"])
print(dd.iloc[5:10])  # some specific lines
attack capture_rate defense experience_growth hp name speed \ 5 104 45 78 1059860 78 Charizard 100 6 48 45 65 1059860 44 Squirtle 43 7 63 45 80 1059860 59 Wartortle 58 8 103 45 120 1059860 79 Blastoise 78 9 30 255 35 1000000 45 Caterpie 45 type1 type2 weight_kg generation is_legendary atk_gen_dnorm Against_M \ 5 fire flying 90.5 1 0 0.660000 0.970588 6 water NaN 9.0 1 0 0.286667 1.029412 7 water NaN 22.5 1 0 0.386667 1.029412 8 water NaN 85.5 1 0 0.653333 1.029412 9 bug NaN 2.9 1 0 0.166667 1.088235 group1 5 energy 6 aqua 7 aqua 8 aqua 9 earth
print(dd.loc[dd["type1"] == "fire"])  # some spec character
     attack  capture_rate  defense  experience_growth   hp         name  \
3        52            45       43            1059860   39   Charmander   
4        64            45       58            1059860   58   Charmeleon   
5       104            45       78            1059860   78    Charizard   
36       41           190       40            1000000   38       Vulpix   
37       67            75       75            1000000   73    Ninetales   
57       70           190       45            1250000   55    Growlithe   
58      110            75       80            1250000   90     Arcanine   
76       85           190       55            1000000   50       Ponyta   
77      100            60       70            1000000   65     Rapidash   
125      95            45       57            1000000   65       Magmar   
135     130            45       60            1000000   65      Flareon   
145     100             3       90            1250000   90      Moltres   
154      52            45       43            1059860   39    Cyndaquil   
155      64            45       58            1059860   58      Quilava   
156      84            45       78            1059860   78   Typhlosion   
217      40           190       40            1000000   40       Slugma   
218      50            75      120            1000000   60     Magcargo   
239      75            45       37            1000000   45        Magby   
243     115             3       85            1250000  115        Entei   
249     130             3       90            1250000  106        Ho-Oh   
254      60            45       40            1059860   45      Torchic   
255      85            45       60            1059860   60    Combusken   
256     160            45       80            1059860   80     Blaziken   
321      60           255       40            1000000   60        Numel   
322     120           150      100            1000000   70     Camerupt   
323      85            90      140            1000000   70      Torkoal   
389      58            45       44            1059860   44     Chimchar   
390      78            45       52            1059860   64     Monferno   
391     104            45       71            1059860   76    Infernape   
466      95            30       67            1000000   75    Magmortar   
484      90             3      106            1250000   91      Heatran   
497      63            45       45            1059860   65        Tepig   
498      93            45       55            1059860   90      Pignite   
499     123            45       65            1059860  110       Emboar   
512      53           190       48            1000000   50      Pansear   
513      98            75       63            1000000   75     Simisear   
553      90           120       45            1059860   70     Darumaka   
554      30            60      105            1059860  105   Darmanitan   
630      97            90       66            1000000   85      Heatmor   
652      45            45       40            1059860   40     Fennekin   
653      59            45       58            1059860   59      Braixen   
654      69            45       72            1059860   75      Delphox   
661      73           120       55            1059860   62  Fletchinder   
662      81            45       71            1059860   78   Talonflame   
666      50           220       58            1059860   62       Litleo   
667      68            65       72            1059860   86       Pyroar   
720     110             3      120            1250000   80    Volcanion   
724      65            45       40            1059860   45       Litten   
725      85            45       50            1059860   65     Torracat   
726     115            45       90            1059860   95   Incineroar   
740      70            45       70            1000000   75     Oricorio   
775      78            70      135            1000000   60   Turtonator   
     speed type1     type2  weight_kg generation is_legendary  atk_gen_dnorm  \
3       65  fire       NaN        8.5          1            0       0.313333   
4       80  fire       NaN       19.0          1            0       0.393333   
5      100  fire    flying       90.5          1            0       0.660000   
36      65  fire       ice        NaN          1            0       0.240000   
37     109  fire       ice        NaN          1            0       0.413333   
57      60  fire       NaN       19.0          1            0       0.433333   
58      95  fire       NaN      155.0          1            0       0.700000   
76      90  fire       NaN       30.0          1            0       0.533333   
77     105  fire       NaN       95.0          1            0       0.633333   
125     93  fire       NaN       44.5          1            0       0.600000   
135     65  fire       NaN       25.0          1            0       0.833333   
145     90  fire    flying       60.0          1            1       0.633333   
154     65  fire       NaN        7.9          2            0       0.240000   
155     80  fire       NaN       19.0          2            0       0.308571   
156    100  fire       NaN       79.5          2            0       0.422857   
217     20  fire       NaN       35.0          2            0       0.171429   
218     30  fire      rock       55.0          2            0       0.228571   
239     83  fire       NaN       21.4          2            0       0.371429   
243    100  fire       NaN      198.0          2            1       0.600000   
249     90  fire    flying      199.0          2            1       0.685714   
254     45  fire       NaN        2.5          3            0       0.272727   
255     55  fire  fighting       19.5          3            0       0.424242   
256    100  fire  fighting       52.0          3            0       0.878788   
321     35  fire    ground       24.0          3            0       0.272727   
322     20  fire    ground      220.0          3            0       0.636364   
323     20  fire       NaN       80.4          3            0       0.424242   
389     61  fire       NaN        6.2          4            0       0.321212   
390     81  fire  fighting       22.0          4            0       0.442424   
391    108  fire  fighting       55.0          4            0       0.600000   
466     83  fire       NaN       68.0          4            0       0.545455   
484     77  fire     steel      430.0          4            1       0.515152   
497     45  fire       NaN        9.9          5            0       0.304000   
498     55  fire  fighting       55.5          5            0       0.544000   
499     65  fire  fighting      150.0          5            0       0.784000   
512     64  fire       NaN       11.0          5            0       0.224000   
513    101  fire       NaN       28.0          5            0       0.584000   
553     50  fire       NaN       37.5          5            0       0.520000   
554     55  fire      fire       92.9          5            0       0.040000   
630     65  fire       NaN       58.0          5            0       0.576000   
652     60  fire       NaN        9.4          6            0       0.166667   
653     73  fire       NaN       14.5          6            0       0.268116   
654    104  fire   psychic       39.0          6            0       0.340580   
661     84  fire    flying       16.0          6            0       0.369565   
662    126  fire    flying       24.5          6            0       0.427536   
666     72  fire    normal       13.5          6            0       0.202899   
667    106  fire    normal       81.5          6            0       0.333333   
720     70  fire     water      195.0          6            1       0.637681   
724     70  fire       NaN        4.3          7            0       0.236842   
725     90  fire       NaN       25.0          7            0       0.368421   
726     60  fire      dark       83.0          7            0       0.565789   
740     93  fire    flying        3.4          7            0       0.269737   
775     36  fire    dragon      212.0          7            0       0.322368   
     Against_M  group1  
3     0.941176  energy  
4     0.941176  energy  
5     0.970588  energy  
36    0.941176  energy  
37    0.941176  energy  
57    0.941176  energy  
58    0.941176  energy  
76    0.941176  energy  
77    0.941176  energy  
125   0.941176  energy  
135   0.941176  energy  
145   0.970588  energy  
154   0.941176  energy  
155   0.941176  energy  
156   0.941176  energy  
217   0.941176  energy  
218   1.073529  energy  
239   0.941176  energy  
243   0.941176  energy  
249   0.970588  energy  
254   0.941176  energy  
255   0.985294  energy  
256   0.985294  energy  
321   0.852941  energy  
322   0.852941  energy  
323   0.941176  energy  
389   0.941176  energy  
390   0.985294  energy  
391   0.985294  energy  
466   0.941176  energy  
484   0.838235  energy  
497   0.941176  energy  
498   0.985294  energy  
499   0.985294  energy  
512   0.941176  energy  
513   0.941176  energy  
553   0.941176  energy  
554   0.941176  energy  
630   0.941176  energy  
652   0.941176  energy  
653   0.941176  energy  
654   1.029412  energy  
661   0.970588  energy  
662   0.970588  energy  
666   0.941176  energy  
667   0.941176  energy  
720   0.985294  energy  
724   0.941176  energy  
725   0.941176  energy  
726   0.941176  energy  
740   0.970588  energy  
775   1.000000  energy  
dd.loc[(dd["type1"] == "grass") & (dd["type2"] == "poison") & (dd["hp"] > 75)]
| attack | capture_rate | defense | experience_growth | hp | name | speed | type1 | type2 | weight_kg | generation | is_legendary | atk_gen_dnorm | Against_M | group1 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 100 | 45 | 123 | 1059860 | 80 | Venusaur | 80 | grass | poison | 100.0 | 1 | 0 | 0.633333 | 1.102941 | earth | 
| 70 | 105 | 45 | 65 | 1059860 | 80 | Victreebel | 70 | grass | poison | 15.5 | 1 | 0 | 0.666667 | 1.102941 | earth | 
| 590 | 85 | 75 | 70 | 1000000 | 114 | Amoonguss | 30 | grass | poison | 10.5 | 5 | 0 | 0.480000 | 1.102941 | earth | 
Obs: iloc is purely by indexes and loc by names. Now, lets select what contains 'Mega' in name:
dd.loc[dd["name"].str.contains("mega")]
| attack | capture_rate | defense | experience_growth | hp | name | speed | type1 | type2 | weight_kg | generation | is_legendary | atk_gen_dnorm | Against_M | group1 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 468 | 76 | 30 | 86 | 1000000 | 86 | Yanmega | 95 | bug | flying | 51.5 | 4 | 0 | 0.430303 | 1.235294 | earth | 
Select type fire or water:
dd.loc[dd["type1"].str.contains("fire|water")]
| attack | capture_rate | defense | experience_growth | hp | name | speed | type1 | type2 | weight_kg | generation | is_legendary | atk_gen_dnorm | Against_M | group1 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 3 | 52 | 45 | 43 | 1059860 | 39 | Charmander | 65 | fire | NaN | 8.5 | 1 | 0 | 0.313333 | 0.941176 | energy | 
| 4 | 64 | 45 | 58 | 1059860 | 58 | Charmeleon | 80 | fire | NaN | 19.0 | 1 | 0 | 0.393333 | 0.941176 | energy | 
| 5 | 104 | 45 | 78 | 1059860 | 78 | Charizard | 100 | fire | flying | 90.5 | 1 | 0 | 0.660000 | 0.970588 | energy | 
| 6 | 48 | 45 | 65 | 1059860 | 44 | Squirtle | 43 | water | NaN | 9.0 | 1 | 0 | 0.286667 | 1.029412 | aqua | 
| 7 | 63 | 45 | 80 | 1059860 | 59 | Wartortle | 58 | water | NaN | 22.5 | 1 | 0 | 0.386667 | 1.029412 | aqua | 
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | 
| 751 | 70 | 100 | 92 | 1000000 | 68 | Araquanid | 42 | water | bug | 82.0 | 7 | 0 | 0.269737 | 1.058824 | aqua | 
| 770 | 60 | 60 | 130 | 800000 | 55 | Pyukumuku | 5 | water | NaN | 1.2 | 7 | 0 | 0.203947 | 1.029412 | aqua | 
| 775 | 78 | 70 | 135 | 1000000 | 60 | Turtonator | 36 | fire | dragon | 212.0 | 7 | 0 | 0.322368 | 1.000000 | energy | 
| 778 | 105 | 80 | 70 | 1000000 | 68 | Bruxish | 92 | water | psychic | 19.0 | 7 | 0 | 0.500000 | 1.147059 | aqua | 
| 787 | 75 | 3 | 115 | 1250000 | 70 | Tapu Fini | 85 | water | fairy | 21.2 | 7 | 1 | 0.302632 | 0.970588 | aqua | 
166 rows × 15 columns
dd.groupby("type1").mean().sort_values("attack")
| attack | capture_rate | defense | experience_growth | hp | speed | weight_kg | atk_gen_dnorm | Against_M | |
|---|---|---|---|---|---|---|---|---|---|
| type1 | |||||||||
| fairy | 62.111111 | 116.944444 | 68.166667 | 9.138889e+05 | 73.944444 | 53.666667 | 23.555556 | 0.305327 | 0.977124 | 
| psychic | 65.566038 | 86.660377 | 69.264151 | 1.079405e+06 | 72.943396 | 75.150943 | 57.328846 | 0.323916 | 1.112375 | 
| flying | 66.666667 | 79.333333 | 65.000000 | 1.083333e+06 | 68.000000 | 99.666667 | 52.000000 | 0.335266 | 1.117647 | 
| bug | 70.125000 | 119.833333 | 70.847222 | 1.009006e+06 | 56.722222 | 63.569444 | 33.083333 | 0.361085 | 1.119077 | 
| electric | 70.820513 | 106.974359 | 61.820513 | 1.073312e+06 | 60.512821 | 85.410256 | 37.944737 | 0.378562 | 0.952489 | 
| poison | 72.656250 | 128.437500 | 70.031250 | 1.086849e+06 | 65.593750 | 64.187500 | 33.830000 | 0.399824 | 0.978860 | 
| ghost | 72.740741 | 93.814815 | 79.518519 | 1.018117e+06 | 63.370370 | 58.333333 | 69.570370 | 0.365167 | 0.971678 | 
| ice | 73.304348 | 94.173913 | 71.913043 | 1.086069e+06 | 72.086957 | 62.739130 | 103.260870 | 0.386124 | 1.201407 | 
| water | 73.307018 | 99.412281 | 73.482456 | 1.056716e+06 | 70.219298 | 63.921053 | 51.071930 | 0.387995 | 1.043473 | 
| grass | 73.769231 | 106.935897 | 70.871795 | 1.079791e+06 | 65.358974 | 59.025641 | 33.255844 | 0.379938 | 1.200792 | 
| normal | 75.161905 | 120.219048 | 59.695238 | 1.009973e+06 | 76.723810 | 69.533333 | 46.158416 | 0.399823 | 1.005322 | 
| fire | 81.500000 | 72.403846 | 67.788462 | 1.064735e+06 | 68.730769 | 73.346154 | 66.096000 | 0.439117 | 0.950509 | 
| dark | 87.793103 | 84.482759 | 70.517241 | 1.102720e+06 | 72.551724 | 75.310345 | 69.096552 | 0.472996 | 1.080122 | 
| rock | 90.666667 | 76.533333 | 96.266667 | 9.793116e+05 | 66.333333 | 57.422222 | 92.946341 | 0.505399 | 1.146078 | 
| steel | 93.083333 | 58.916667 | 120.208333 | 1.126232e+06 | 66.791667 | 56.583333 | 188.841667 | 0.493987 | 0.841912 | 
| ground | 94.812500 | 108.312500 | 83.906250 | 1.069652e+06 | 73.187500 | 59.968750 | 150.044444 | 0.540137 | 1.028493 | 
| fighting | 99.178571 | 103.750000 | 66.392857 | 1.076021e+06 | 71.428571 | 64.285714 | 58.675000 | 0.571414 | 1.090336 | 
| dragon | 106.407407 | 37.333333 | 86.259259 | 1.216667e+06 | 79.851852 | 76.111111 | 107.125926 | 0.604641 | 1.129085 | 
sea.set()
g1  = sea.histplot(dd, x="attack")
g11 = sea.histplot(dd, x="attack", kde=True)
g2 = sea.relplot(x= 'attack', y='defense', data=dd)
g3 = sea.relplot(data=dd,
                 x= 'attack',
                 y='defense',
                 hue='group1')
g4 = sea.jointplot(data=dd,
                   x= 'attack',
                   y='defense',
                   hue='group1')
g41 = sea.jointplot(data=dd,
                   x= 'attack',
                   y='defense',
                   hue='is_legendary',
                   kind= 'kde')
g42 = sea.jointplot(data=dd,
                   x= 'attack',
                   y='defense',
                   #hue='is_legendary',
                   kind= 'reg')
g43 = sea.jointplot(data=dd,
                   x= 'attack',
                   y='defense',
                   #hue='is_legendary',
                   kind= 'hex')
g5 = sea.jointplot(data=dd,
                   y='hp',
                   x='capture_rate')
g5.plot_joint(sea.kdeplot,
              color='r',
              zorder=0,
              levels=6)
g5.plot_marginals(sea.rugplot,
                  color="r",
                  height=-.15,
                  clip_on=False)
<seaborn.axisgrid.JointGrid at 0x7fdb1c5b16a0>
g6 = sea.pairplot(dd[['attack', 'defense', 'hp', 'speed', 'group1']],
                  hue='group1', height=2.5)
sea.set_theme(style="ticks")
# Initialize the figure with a logarithmic x axis
f, ax = plt.subplots(figsize=(7, 6))
# Plot the orbital period with horizontal boxes
sea.boxplot(data=dd,
            y="group1",
            x="hp", 
            whis=[0, 100],
            width=.6,
            palette="Set3", showmeans=True) # the means are hidden by scarttes
# Add in points to show each observation
sea.stripplot(data=dd,
              y="group1",
              x="hp",
              size=4,
              color=".3",
              linewidth=0,
              alpha=.45)
# Tweak the visual presentation
ax.xaxis.grid(True)
ax.set(ylabel="Primary Type Group")
sea.despine(trim=True, left=True)
 https://towardsdatascience.com/violin-strip-swarm-and-raincloud-plots-in-python-as-better-sometimes-alternatives-to-a-boxplot-15019bdff8f8plt.figure(figsize=(15, 10))
# Create violin plots without mini-boxplots inside.
ax = sea.violinplot(data=dd,
                    x='speed',
                    y='group1',
                    color='paleturquoise', 
                    cut=0,
                    inner=None)
# Clip the lower half of each violin.
for item in ax.collections:
    x0, y0, width, height = item.get_paths()[0].get_extents().bounds
    item.set_clip_path(plt.Rectangle((x0, y0), width, height/2,
                       transform=ax.transData))
# Create [swarm vs strip] plots with partially transparent points of different colors depending if is legendary.
num_items = len(ax.collections)
sea.stripplot(data=dd,
              x='speed',
              y='group1',
              hue='is_legendary', 
              palette=['deepskyblue', 'navy'],
              alpha=0.6,
              size=7)
# Shift each strip plot strictly below the correponding volin.
for item in ax.collections[num_items:]:
    item.set_offsets(item.get_offsets() + 0.15)
# Create narrow boxplots on top of the corresponding violin and strip plots, with thick lines, the mean values, without the outliers.
sea.boxplot(data=dd,
            x='speed',
            y='group1',
            width=0.25,
            showfliers=False,
            showmeans=True, 
            meanprops=dict(marker='o', markerfacecolor='gold',
                           markersize=10, zorder=3),
            boxprops=dict(facecolor=(0,0,0,0), 
                          linewidth=3, zorder=3),
            whiskerprops=dict(linewidth=3),
            capprops=dict(linewidth=3),
            medianprops=dict(linewidth=3))
plt.legend(frameon=False, fontsize=15, loc='upper left')
<matplotlib.legend.Legend at 0x7fdb15f20220>